diff --git a/.env b/.env new file mode 100644 index 00000000..61cb8f08 --- /dev/null +++ b/.env @@ -0,0 +1,12 @@ +COMPOSE_PROJECT_NAME=OSINT +SEARXNG_URL=http://localhost:8567 +# TOR_URL=socks5://localhost:9050 +TOR_URL=http://172.20.0.1:8118 +SELENIUM_URL=http://localhost:4444/wd/hub + +# DB Connection +HOST=localhost +PORT=5432 +USERNAME=OSINT +PASSWORD=My$cre4tP4ssw0rDTh1sSh0uLdB3KeP7S3cReT +DATABASE=postgres \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..cb1fc7eb --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +venv/* +data/* +__pycache__ +logs +geckodriver.log + +# social.py \ No newline at end of file diff --git a/README.md b/README.md index 578bb9cc..0c2bd230 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,49 @@ -# ksp-submission -This repository is created for Karnataka State Police Hackathon 2023 - submission collection. +# ksp - submission +This repository is created for Karnataka State Police Hackathon 2023 - submission collection ## Team Information -### Team Name - -### Problem Statement - +### Team Name - Team Breakpoint +### Problem Statement - Crowdsourcing of records + +# Enolahomes +Enolahomes is an extensive suite built on top of the Django Python framework, with scheduled multithreaded submodules such as spiders, a web crawler with SOCKS proxy support, and OSINT modules for scraping information such as social media accounts and other publicly available data, including the dark web. The tool has an intelligent dashboard for monitoring the job queue and a case investigation report portal. + +## Installation + +### Docker Setup +Docker must be installed; please follow the [docker-install](https://github.com/docker/docker-install) guide. Also make sure docker-compose is installed; if it is not, follow the [docker-compose](https://docs.docker.com/compose/install/) guide. + +```bash +$ docker-compose up -d +``` + +### Virtual Environment +It is recommended to use a virtual environment to install the dependencies. 
"""Root-logger configuration with separate debug, error and info log files.

Importing this module attaches three file handlers to the root logger and
exposes that logger as ``log``.
"""
import logging
import os
from logging.handlers import RotatingFileHandler

# The handlers below open their files as soon as they are constructed.
# ``logs`` is git-ignored, so a fresh checkout has no such directory and the
# import would fail with FileNotFoundError unless it is created first.
# NOTE(review): the path is relative, so the files land under the current
# working directory of whichever process imports this module.
os.makedirs("logs", exist_ok=True)

# Main logger
log = logging.getLogger()
log.setLevel(logging.DEBUG)

# Debug Logger: everything from DEBUG upwards, rotated at ~5 MB with 5 backups.
formatter = logging.Formatter("%(asctime)s : %(levelname)s : [%(filename)s:%(lineno)s - %(funcName)s()] : %(message)s")
debug_handler = RotatingFileHandler("logs/debug", maxBytes=5000000, backupCount=5)
debug_handler.setLevel(logging.DEBUG)
debug_handler.setFormatter(formatter)
log.addHandler(debug_handler)


# Error Logger: ERROR and above only.
formatter = logging.Formatter("%(asctime)s : [%(filename)s: %(funcName)s()] : %(message)s")
error_handler = logging.FileHandler("logs/error")
error_handler.setLevel(logging.ERROR)
error_handler.setFormatter(formatter)
log.addHandler(error_handler)

# Info Logger: INFO and above, message text only.
formatter = logging.Formatter("%(asctime)s : %(message)s")
info_handler = logging.FileHandler("logs/info")
info_handler.setLevel(logging.INFO)
info_handler.setFormatter(formatter)
log.addHandler(info_handler)
def main():
    """Run administrative tasks.

    Starts the scheduler loop (``check_scheduler``) on a background daemon
    thread, then hands ``sys.argv`` to Django's command-line runner.

    Raises:
        ImportError: If Django cannot be imported.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'OSINT.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc

    # Daemon thread: the interpreter does not wait for it at shutdown, so the
    # scheduler loop ends together with the management command. No explicit
    # termination is required. The previous ctypes-based SystemExit injection
    # read ``thread._thread_id``, which ``threading.Thread`` does not define,
    # and hid the resulting AttributeError behind a bare ``except``.
    scheduler_thread = threading.Thread(target=check_scheduler, daemon=True)
    scheduler_thread.start()

    execute_from_command_line(sys.argv)
def check_scheduler():
    """Run the ``schedule`` loop forever, polling for due jobs once per second.

    Meant to be used as the target of a background thread; it never returns.
    """
    log.debug("Starting Scheduler in background...")
    while True:
        schedule.run_pending()
        time.sleep(1)


def threaded_function(func, job):
    """Run one queued job and store its result on the job record.

    Args:
        func: Callable invoked as ``func(**job.parameters)``.
        job: Job record. ``stage`` is set to 2 before the call and to 3 after
            it; ``data`` receives the call's return value.

    Raises:
        Exception: Anything ``func`` raises is logged and re-raised. The job
            then stays at stage 2, which the module-level code below puts
            back to stage 1 the next time this module is imported.
    """
    job.stage = 2
    job.save()

    try:
        job.data = func(**job.parameters)
    except Exception:
        # Record the failure in the log files instead of only on stderr.
        log.exception("Job handler failed for category %r", getattr(job, "category", None))
        raise
    job.stage = 3
    job.save()


def check_job():
    """Start a daemon thread for every job at stage 1, highest priority first.

    A job whose category has no matching entry in ``category_type``, or whose
    entry does not name a callable in this module, is logged and skipped. An
    exception raised here would propagate out of ``schedule.run_pending()``
    and end the scheduler loop.
    """
    jobs = JobQueue.objects.filter(stage=1)

    # Priority sort
    jobs = sorted(jobs, key=lambda x: x.priority, reverse=True)
    for j in jobs:
        # category_type entries are indexed as (key, handler label) here;
        # the lower-cased label is looked up among this module's globals.
        label = next((c[1] for c in category_type if c[0] == j.category), None)
        handler = globals().get(label.lower()) if label is not None else None
        if not callable(handler):
            log.error("No handler found for job category %r; job skipped", j.category)
            continue

        log.info("Invoking the function....")
        t = threading.Thread(target=threaded_function, args=(handler, j), daemon=True)
        t.start()


schedule.every(10).seconds.do(check_job)

# Jobs still at stage 2 were interrupted before completing; put them back to
# stage 1 so the next check_job run picks them up again.
jobs = JobQueue.objects.filter(stage=2)
for j in jobs:
    j.stage = 1
    j.save()