diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..8a29f2a6 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,32 @@ +[run] +source = greedybear +omit = + */migrations/* + */tests/* + */test_*.py + */__pycache__/* + */venv/* + */env/* + manage.py + greedybear/settings.py + greedybear/wsgi.py + +[report] +show_missing = True +precision = 2 +skip_covered = False +skip_empty = True +exclude_lines = + pragma: no cover + def __repr__ + def __str__ + raise AssertionError + raise NotImplementedError + if __name__ == "__main__": + @(abc\.)?abstractmethod + +[html] +directory = htmlcov + +[xml] +output = coverage.xml diff --git a/.env_template b/.env_template index 85c172c3..fb2e84a4 100644 --- a/.env_template +++ b/.env_template @@ -13,4 +13,4 @@ COMPOSE_FILE=docker/default.yml:docker/local.override.yml #COMPOSE_FILE=docker/default.yml:docker/local.override.yml:docker/elasticsearch.yml # If you want to run a specific version, populate this -# REACT_APP_INTELOWL_VERSION="2.1.0" +# REACT_APP_INTELOWL_VERSION="3.0.0" diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..26310858 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,67 @@ +# ============================================================================= +# Git Attributes Configuration for GreedyBear +# Ensures consistent line endings across all platforms +# ============================================================================= + +# Default behavior: Auto-detect text files and normalize to LF +* text=auto eol=lf + +# ----------------------------------------------------------------------------- +# Text files (normalize to LF) +# ----------------------------------------------------------------------------- + +# Python +*.py text eol=lf + +# JavaScript/React +*.js text eol=lf +*.jsx text eol=lf +*.mjs text eol=lf +*.cjs text eol=lf + +# Styles +*.css text eol=lf +*.scss text eol=lf + +# Web +*.html text eol=lf + +# Config files +*.json text eol=lf +*.yml text eol=lf +*.yaml text eol=lf +*.toml text eol=lf +*.conf text eol=lf + +# Documentation +*.md text eol=lf +*.txt text eol=lf + +# Shell scripts +*.sh text eol=lf + +# Docker +Dockerfile text eol=lf +Dockerfile_nginx text eol=lf + +# Git +.gitignore text eol=lf +.gitattributes text eol=lf + +# ----------------------------------------------------------------------------- +# Binary files (do not normalize) +# ----------------------------------------------------------------------------- + +# Images +*.png binary +*.ico binary + +# ----------------------------------------------------------------------------- +# Linguist overrides (GitHub language statistics) +# ----------------------------------------------------------------------------- + +# Exclude from language statistics +*.min.js linguist-vendored +*.min.css linguist-vendored +**/migrations/* linguist-generated +package-lock.json linguist-generated diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml index 42878d62..08c9b4c2 100644 --- a/.github/.pre-commit-config.yaml +++ b/.github/.pre-commit-config.yaml @@ -1,18 +1,32 @@ repos: -- repo: https://github.com/pycqa/flake8 - rev: 7.1.1 +# Python linting with Ruff +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.7 hooks: - - id: flake8 - args: ["--config", ".github/configurations/python_linters/.flake8"] + - id: ruff + name: ruff-lint + args: ["--fix", "--config", "./.github/configurations/python_linters/.ruff.toml"] + - id: ruff-format + args: ["--config", 
"./.github/configurations/python_linters/.ruff.toml"] -- repo: https://github.com/pycqa/isort - rev: 5.13.2 +# Frontend formatting with Prettier (using local npm) +# Skips if frontend/node_modules doesn't exist (for backend-only contributors) +- repo: local hooks: - - id: isort - args: ["--settings-path", ".github/configurations/python_linters/.isort.cfg", "--filter-files", "--skip", "venv"] + - id: prettier + name: prettier + entry: bash -c 'if [ -d frontend/node_modules ]; then cd frontend && npm run formatter; else echo "Skipping prettier - run npm install in frontend/ to enable"; fi' + language: system + files: ^frontend/src/.*\.(js|jsx)$|^frontend/tests/.*\.(js|jsx)$|^frontend/src/styles/.*\.(css|scss)$ + pass_filenames: false -- repo: https://github.com/psf/black - rev: 24.8.0 +# Frontend linting with ESLint (using local npm) +# Skips if frontend/node_modules doesn't exist (for backend-only contributors) +- repo: local hooks: - - id: black - args: ["--config", ".github/configurations/python_linters/.black"] + - id: eslint + name: eslint + entry: bash -c 'if [ -d frontend/node_modules ]; then cd frontend && npm run lint; else echo "Skipping eslint - run npm install in frontend/ to enable"; fi' + language: system + files: ^frontend/src/.*\.(js|jsx)$|^frontend/tests/.*\.(js|jsx)$ + pass_filenames: false diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 473e1c33..9bd60775 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -1,5 +1,40 @@ # Changelog -From the v1.3.0 afterwards please check the Release Pages on Github for information regarding the changelog +From the v1.3.0 onwards please check the Release Pages on Github for information regarding the changelog + +## Certego .github Package Changelog + +## 2.0.x +### 2.0.0 +#### Features +* Added "release.yml" action to to push containers to AWS ECR +* Added *create_apt_cache.yaml* workflow to cache APT requirements each time a commit is pushed on selected branch and **when the requirements file has changed**. +* Added documentation. +* Added "Ruff" to the list of available Python linters. +#### Bugfix +* Updated python linters also in '_python.yml' workflow (missing from previous release) +* Explicitly disabled `xpack.security` in Elasticsearch container, since it is enabled by default in newer versions of Elasticsearch +* Added missing inputs for "create_linter_requirements_file" action. +#### Changes +* Deprecation of license check table-headers +* Updated Python linters: + * bandit 1.7.9 -> 1.8.3 + * black 24.8.0 -> 25.1.0 + * flake8 7.1.1 -> 7.1.2 + * isort 5.13.2 -> 6.0.1 + * pylint-django 2.5.5 -> 2.6.1 + * pylint 3.2.6 -> 3.3.5 +* Removed `awalsh128/cache-apt-pkgs-action@latest` action and rewrote APT caching using GitHub's `actions/cache/restore@v4` and `actions/cache/save@v4`. +* Added both frontend and backend exclusions on _detect_changes.yaml (paths that won't be considered by git diff) +* Updated CodeQL action v2 -> v3 (v2 has been [deprecated](https://github.blog/changelog/2024-01-12-code-scanning-deprecation-of-codeql-action-v2/) on december '24) +* Removed `setup-python-dependencies` from `codeql/action.yml` since it has no effect anymore. See [this](https://github.blog/changelog/2024-01-23-codeql-2-16-python-dependency-installation-disabled-new-queries-and-bug-fixes/) for more information. +* Linters versions in step `Create requirements-linters.txt` of `_python.yml` action are now computed according to `configurations/python_linters/requirements-linters.txt`. 
As of now, linter updates are only required in `configurations/python_linters/requirements-linters.txt`. +* Reworked Python requirements caching. +* Updated some Github actions: + * setup-python v4 -> v5 + * action-gh-release v1 -> v2 +* Added "Install system dependencies required by Python packages" step to "Create Python cache" workflow. + +## GreedyBear Changelog ## [v1.2.1](https://github.com/honeynet/GreedyBear/releases/tag/v1.2.1) * Fixes and adjusts in the "Feeds Page" @@ -42,4 +77,5 @@ Added support for all the other available honeypots! (#86) ## [v1.0.0](https://github.com/honeynet/GreedyBear/releases/tag/v1.0.0) ** FIRST RELEASE! ** -A new GUI is available to explore the data with an awesome dashboard! \ No newline at end of file +A new GUI is available to explore the data with an awesome dashboard! + diff --git a/.github/actions/apt_requirements/action.yml b/.github/actions/apt_requirements/action.yml deleted file mode 100644 index 872cbe58..00000000 --- a/.github/actions/apt_requirements/action.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Composite action install apt requirements -description: Composite action install apt requirements -inputs: - working_directory: - description: Working directory - required: true - requirements_file: - description: Requirements file - required: true - -runs: - using: "composite" - steps: - - name: Export apt requirements - id: export-apt-requirements - run: | - PKG=$(cat ${{ inputs.requirements_file }}) - echo apt_packages=$PKG | awk '{print}' ORS=' ' >> $GITHUB_OUTPUT - shell: bash - - - name: Cache apt packages - id: cache-apt-packages - uses: awalsh128/cache-apt-pkgs-action@latest - with: - packages: ${{ steps.export-apt-requirements.outputs.apt_packages }} \ No newline at end of file diff --git a/.github/actions/apt_requirements/restore_apt_cache/README.md b/.github/actions/apt_requirements/restore_apt_cache/README.md new file mode 100644 index 00000000..046b58e4 --- /dev/null +++ b/.github/actions/apt_requirements/restore_apt_cache/README.md @@ -0,0 +1,29 @@ +# Composite action restore APT cache + +This action restores an APT cache from GitHub's cache. + +Combined with [**save_apt_cache**](../save_apt_cache/README.md), it helps save time by avoiding the download of APT requirements. + +The action is composed of five steps: + +1. **Compute APT requirements files SHA256 hash** - This step uses [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) action to compute a single SHA256 hash of the APT requirements file described by the *apt_rquirements_file_path* input variable. The computed SHA256 hash will be part of the cache key. +2. **Backup `/var/cache/apt/archives permissions`** - This step backs up the permissions associated to the `/var/cache/apt/archives` directory. So, after restoring the APT cache they can be restored to the original ones. +3. **Add write permissions for all to `/var/cache/apt/archives`** - This step sets the write permission to the `/var/cache/apt/archives`. This is crucial because the [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) GitHub's action needs to be able to write to it. Without setting the correct write permission, a permission error is raised. +4. **Restore APT cache** - This step restores the APT cache. It uses the GitHub's [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action with the following parameters: + * **path** - A list of files, directories, or paths to restore - set to `/var/cache/apt/archives/*.deb`. 
+ * **key** - An explicit key for a cache entry - set to the combination of three strings: + * *git_reference*, provided as an input to the action. + * A static part, `-apt-` + * The previously computed SHA256 hash of the APT requirements file. +5. **Restore original permissions to `/var/cache/apt/archives` and delete backup** - This step restore the original permissions to the `/var/cache/apt/archives` directory. Finally, the backup file is deleted. + +## Documentation + +### Inputs + +* **apt_requirements_file_path** - Required - Path to the APT requirements file. It will be used to compute a SHA256 hash used in the cache key. +* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`. + +### Outputs + +* **cache-hit** - A boolean value which is true when APT cache is found in the GitHub's cache, false otherwise. diff --git a/.github/actions/apt_requirements/restore_apt_cache/action.yml b/.github/actions/apt_requirements/restore_apt_cache/action.yml new file mode 100644 index 00000000..282935bd --- /dev/null +++ b/.github/actions/apt_requirements/restore_apt_cache/action.yml @@ -0,0 +1,64 @@ +name: Composite action restore APT cache +description: Composite action to restore APT cache +inputs: + apt_requirements_file_path: + description: Path to the APT requirements file + required: true + git_reference: + description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key. + required: false + default: ${{ github.ref_name }} + +outputs: + cache-hit: + description: Whether the APT cache was found in the GitHub's cache or not. 
+ value: ${{ steps.restore_apt_cache.outputs.cache-hit }} + + +runs: + using: "composite" + steps: + - name: Compute APT requirements file SHA256 hash + id: compute_apt_requirements_file_sha256_hash + uses: ./.github/actions/misc/compute_files_hash + with: + file_paths: ${{ inputs.apt_requirements_file_path }} + + - name: Backup /var/cache/apt/archives permissions + id: backup_apt_cache_dir_permissions + run: | + PERMISSIONS_FILE_PATH="/tmp/apt_cache_dir_permissions.facl" + echo "apt_cache_dir_permissions_file=$PERMISSIONS_FILE_PATH" > $GITHUB_OUTPUT + sudo getfacl -p /var/cache/apt/archives > $PERMISSIONS_FILE_PATH + ARCHIVES_PERMISSIONS=$(ls -ld /var/cache/apt/archives) + echo "::debug::Original permissions given to /var/cache/apt/archives: $ARCHIVES_PERMISSIONS" + echo "::debug::Created /var/cache/apt/archives permissions backup to $PERMISSIONS_FILE_PATH" + shell: bash + + # Vital to be able to restore cache + # If write permission is not set, a permissions error will be raised + - name: Add write permission for all to /var/cache/apt/archives + run: | + sudo chmod a+w /var/cache/apt/archives + ARCHIVES_NEW_PERMISSIONS=$(ls -ld /var/cache/apt/archives) + echo "::debug::New permissions given to /var/cache/apt/archives: $ARCHIVES_NEW_PERMISSIONS" + shell: bash + + - name: Restore APT cache + uses: actions/cache/restore@v4 + id: restore_apt_cache + with: + path: /var/cache/apt/archives/*.deb + key: ${{ inputs.git_reference }}-apt-${{ steps.compute_apt_requirements_file_sha256_hash.outputs.computed_hash }} + + - name: Restore original permissions to /var/cache/apt/archives and delete backup + run: | + PERMISSIONS_FILE_PATH=${{ steps.backup_apt_cache_dir_permissions.outputs.apt_cache_dir_permissions_file }} + sudo setfacl --restore="$PERMISSIONS_FILE_PATH" + ARCHIVES_RESTORED_PERMISSIONS=$(ls -ld /var/cache/apt/archives) + echo "::debug::Restored original permissions to /var/cache/apt/archives: $ARCHIVES_RESTORED_PERMISSIONS" + if [[ -f "$PERMISSIONS_FILE_PATH" ]]; then + sudo rm "$PERMISSIONS_FILE_PATH" + echo "::debug::Correctly removed $PERMISSIONS_FILE_PATH permissions backup file" + fi + shell: bash \ No newline at end of file diff --git a/.github/actions/apt_requirements/save_apt_cache/README.md b/.github/actions/apt_requirements/save_apt_cache/README.md new file mode 100644 index 00000000..4d8dca82 --- /dev/null +++ b/.github/actions/apt_requirements/save_apt_cache/README.md @@ -0,0 +1,22 @@ +# Composite action save APT cache + +This action saves the APT cache, almost always located at `/var/cache/apt/archives/*.deb` to the GitHub's cache. + +Combined with [**restore_apt_cache**](../restore_apt_cache/README.md) helps save time by avoiding the download of APT requirements. + +The action is composed of two steps: + +1. **Compute APT requirements file SHA256 hash** - This step uses the [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) action to compute the SHA256 hash of the APT requriments file that will be part of the cache key. +2. **Save APT cache** - This step does the real caching on GitHub. The GitHub's [**cache/save**](https://github.com/actions/cache/blob/main/save/README.md) is used with the following parameters: + 1. **path** - A list of files, directories, or paths to cache - set to `/var/cache/apt/archives/*.deb` to save all `*.deb` files in APT cache. + 2. **key** - An explicit key for a cache entry - set to the combination of three strings: + 1. *git_reference*, provided as an input to the action. + 2. A static part, `-apt-` + 3. 
The previously computed SHA256 hash of the APT requirements file. + +## Documentation + +### Inputs + +* **apt_requirements_file_path** - Required - Path to the APT requirements file. It will be used to compute a SHA256 hash used in the cache key. +* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`. diff --git a/.github/actions/apt_requirements/save_apt_cache/action.yml b/.github/actions/apt_requirements/save_apt_cache/action.yml new file mode 100644 index 00000000..af41cfde --- /dev/null +++ b/.github/actions/apt_requirements/save_apt_cache/action.yml @@ -0,0 +1,24 @@ +name: Composite action save APT cache +description: Composite action to save APT cache +inputs: + apt_requirements_file_path: + description: Path to the APT requirements file + required: true + git_reference: + description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key. + required: false + default: ${{ github.ref_name }} + +runs: + using: "composite" + steps: + - name: Compute APT requiremments file SHA256 hash + id: compute_apt_requirements_file_sha256_hash + uses: ./.github/actions/misc/compute_files_hash + with: + file_paths: ${{ inputs.apt_requirements_file_path }} + - name: Save APT cache + uses: actions/cache/save@v4 + with: + path: /var/cache/apt/archives/*.deb + key: ${{ inputs.git_reference }}-apt-${{ steps.compute_apt_requirements_file_sha256_hash.outputs.computed_hash }} \ No newline at end of file diff --git a/.github/actions/codeql/action.yml b/.github/actions/codeql/action.yml index b49e2b60..22c16e19 100644 --- a/.github/actions/codeql/action.yml +++ b/.github/actions/codeql/action.yml @@ -12,13 +12,12 @@ runs: using: "composite" steps: - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ inputs.language }} - setup-python-dependencies: false source-root: ${{ inputs.working_directory }} - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 diff --git a/.github/actions/misc/compute_files_hash/README.md b/.github/actions/misc/compute_files_hash/README.md new file mode 100644 index 00000000..f1c594f3 --- /dev/null +++ b/.github/actions/misc/compute_files_hash/README.md @@ -0,0 +1,18 @@ +# Composite action compute files hash + +This action computes a single SHA256 hash of one or more files. +Given a **space separated list of file paths**, a new file is created by concatenating all those files together. Then the SHA256 hash of the newly created file is computed and returned as the output. + +Before being joined together, each file is tested to ensure that it **exists** and that it is **a regular file**. + +This action is useful when saving/restoring a cache in which a unique key is required. As a matter of fact, the hash is used as a part of the hash key. + +## Documentation + +### Inputs + +* `file_paths` - Mandatory - Space separated list of file paths for which a single SHA256 hash will be computed. + +### Outputs + +* `computed_hash` - A SHA256 hash of the file obtained by joining (concatenating) all input files together. 
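For a quick mental model, the hashing this action performs boils down to the step sketched below; the two requirements file names are placeholders chosen for the example, not paths mandated by the CI.

```yaml
# Minimal sketch of the equivalent shell logic; the two requirements file
# names are placeholders for the example, not paths required by the CI.
- name: Compute a single SHA256 hash of two requirements files
  shell: bash
  run: |
    for f in requirements.txt requirements-dev.txt; do
      [[ -f "$f" ]] || { echo "::error::$f does not exist or it is not a regular file!"; exit 1; }
    done
    cat requirements.txt requirements-dev.txt | sha256sum | cut -d ' ' -f 1
```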
diff --git a/.github/actions/misc/compute_files_hash/action.yml b/.github/actions/misc/compute_files_hash/action.yml new file mode 100644 index 00000000..fca2a53a --- /dev/null +++ b/.github/actions/misc/compute_files_hash/action.yml @@ -0,0 +1,40 @@ +name: Composite action compute files hash +description: Composite action to compute a single hash of one or more files +inputs: + file_paths: + description: Space separeted list of files for which a single SHA256 hash will be computed. + required: true + +outputs: + computed_hash: + description: The hash of the concatenated files + value: ${{ steps.compute_files_sha256_hash.outputs.computed_hash }} + +runs: + using: "composite" + steps: + - name: Compute files SHA256 hash + id: compute_files_sha256_hash + run: | + if [[ -z '${{ inputs.file_paths }}' ]]; then + echo "::error::file_paths cannot be empty!" + exit 1 + fi + JOINED_FILES="cat " + # Create a bash array of file paths + for file in ${{ inputs.file_paths }}; + do + if [[ -f $file ]]; then + # Concat file path to cat command + JOINED_FILES+="$file " + echo "::debug::Current file is $file" + echo "::debug::JOINED_FILES variable state is $JOINED_FILES" + else + echo "::error::$file does not exist or it is not a regular file!" + exit 1 + fi + done + COMPUTED_HASH=$($JOINED_FILES | sha256sum | cut -d ' ' -f 1) + echo "::debug::Hash is $COMPUTED_HASH" + echo "computed_hash=$COMPUTED_HASH" >> $GITHUB_OUTPUT + shell: bash \ No newline at end of file diff --git a/.github/actions/push_on_ecr/action.yml b/.github/actions/push_on_ecr/action.yml new file mode 100644 index 00000000..f130e595 --- /dev/null +++ b/.github/actions/push_on_ecr/action.yml @@ -0,0 +1,56 @@ +name: Composite action push on ecr +description: Composite action push on ecr +inputs: + repository: + description: Repository name + required: true + dockerfile: + description: Path for dockerfile from working directory + required: true + working_directory: + description: Docker build context + required: true + + aws_account_id: + description: Aws User code + required: true + aws_access_key: + description: Aws access key + required: true + aws_secret_access_key: + description: Aws secret access key + required: true + image_tag: + description: Directory that must be run against the linters + required: true + + aws_region: + description: Aws region + required: true + +runs: + using: "composite" + steps: + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: ${{ inputs.aws_region}} + aws-access-key-id: ${{ inputs.aws_access_key }} + aws-secret-access-key: ${{ inputs.aws_secret_access_key }} + + - name: Login to Amazon ECR Private + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: ${{ inputs.working_directory }} + push: true + cache-from: type=gha + cache-to: type=gha,mode=max + tags: ${{inputs.aws_account_id}}.dkr.ecr.${{inputs.aws_region}}.amazonaws.com/${{ inputs.repository }}:${{ inputs.image_tag }} + file: ${{ inputs.working_directory }}/${{ inputs.dockerfile }} diff --git a/.github/actions/python_linter/action.yml b/.github/actions/python_linter/action.yml index 8c285e00..1bd46376 100644 --- a/.github/actions/python_linter/action.yml +++ b/.github/actions/python_linter/action.yml @@ -5,6 +5,12 @@ inputs: description: Directory that must be run against the linters required: true + use_autoflake: + description: Use autoflake + 
required: true + use_bandit: + description: Use bandit linter + required: true use_black: description: Use black formatter required: true @@ -17,11 +23,11 @@ inputs: use_pylint: description: Use pylint linter required: true - use_bandit: - description: Use bandit linter + use_ruff_formatter: + description: Use ruff formatter required: true - use_autoflake: - description: Use autoflake + use_ruff_linter: + description: Use ruff linter required: true runs: @@ -66,11 +72,9 @@ runs: else echo "Skipping isort linter" fi - working-directory: ${{ inputs.working_directory }} shell: bash - - name: bandit run: | if [[ ${{inputs.use_bandit }} != 'false' ]]; then @@ -78,7 +82,6 @@ runs: else echo "Skipping bandit linter" fi - working-directory: ${{ inputs.working_directory }} shell: bash @@ -90,4 +93,24 @@ runs: echo "Skipping autoflake" fi working-directory: ${{ inputs.working_directory }} + shell: bash + + - name: ruff formatter + run: | + if [[ ${{ inputs.use_ruff_formatter }} != 'false' ]]; then + ruff format --config ${GITHUB_WORKSPACE}/.github/configurations/python_linters/.ruff.toml --diff . + else + echo "Skipping ruff formatter" + fi + working-directory: ${{ inputs.working_directory }} + shell: bash + + - name: ruff linter + run: | + if [[ ${{ inputs.use_ruff_linter }} != 'false' ]]; then + ruff check --config ${GITHUB_WORKSPACE}/.github/configurations/python_linters/.ruff.toml . + else + echo "Skipping ruff linter" + fi + working-directory: ${{ inputs.working_directory }} shell: bash \ No newline at end of file diff --git a/.github/actions/python_requirements/create_dev_requirements_file/README.md b/.github/actions/python_requirements/create_dev_requirements_file/README.md new file mode 100644 index 00000000..ae32be02 --- /dev/null +++ b/.github/actions/python_requirements/create_dev_requirements_file/README.md @@ -0,0 +1,13 @@ +# Composite action create Python dev requirements file + +This action creates the `requirements-dev.txt` file which will contain all **development dependencies**. + +As of today, the only development dependency supported is `coverage`. + +## Documentation + +### Inputs + +* **install_from** - Optional - The path used as working directory when creating the `requirements-dev.txt` file. It defaults to the current directory (i.e. `.`). +* **project_dev_requirements_file** - Optional - The path of a project `requirements-dev.txt`. This was designed in case development requirements other than coverage are required. If specified, the dependencies in the project `requirements-dev.txt` will be appended in the newly created `requirements-dev.txt`. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no custom `requirements-dev.txt`**. +* **use_coverage** - Optional - Whether to use coverage or not. It defaults to false. diff --git a/.github/actions/python_requirements/create_dev_requirements_file/action.yml b/.github/actions/python_requirements/create_dev_requirements_file/action.yml new file mode 100644 index 00000000..b11c58f2 --- /dev/null +++ b/.github/actions/python_requirements/create_dev_requirements_file/action.yml @@ -0,0 +1,22 @@ +name: Composite action create Python dev requirements file +description: Composite action to create Python dev requirements file +inputs: + install_from: + description: Directory that must be used to install the packages + required: false + default: . 
+ project_dev_requirements_file: + description: An additional project dev requirements file + required: false + +runs: + using: "composite" + steps: + - name: Create requirements-dev.txt + run: | + echo > requirements-dev.txt + if [[ -n '${{ inputs.project_dev_requirements_file }}' ]];then + cat $(echo ${{ inputs.project_dev_requirements_file }}) >> requirements-dev.txt + fi + shell: bash + working-directory: ${{ inputs.install_from }} \ No newline at end of file diff --git a/.github/actions/python_requirements/create_docs_requirements_file/README.md b/.github/actions/python_requirements/create_docs_requirements_file/README.md new file mode 100644 index 00000000..913192f8 --- /dev/null +++ b/.github/actions/python_requirements/create_docs_requirements_file/README.md @@ -0,0 +1,12 @@ +# Composite action create Python docs requirements file + +This action creates the `requirements-docs.txt` file. This is a Python requirements file that will contain all **dependencies required to build the documentation**. + +## Documentation + +### Inputs + +* **install_from** - Optional - The path used as working directory when creating the `requirements-docs.txt` file. It defaults to the current directory (i.e. `.`). +* **project_docs_requirements_file** - Optional - The path of a project `requirements-docs.txt`. This was designed in case requirements to build documentation other than rstcheck, sphinx, sphinx_rtd_theme, sphinxcontrib-spelling and sphinxcontrib-django2 are required. If specified, the dependencies in the project `requirements-docs.txt` will be appended in the newly created `requirements-docs.txt`. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no custom `requirements-docs.txt`**. +* **django_settings_module** - Optional - Path to the Django settings file. It's used to make GitHub action aware of Django presence. In this case, `sphinxcontrib-django2` is also added to the newly created requirement file. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no Django settings file**. +* **check_docs_directory** - Optional - Path that will be used by rstcheck to check documentation. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **documentation won't be checked**. diff --git a/.github/actions/python_requirements/create_docs_requirements_file/action.yml b/.github/actions/python_requirements/create_docs_requirements_file/action.yml new file mode 100644 index 00000000..fb674f80 --- /dev/null +++ b/.github/actions/python_requirements/create_docs_requirements_file/action.yml @@ -0,0 +1,37 @@ +name: Composite action create Python docs requirements file +description: Composite action to create Python docs requirements file +inputs: + install_from: + description: Directory that must be used to install the packages + required: false + default: . 
+ project_docs_requirements_file: + description: An additional project docs requirements file + required: false + django_settings_module: + description: Path to the django settings file + required: false + check_docs_directory: + description: Check docs using rstcheck inside this directory + required: false + +runs: + using: "composite" + steps: + - name: Create requirements-docs.txt + run: | + echo > requirements-docs.txt + if [[ -n '${{ inputs.check_docs_directory }}' ]]; then + echo "rstcheck[sphinx]" >> requirements-docs.txt + echo "sphinx==7.2.6" >> requirements-docs.txt + echo "sphinx_rtd_theme==1.3.0" >> requirements-docs.txt + echo "sphinxcontrib-spelling==8.0.0" >> requirements-docs.txt + if [[ -n '${{ inputs.django_settings_module }}' ]]; then + echo "sphinxcontrib-django2==1.9" >> requirements-docs.txt + fi + if [[ -z '${{ inputs.project_docs_requirements_file }}' ]];then + cat $(echo ${{ inputs.project_docs_requirements_file }}) >> requirements-docs.txt + fi + fi + shell: bash + working-directory: ${{ inputs.install_from }} \ No newline at end of file diff --git a/.github/actions/python_requirements/create_linter_requirements_file/README.md b/.github/actions/python_requirements/create_linter_requirements_file/README.md new file mode 100644 index 00000000..fafbb9b1 --- /dev/null +++ b/.github/actions/python_requirements/create_linter_requirements_file/README.md @@ -0,0 +1,32 @@ +# Composite action create Python linter requirements file + +This action creates the `requirements-linters.txt` file which will contain all **linter dependencies** required by the CI. +The user can then choose which linters will be run, and hence written to the `requirements-linters.txt`, by the CI by setting some flags to true like *use_black*. + +As of today only the following linters are supported: + +* `autoflake` +* `bandit` +* `black` +* `flake8` +* `flake8-django` +* `isort` +* `pylint` +* `pylint-django` +* `ruff` + +## Documentation + +### Inputs + +* **install_from** - Optional - The path used as working directory when creating the `requirements-linters.txt` file. It defaults to the current directory (i.e. `.`). +* `project_linter_requirements_file` - Optional - The path of a project `requirements-linters.txt`. This was designed in case requirements for linters other than `autoflake`, `bandit`, `black`, `flake8`, `flake8-django`, `isort`, `pylint` and `pylint-django` are required. If specified, the dependencies in the project `requirements-linters.txt` will be appended in the newly created `requirements-linters.txt`. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no custom `requirements-linters.txt`**. +* **django_settings_module** - Optional - Path to the Django settings file. It's used to make GitHub action aware of Django presence. In the case of a Django project, `flake8-django` and `pylint-django`, may be used and hence they will be added to the newly created requirements file. **Be careful: if a relative path is used this will depend on *install_from*.** Defaults to empty strings, and hence **no Django settings file**. +* **use_autoflake** - Optional - Flag to state whether to use or not `autoflake` linter. It defaults to false. +* **use_bandit** - Optional - Flag to state whether to use or not `bandit` linter. It defaults to false. +* **use_black** - Optional - Flag to state whether to use `black` formatter. It defaults to false. +* **use_flake8** - Optional - Flag to state whether to use or not `flake8` linter. 
It defaults to false. +* **use_isort** - Optional - Flag to state whether to use or not `isort` formatter. It defaults to false. +* **use_pylint** - Optional - Flag to state whether to use or not `pylint` linter. It defaults to false. +* **use_ruff_formatter** - Optional - Flag to state whether to use `ruff` **formatter** (so without the linting). It defaults to false. +* **use_ruff_linter** - Optional - Flag to state whether to use `ruff` **linter** (so without the formatting). It defaults to false. diff --git a/.github/actions/python_requirements/create_linter_requirements_file/action.yml b/.github/actions/python_requirements/create_linter_requirements_file/action.yml new file mode 100644 index 00000000..b7ac0923 --- /dev/null +++ b/.github/actions/python_requirements/create_linter_requirements_file/action.yml @@ -0,0 +1,103 @@ +name: Composite action create Python linter requirements file +description: Composite action to create Python linter requirements file +inputs: + install_from: + description: Directory that must be used to install the packages + required: false + default: . + project_linter_requirements_file: + description: An additional project linter requirements file + required: false + django_settings_module: + description: Path to the django settings file + required: false + use_autoflake: + description: Use autoflake linter + required: false + use_bandit: + description: Use bandit linter + required: false + use_black: + description: Use black formatter + required: false + use_flake8: + description: Use flake8 linter + required: false + use_isort: + description: Use isort formatter + required: false + use_pylint: + description: Use pylint linter + required: false + use_ruff_formatter: + description: Use ruff formatter + required: false + use_ruff_linter: + description: Use ruff linter + required: false + + +runs: + using: "composite" + steps: + - name: Create requirements-linters.txt + run: | + function check_linter_dependency_and_append_to_file { + # + # Function to check whether a specific linter is in the requirements file + # If it can be found inside the requirements, said linter dependency will be appended to a newly created requirements-linter.txt file. + # If the linter is not found inside the requirements file an error will be raised. + # + # 1st parameter: Name of the linter. + # 2nd parameter: Path of the requirements file. + # + if [[ -z $(grep -P "^$1[^a-zA-Z0-9_-].*" "$2") ]]; then + echo "::error::$1 dependency not found in $2 file!" + exit 1 + else + echo "$1 dependency found in $2!" 
+ echo "$(grep -P ^$1[^a-zA-Z0-9_-].* $2)" >> requirements-linters.txt + fi + } + CI_REQUIREMENTS_LINTERS="${GITHUB_WORKSPACE}/.github/configurations/python_linters/requirements-linters.txt" + echo > requirements-linters.txt + + if [[ '${{ inputs.use_black }}' != 'false' ]]; then + check_linter_dependency_and_append_to_file "black" "$CI_REQUIREMENTS_LINTERS" + fi + + if [[ '${{ inputs.use_isort }}' != 'false' ]]; then + check_linter_dependency_and_append_to_file "isort" "$CI_REQUIREMENTS_LINTERS" + fi + + if [[ '${{ inputs.use_flake8 }}' != 'false' ]]; then + check_linter_dependency_and_append_to_file "flake8" "$CI_REQUIREMENTS_LINTERS" + if [[ -n '${{ inputs.django_settings_module }}' ]]; then + check_linter_dependency_and_append_to_file "flake8-django" "$CI_REQUIREMENTS_LINTERS" + fi + fi + + if [[ '${{ inputs.use_pylint }}' != 'false' ]]; then + check_linter_dependency_and_append_to_file "pylint" "$CI_REQUIREMENTS_LINTERS" + if [[ -n '${{ inputs.django_settings_module }}' ]]; then + check_linter_dependency_and_append_to_file "pylint-django" "$CI_REQUIREMENTS_LINTERS" + fi + fi + + if [[ '${{ inputs.use_bandit }}' != 'false' ]]; then + check_linter_dependency_and_append_to_file "bandit" "$CI_REQUIREMENTS_LINTERS" + fi + + if [[ '${{ inputs.use_autoflake }}' != 'false' ]]; then + check_linter_dependency_and_append_to_file "autoflake" "$CI_REQUIREMENTS_LINTERS" + fi + + if [[ '${{ inputs.use_ruff_formatter }}' != 'false' || '${{ inputs.use_ruff_linter }}' != 'false' ]]; then + check_linter_dependency_and_append_to_file "ruff" "$CI_REQUIREMENTS_LINTERS" + fi + + if [[ -z '${{ inputs.project_linter_requirements_file }}' ]]; then + cat $(echo ${{ inputs.project_linter_requirements_file }}) >> requirements-linters.txt + fi + shell: bash + working-directory: ${{ inputs.install_from }} \ No newline at end of file diff --git a/.github/actions/python_requirements/create_virtualenv/README.md b/.github/actions/python_requirements/create_virtualenv/README.md new file mode 100644 index 00000000..8f3361a6 --- /dev/null +++ b/.github/actions/python_requirements/create_virtualenv/README.md @@ -0,0 +1,20 @@ +# Composite action create Python virtual environment + +This GitHub action creates a Python virtual environment using Python's `venv` module. + +When the *activate_only* flag set is to true, the virtual environment at *virtualenv_path* will only be activated—**no creation will take place**. + +NOTE: + +To activate a Python virtual environment, the `activate` script is often used. +However, in a GitHub Action environment, this is not enough because environment variables are "lost" at the end of the Action. For this we need to do two things: + +1. Append the `VIRTUAL_ENV` environment variable to the `GITHUB_ENV` environment file. The [`GITHUB_ENV`](https://docs.github.com/en/enterprise-cloud@latest/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-environment-variable) files makes environment variables available to any subsequent steps in a workflow job. Finally, it's important to note that `VIRTUAL_ENV` variable is created by the `activate` script and contains the path to the virtual environment. +2. Prepend the virtual environment's `bin` path to the system PATH. To allow also any subsequent steps in a workflow to be able to use it, [`GITHUB_PATH`](https://docs.github.com/en/enterprise-cloud@latest/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#adding-a-system-path) is employed. 
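Concretely, the activation reduces to something like the following sketch, which condenses the action's own shell step and assumes the default `.venv` path:

```yaml
# Condensed sketch of the action's activation step, assuming the default
# ".venv" virtual environment path.
- name: Activate virtual environment for subsequent steps
  shell: bash
  run: |
    source .venv/bin/activate
    # VIRTUAL_ENV is set by the activate script and holds the environment path.
    echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> "$GITHUB_ENV"
    echo "$VIRTUAL_ENV/bin" >> "$GITHUB_PATH"
```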
+ +## Documentation + +### Inputs + +* **virtualenv_path** - Optional - The path where the virtual environment will be created. It defaults to `.venv`. +* **activate_only** - Optional - Flag that states whether to only activate the virtual environment. If false, a new virtual environment will be created before being activated. It defaults to false. \ No newline at end of file diff --git a/.github/actions/python_requirements/create_virtualenv/action.yml b/.github/actions/python_requirements/create_virtualenv/action.yml new file mode 100644 index 00000000..02dcb936 --- /dev/null +++ b/.github/actions/python_requirements/create_virtualenv/action.yml @@ -0,0 +1,28 @@ +name: Composite action create Python virtual environment +description: Composite action create Python virtual environment +inputs: + virtualenv_path: + description: Python's virtual environment path. + required: false + default: ".venv" + activate_only: + description: Whether to create the virtual environment or only activate it. + required: false + default: false + +runs: + using: "composite" + steps: + - name: Python's virtualenv creation + if: inputs.activate_only != 'true' + run: python -m venv ${{ inputs.virtualenv_path }} + shell: bash + - name: Activate newly created virtualenv + id: activate_newly_created_virtualenv + run: | + source ${{ inputs.virtualenv_path }}/bin/activate + echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> $GITHUB_ENV + echo "::debug::Virtual environment path is $VIRTUAL_ENV" + echo "$VIRTUAL_ENV/bin" >> $GITHUB_PATH + echo "::debug::PATH environment variable state after $VIRTUAL_ENV/bin path being added to it: $GITHUB_PATH" + shell: bash diff --git a/.github/actions/python_requirements/restore_pip_cache/README.md b/.github/actions/python_requirements/restore_pip_cache/README.md new file mode 100644 index 00000000..92a2a2fd --- /dev/null +++ b/.github/actions/python_requirements/restore_pip_cache/README.md @@ -0,0 +1,41 @@ +# Composite action restore pip cache + +This action restores the pip download cache from GitHub's cache. + +The action is composed of four steps: + +1. **Generate random UUID** - This step computes a random UUID, using the shell command `uuidgen`, which will be part of the cache key. Since pip cache will always be restored when a virtual environment is not found on GitHub's cache, a random UUID is required to generate a cache miss. +2. **Get pip cache directory** - This step retrieves the path to the pip cache. If *custom_pip_cache_path* is not an empty string, it will be used as pip cache path. Otherwise, the pip cache will be computed using `pip cache dir`. +3. **Restore pip cache** - This step performs the heavy lifting of the restoring. Using GitHub's [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action, the cache is restored using a **partial match**. This is performed by setting the following [inputs](https://github.com/actions/cache/tree/main/restore#inputs): + 1. **key** - an explicit key for a cache entry - will be set to a random UUID which will always trigger a cache miss. + 2. **path** - a list of files, directories, paths to restore - will be set to the pip download cache path. + 3. **restore-keys** - an ordered list of prefix-matched keys to use for restoring stale cache if no cache hit occurred for key - will be set to `-pip-cache-` to restore the most recent pip cache for the chosen git reference. +4. 
**Explain cache output** - This step analyze the results of the [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action and sets *real_cache_hit* environment variable to true if there was a match, false otherwise. This is necessary because, in the case of a **partial match**, the *cache-hit*, output of [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md), will be false. Instead, we use the `cache-matched-key`, another output of [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md), which contains a reference for both **partial** and full matches, but will be empty in the case of a cache miss. + +NOTE: + +This action, despite seeming a bit unusual, is correct because GitHub does not allow cache updates or overwrites. + +Let's think about a real-world scenario: + +A user updates the requirements file. + +In this case our query to GitHub's cache for the previously cached virtual environment will **always** miss. This happens because changing the requirements file results in a new SHA256 hash, so the cache key changes. + +Thus, we aim to restore the pip cache to at least *mitigate* the impact of the changes in the requirements. Specifically, we want to save time by avoiding the download of packages that did not change. + +Next, we try to query the GitHub's cache for the previously cached pip cache. However, there are a few issues: + +1. We cannot use the SHA256 of the requirements file because it has changed, leading to cache misses. +2. We cannot create a cache key without a random component because, as said earlier, GitHub does not allow overwriting or updating of a cache item. For example, a cache key like `develop-pip-cache-` would generate an error when attempting to save a new cache if one already exists with the same name. + +## Documentation + +### Inputs + +* **custom_pip_cache** - Optional - Path to the pip cache. It can be used for setting a custom pip cache path. It defaults to an empty string. In this case, the pip cache path will be computed using `pip cache dir`. More information regarding the previous command is available [here](https://pip.pypa.io/en/stable/cli/pip_cache/#description) +* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`. + +### Outputs + +* **cache-hit** - A boolean value which states whether pip cache was found on GitHub's cache or not. diff --git a/.github/actions/python_requirements/restore_pip_cache/action.yml b/.github/actions/python_requirements/restore_pip_cache/action.yml new file mode 100644 index 00000000..e4568f79 --- /dev/null +++ b/.github/actions/python_requirements/restore_pip_cache/action.yml @@ -0,0 +1,53 @@ +name: Composite action restore pip cache +description: Composite action to restore pip cache +inputs: + custom_pip_cache_path: + description: Path to pip cache. + required: false + git_reference: + description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key. + required: false + default: ${{ github.ref_name }} + +outputs: + cache-hit: + description: Whether pip cache was found in the cache or not. 
+ value: ${{ steps.explain_cache_output.outputs.real_cache_hit }} + +runs: + using: "composite" + steps: + - name: Generate random UUID + id: generate_random_uuid + run: | + random_uuid=$(uuidgen -r) + echo "::debug::Random uuid generated is $random_uuid. Should only cause a cache-miss" + echo "computed_uuid=$random_uuid" >> $GITHUB_OUTPUT + shell: bash + - name: Get pip cache directory + id: get_pip_cache_directory + run: | + if [[ -z '${{ inputs.custom_pip_cache_path }}' ]]; then + echo "pip_cache_path=$(pip cache dir)" >> $GITHUB_OUTPUT + else + echo "pip_cache_path=${{ inputs.custom_pip_cache_path }}" >> $GITHUB_OUTPUT + fi + echo "::debug::Pip cache path $pip_cache_path" + shell: bash + - name: Restore pip cache + id: restore_pip_cache + uses: actions/cache/restore@v4 + with: + key: ${{ steps.generate_random_uuid.outputs.computed_uuid }} + path: ${{ steps.get_pip_cache_directory.outputs.pip_cache_path }} + restore-keys: ${{ inputs.git_reference }}-pip-cache- + - name: Explain cache output + id: explain_cache_output + run: | + echo "::debug::Restore action for pip's cache returned cache-hit: ${{ steps.restore_pip_cache.outputs.cache-hit }} with cache-matched-key: ${{ steps.restore_pip_cache.outputs.cache-matched-key }}" + if [[ -z '${{ steps.restore_pip_cache.outputs.cache-matched-key }}' ]]; then + echo "real_cache_hit=false" >> $GITHUB_OUTPUT + else + echo "real_cache_hit=true" >> $GITHUB_OUTPUT + fi + shell: bash \ No newline at end of file diff --git a/.github/actions/python_requirements/restore_virtualenv/README.md b/.github/actions/python_requirements/restore_virtualenv/README.md new file mode 100644 index 00000000..e40a3c1c --- /dev/null +++ b/.github/actions/python_requirements/restore_virtualenv/README.md @@ -0,0 +1,30 @@ +# Composite action restore Python virtual environment + +This action restores a Python virtual environment from GitHub's cache. + +Combined with [**save_virtualenv**](../save_virtualenv/README.md), **it helps save time by avoiding the installation of Python requirements**. + +The action is composed of three steps: + +1. **Compute requirements files SHA256 hash** - This step uses [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) action to compute a single SHA256 hash of the files described by the *requirements_paths*. The computed SHA256 hash will be part of the cache key. +2. **Restore virtual environment** - This step does the heavy lifting of restoring the virtual environment from GitHub's cache. It uses the GitHub's [**cache/restore**](https://github.com/actions/cache/blob/main/restore/README.md) action with the following parameters: + * **path** - A list of files, directories, or paths to restore - set to the virtual environment path input variable *virtual_environment_path*. + * **key** - An explicit key for a cache entry - set to the combination of three strings: + * *git_reference*, provided as an input to the action. + * A static part, `-venv-` + * The previously computed SHA256 hash of the requirements files. +3. **Activate restored virtual environment** - If the Python virtual environment was found in the GitHub's cache, it needs to be activated. This is performed using [**python_requirements/create_virtualenv**](../create_virtualenv/README.md) action with the following parameters: + * **virtualenv_path** - set to the Python virtual environment path. + * **activate_only** - set to true because it doesn't need to be created. 
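As a usage illustration, a workflow job could pair this action with a conditional rebuild along the lines sketched below; the step id, requirements file names and install command are examples rather than fixed values:

```yaml
# Illustrative job steps: restore the cached environment and rebuild it only
# when no cached environment was found (step id and file names are examples).
- name: Restore virtual environment
  id: restore_venv
  uses: ./.github/actions/python_requirements/restore_virtualenv
  with:
    requirements_paths: requirements.txt requirements-linters.txt
- name: Create virtual environment on cache miss
  if: steps.restore_venv.outputs.cache-hit != 'true'
  uses: ./.github/actions/python_requirements/create_virtualenv
- name: Install Python requirements on cache miss
  if: steps.restore_venv.outputs.cache-hit != 'true'
  run: pip install -r requirements.txt
```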
+ +## Documentation + +### Inputs + +* **virtual_environment_path** - Optional - Path where the virtual environment is located. It may be used to provide a custom path for the virtual environment. It defaults to `.venv`. +* **requirements_paths** - Required - A space separated list of requirements file paths. They will be used to compute a SHA256 hash used in the cache key. It defaults to an empty string. +* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`. + +### Outputs + +* **cache-hit** - A boolean value which is true when virtual environment is found in the GitHub's cache, false otherwise. diff --git a/.github/actions/python_requirements/restore_virtualenv/action.yml b/.github/actions/python_requirements/restore_virtualenv/action.yml new file mode 100644 index 00000000..cd76c98e --- /dev/null +++ b/.github/actions/python_requirements/restore_virtualenv/action.yml @@ -0,0 +1,43 @@ +name: Composite action restore Python virtual environment +description: Composite action to restore Python virtual environment +inputs: + virtual_environment_path: + description: Path to where virtual environment will be restored. + required: false + default: ".venv" + requirements_paths: + description: Space separeted list of requirement files. They will be used to compute the hash for the cache key. + required: true + git_reference: + description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key. + required: false + default: ${{ github.ref_name }} + +outputs: + cache-hit: + description: Whether virtual environment was found in the cache or not. + value: ${{ steps.restore_virtual_environment.outputs.cache-hit }} + +runs: + using: "composite" + steps: + - name: Compute requirements files SHA256 hash + id: compute_requirements_files_sha256_hash + uses: ./.github/actions/misc/compute_files_hash + with: + file_paths: ${{ inputs.requirements_paths }} + + - name: Restore virtual environment + id: restore_virtual_environment + uses: actions/cache/restore@v4 + with: + path: ${{ inputs.virtual_environment_path }} + key: ${{ inputs.git_reference }}-venv-${{ steps.compute_requirements_files_sha256_hash.outputs.computed_hash }} + + - name: Activate restored virtual environment + if: > + steps.restore_virtual_environment.outputs.cache-hit == 'true' + uses: ./.github/actions/python_requirements/create_virtualenv + with: + virtualenv_path: ${{ inputs.virtual_environment_path }} + activate_only: true \ No newline at end of file diff --git a/.github/actions/python_requirements/save_pip_cache/README.md b/.github/actions/python_requirements/save_pip_cache/README.md new file mode 100644 index 00000000..e3950a0c --- /dev/null +++ b/.github/actions/python_requirements/save_pip_cache/README.md @@ -0,0 +1,22 @@ +# Composite action save pip cache + +This action saves the pip download cache. + +Every time a user runs `pip install `, pip downloads the package and all its dependencies.The packages are saved in a directory which, by default, is located at `~/.cache/pip`. +Saving this cache in GitHub's cache allows us to save time when installing those packages. As a matter of fact, before installing packages, pip's cache can be restored using [**restore_pip_cache**](../restore_pip_cache/README.md) action. 
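As a rough usage sketch, the two pip-cache actions are meant to wrap the installation step along these lines; the requirements file name is an example, and both actions can be called without inputs since all of their inputs are optional:

```yaml
# Illustrative pairing: restore the pip download cache, install packages,
# then save the (possibly updated) cache for later runs.
- name: Restore pip cache
  uses: ./.github/actions/python_requirements/restore_pip_cache
- name: Install Python requirements
  run: pip install -r requirements.txt
- name: Save pip cache
  uses: ./.github/actions/python_requirements/save_pip_cache
```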
+ +The action is composed of three steps: + +1. **Generate random UUID** - This step computes a random UUID, using shell command `uuidgen`, which will be part of the cache key. The uniqueness of the UUID ensures that there will be no collisions between cache keys, which is crucial because **GitHub won't allow the creation of two caches with the same key** (cache update/overwrite **is not supported**). +2. **Get pip cache directory** - This step retrieves the path to the pip cache. If *custom_pip_cache_path* is not an empty string, it will be used as pip cache path. Otherwise, the pip cache will be computed using `pip cache dir`. +3. **Save pip cache** - This step performs the heavy lifting of the caching. Using GitHub's [**cache/save**](https://github.com/actions/cache/blob/main/save/README.md) action, the cache is saved with a key composed of: + 1. The git reference input, *git_reference* + 2. A static part, `pip-cache` + 3. The previously computed UUID + +## Documentation + +### Inputs + +* **custom_pip_cache** - Optional - Path to the pip cache. It can be used for setting a custom pip cache path. It defaults to an empty string. In this case, the pip cache path will be computed using `pip cache dir`. More information regarding the previous command is available [here](https://pip.pypa.io/en/stable/cli/pip_cache/#description) +* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`. diff --git a/.github/actions/python_requirements/save_pip_cache/action.yml b/.github/actions/python_requirements/save_pip_cache/action.yml new file mode 100644 index 00000000..d98e398d --- /dev/null +++ b/.github/actions/python_requirements/save_pip_cache/action.yml @@ -0,0 +1,36 @@ +name: Composite action save pip cache +description: Composite action to save pip cache +inputs: + custom_pip_cache_path: + description: Path to the pip cache. + required: false + git_reference: + description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key. 
+ required: false + default: ${{ github.ref_name }} + +runs: + using: "composite" + steps: + - name: Generate random UUID + id: generate_random_uuid + run: | + random_uuid=$(uuidgen -r) + echo "::debug::Random uuid generated is $random_uuid" + echo "computed_uuid=$random_uuid" >> $GITHUB_OUTPUT + shell: bash + - name: Get pip cache directory + id: get_pip_cache_directory + run: | + if [[ -z '${{ inputs.custom_pip_cache_path }}' ]]; then + echo "pip_cache_path=$(pip cache dir)" >> $GITHUB_OUTPUT + else + echo "pip_cache_path=${{ inputs.custom_pip_cache_path }}" >> $GITHUB_OUTPUT + fi + echo "::debug::The pip cache path is $pip_cache_path" + shell: bash + - name: Save pip cache + uses: actions/cache/save@v4 + with: + path: ${{ steps.get_pip_cache_directory.outputs.pip_cache_path }} + key: ${{ inputs.git_reference }}-pip-cache-${{ steps.generate_random_uuid.outputs.computed_uuid }} \ No newline at end of file diff --git a/.github/actions/python_requirements/save_virtualenv/README.md b/.github/actions/python_requirements/save_virtualenv/README.md new file mode 100644 index 00000000..19d9ab5f --- /dev/null +++ b/.github/actions/python_requirements/save_virtualenv/README.md @@ -0,0 +1,23 @@ +# Composite action save Python virtual environment + +This action saves a Python virtual environment to GitHub's cache. + +Combined with [**restore_virtualenv**](../restore_virtualenv/README.md), **it helps save time by avoiding the installation of Python requirements**. + +The action is composed of two steps: + +1. **Compute requirements files SHA256 hash** - This step uses [**misc/compute_files_hash**](../../misc/compute_files_hash/README.md) to compute a single SHA256 hash of the files described by the *requirements_paths*. The computed SHA256 hash will be part of the cache key. +2. **Cache virtual environment** - This step does the heavy lifting of saving the virtual environment to GitHub's cache. It uses the GitHub's [**cache/save**](https://github.com/actions/cache/blob/main/save/README.md) action with the following parameters: + 1. **path** - A list of files, directories, or paths to cache - set to the virtual environment path input variable *virtual_environment_path*. + 2. **key** - An explicit key for a cache entry - set to the combination of three strings: + 1. *git_reference*, provided as an input to the action. + 2. A static part, `-venv-` + 3. The previously computed SHA256 hash of the requirements files. + +## Documentation + +### Inputs + +* **virtual_environment_path** - Optional - Path where the virtual environment is located. It may be used to provide a custom path for the virtual environment. It defaults to `.venv`. +* **requirements_paths** - Required - A space separated list of requirements file paths. They will be used to compute a SHA256 hash used in the cache key. +* **git_reference** - Optional - A git reference that will be used to build the cache key. It defaults to `github.ref_name` which is a context variable containing **the short ref name of the branch or tag that triggered the workflow run**. For example it may be `feature-branch-1` or, for pull requests, `/merge`. 
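For illustration, the save step is normally guarded by the output of a preceding restore step so that the environment is cached only when it was not already found; the step id and file names below are examples:

```yaml
# Illustrative: cache the virtual environment only when it was not restored
# by a previous restore_virtualenv step (step id and file names are examples).
- name: Save virtual environment
  if: steps.restore_venv.outputs.cache-hit != 'true'
  uses: ./.github/actions/python_requirements/save_virtualenv
  with:
    virtual_environment_path: .venv
    requirements_paths: requirements.txt requirements-linters.txt
```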
diff --git a/.github/actions/python_requirements/save_virtualenv/action.yml b/.github/actions/python_requirements/save_virtualenv/action.yml new file mode 100644 index 00000000..6c6c66c1 --- /dev/null +++ b/.github/actions/python_requirements/save_virtualenv/action.yml @@ -0,0 +1,29 @@ +name: Composite action save Python virtual environment +description: Composite action to save Python virtual environment +inputs: + virtual_environment_path: + description: Path to the virtual environment. + required: false + default: ".venv" + requirements_paths: + description: Space separeted list of requirements files. They will be used to compute the hash for the cache key. + required: true + git_reference: + description: A git reference (name of the branch, reference to the PR) that will be used to build the cache key. + required: false + default: ${{ github.ref_name }} + +runs: + using: "composite" + steps: + - name: Compute requirements files SHA256 hash + id: compute_requirements_files_sha256_hash + uses: ./.github/actions/misc/compute_files_hash + with: + file_paths: ${{ inputs.requirements_paths }} + + - name: Cache virtual environment + uses: actions/cache/save@v4 + with: + path: ${{ inputs.virtual_environment_path }} + key: ${{ inputs.git_reference }}-venv-${{ steps.compute_requirements_files_sha256_hash.outputs.computed_hash }} \ No newline at end of file diff --git a/.github/actions/services/action.yml b/.github/actions/services/action.yml index b814a033..95cf2131 100644 --- a/.github/actions/services/action.yml +++ b/.github/actions/services/action.yml @@ -104,6 +104,7 @@ runs: echo " environment:" >> elastic_search.yml echo " ES_JAVA_OPTS: -Xms1g -Xmx1g" >> elastic_search.yml echo " discovery.type: single-node" >> elastic_search.yml + echo " xpack.security.enabled: 'false'" >> elastic_search.yml echo " ports:" >> elastic_search.yml echo " - ${{ inputs.elasticsearch_port }}:9200" >> elastic_search.yml echo " healthcheck:" >> elastic_search.yml diff --git a/.github/configurations/python_linters/.ruff.toml b/.github/configurations/python_linters/.ruff.toml new file mode 100644 index 00000000..3d61ba06 --- /dev/null +++ b/.github/configurations/python_linters/.ruff.toml @@ -0,0 +1,58 @@ +# Top level settings +## Reference: https://docs.astral.sh/ruff/settings/#top-level + +extend-exclude = [ + ".github", + ".idea", + ".vscode", + "**/migrations/*" +] + +include = ["*.py"] + +indent-width = 4 + +line-length = 160 + +output-format = "full" + +respect-gitignore = false + +show-fixes = true + +target-version = "py312" + +# Format settings level +## Reference: https://docs.astral.sh/ruff/settings/#format +[format] + +docstring-code-format = true + +indent-style = "space" + +line-ending = "lf" + +quote-style = "double" + +skip-magic-trailing-comma = false + +[lint] + +select = [ + "E", # pycodestyle errors - https://docs.astral.sh/ruff/rules/#error-e + "W", # pycodestyle warnings - https://docs.astral.sh/ruff/rules/#warning-w + "F", # pyflakes - https://docs.astral.sh/ruff/rules/#pyflakes-f + "I", # isort - https://docs.astral.sh/ruff/rules/#isort-i + "N", # pep8-naming - https://docs.astral.sh/ruff/rules/#pep8-naming-n + "UP", # pyupgrade - https://docs.astral.sh/ruff/rules/#pyupgrade-up + "B", # flake8-bugbear - https://docs.astral.sh/ruff/rules/#flake8-bugbear-b + "C4", # flake8-comprehensions - https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4 + "DJ", # flake8-django - https://docs.astral.sh/ruff/rules/#flake8-django-dj +] + +ignore = [ + # F403: Allow wildcard imports in __init__.py 
files
+    "F403",
+    # E501: Allow long lines in docstrings
+    "E501",
+]
diff --git a/.github/configurations/python_linters/requirements-linters.txt b/.github/configurations/python_linters/requirements-linters.txt
index 8b8a8a20..8110ca60 100644
--- a/.github/configurations/python_linters/requirements-linters.txt
+++ b/.github/configurations/python_linters/requirements-linters.txt
@@ -1,6 +1,13 @@
-black==24.8.0
+autoflake~=2.3.1
+bandit~=1.8.3
+black~=25.1.0
 # use fork since main repo is not updated
 # see https://github.com/rocioar/flake8-django/pull/134
+# Note: python 3.12 is not supported
 flake8-django @ git+https://github.com/terencehonles/flake8-django.git@a6e369e89d275dfd5514f2aa9d091aa36c5ff84b
-flake8==7.1.1
-isort==5.13.2
\ No newline at end of file
+flake8~=7.1.2
+isort~=6.0.1
+pylint-django~=2.6.1
+pylint~=3.3.5
+ruff~=0.12.7
+
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index c075a634..3b93c450 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -20,7 +20,7 @@ Please delete options that are not relevant.
 - [ ] I have read and understood the rules about [how to Contribute](https://intelowlproject.github.io/docs/GreedyBear/Contribute/) to this project.
 - [ ] The pull request is for the branch `develop`.
 - [ ] I have added documentation of the new features.
-- [ ] Linters (`Black`, `Flake`, `Isort`) gave 0 errors. If you have correctly installed [pre-commit](https://intelowlproject.github.io/docs/GreedyBear/Contribute/#how-to-start-setup-project-and-development-instance), it does these checks and adjustments on your behalf.
+- [ ] Linter (`Ruff`) gave 0 errors. If you have correctly installed [pre-commit](https://intelowlproject.github.io/docs/GreedyBear/Contribute/#how-to-start-setup-project-and-development-instance), it does these checks and adjustments on your behalf.
 - [ ] I have added tests for the feature/bug I solved. All the tests (new and old ones) gave 0 errors.
 - [ ] If changes were made to an existing model/serializer/view, the docs were updated and regenerated (check [CONTRIBUTE.md](https://github.com/intelowlproject/docs/blob/main/docs/GreedyBear/Contribute.md)).
 - [ ] If the GUI has been modified:
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
new file mode 100644
index 00000000..258d5a1d
--- /dev/null
+++ b/.github/workflows/README.md
@@ -0,0 +1,217 @@
+# Workflows
+
+## [Reusable detect changes workflow](_detect_changes.yml)
+
+This sub workflow detects and enumerates the changes between two branches.
+
+It is composed of five steps:
+
+1. **Check out PR target branch** - This step checks out the latest commit of the PR target branch for the current repository. This workflow was designed to detect changes when a PR to a target branch is created; therefore, the latest commit of the target branch must be checked out as the first step. To achieve this, GitHub's [**checkout**](https://github.com/actions/checkout) action is used with the following parameters:
+    1. **ref** - The branch, tag or SHA to checkout - It is set to `github.base_ref`, which corresponds to the **PR target branch**.
+2. **Check out source branch latest commit** - This step checks out the latest commit of the source branch on top of the previous one. To do so, GitHub's [**checkout**](https://github.com/actions/checkout) action is used with the following parameters:
+    1. **clean** - Whether to execute `git clean -ffdx && git reset --hard HEAD` before fetching - It is set to false, which means **do not delete untracked files**.
+3. **Generate summary** - This step creates the title for the action summary; the detected changes will be reported below this title in the summary section. The step is performed only if at least one of the *backend_directories* and *frontend_directories* inputs is not empty.
+4. **Generate diffs for backend** - This step detects and enumerates the files that changed between the two branches. This is performed using the [`git diff`](https://git-scm.com/docs/git-diff) command. Specifically, the code instructs git to show the changes in the *backend_directories* relative to `origin/` (the target branch). During this process, a [**pathspec**](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec) is used to exclude the files or directories specified in the *backend_exclusions* input. The changes are then counted and output through the *backend* variable.
+5. **Generate diffs for frontend** - This step follows the same pattern as the **Generate diffs for backend** step, but for the frontend directories.
+
+### Documentation
+
+#### Inputs
+
+* **backend_directories** - Optional - Space separated list of backend directories to check for changes. By default, it is set to an empty string.
+* **backend_exclusions** - Optional - Space separated list of backend files or directories to **exclude** when checking for changes. Globs are supported. By default, it is set to an empty string.
+* **frontend_directories** - Optional - Space separated list of frontend directories to check for changes. By default, it is set to an empty string.
+* **frontend_exclusions** - Optional - Space separated list of frontend files or directories to **exclude** when checking for changes. Globs are supported. By default, it is set to an empty string.
+* **ubuntu_version** - Optional - The Ubuntu version to run the workflow against. By default, it is set to `latest`.
+
+#### Outputs
+
+* **backend** - The number of backend files that have changed.
+* **frontend** - The number of frontend files that have changed.
+
+## [Reusable node tests workflow](_node.yml)
+
+This sub workflow installs Node.js dependencies and runs frontend linters and tests.
+
+It is composed of nine steps:
+
+1. **Check out latest commit for current branch** - This step checks out the latest commit for the current branch of the repository. To do so, it uses GitHub's [**checkout**](https://github.com/actions/checkout) action with no parameters.
+2. **Set up Node.js** - This step sets up Node.js, downloading the binaries and the project's dependencies. This is done using GitHub's [**setup-node**](https://github.com/actions/setup-node) action, which also allows caching and restoring the project dependencies. It is used with the following parameters:
+    1. **node-version** - Node.js version to use - It is set according to the *node_version* input variable.
+    2. **cache** - Which package manager is used to install and cache packages - It is set to `npm`.
+    3. **cache-dependency-path** - Path to the dependency file: `package-lock.json`, `yarn.lock` etc. It is set to the `package-lock.json` file inside the directory given by the *working_directory* input variable.
+3. **Add dependencies** - This step adds additional dependencies to the `package-lock.json` file. Specifically, these packages are added to the **devDependencies** part of the aforementioned file. Which packages will be added is chosen according to the input variables:
+    1. *use_jest*
+    2. *use_react*
+    3. *use_eslint*
+    4. *use_prettier*
+    5. *use_stylelint*
+4. **Install packages** - This step installs all missing packages from the dependency file in the directory specified by the *working_directory* input variable.
+5. **Run linters** - This step uses the [**node_linter**](../actions/node_linter/action.yml) action to run linters against the frontend source code.
+6. **Check packages licenses** - This step uses [**pilosus/action-pip-license-checker**](https://github.com/pilosus/action-pip-license-checker) to check the licenses used by the project requirements.
+7. **Run CodeQL** - This step uses the [**codeql**](../actions/codeql/action.yml) action to run CodeQL and discover vulnerabilities across the codebase.
+8. **Run custom command** - This step is performed only if the input variable *custom_command* is not empty. The step simply runs the bash command described by that input variable in the working directory specified by the *working_directory* input variable.
+9. **Run jest tests** - This step runs Jest tests if the input variable *use_jest* is set to true. Finally, if *use_coverage* and *upload_coverage* are set to true, a coverage report is generated and uploaded.
+
+### Documentation
+
+#### Inputs
+
+* **node_versions** - Required - An array of Node.js versions to use.
+* **working_directory** - Required - Path to the `package.json` file.
+* **check_packages_licenses** - Optional - Whether to check npm package licenses or not. By default it is set to true.
+* **use_jest** - Optional - Whether to use the Jest test suite or not. By default it is set to false.
+* **use_react** - Optional - Whether React is used by the project or not. By default it is set to false.
+* **use_eslint** - Optional - Whether to use the ESLint linter or not. By default it is set to true.
+* **use_prettier** - Optional - Whether to use the Prettier formatter or not. By default it is set to true.
+* **use_stylelint** - Optional - Whether to use the Stylelint linter or not. By default it is set to true.
+* **use_coverage** - Optional - Whether to use coverage or not. To work, it also requires *use_jest* to be true. By default it is set to false.
+* **upload_coverage** - Optional - Whether to upload the coverage report to GitHub. By default it is set to false.
+* **run_codeql** - Optional - Whether to run CodeQL against the codebase. By default it is set to false.
+* **custom_command** - Optional - A custom bash command to be run by the workflow. By default it is set to an empty string.
+* **max_timeout** - Optional - The maximum number of minutes the workflow is allowed to run. By default it is set to 30.
+* **ubuntu_version** - Optional - The Ubuntu version to run the workflow against. By default it is set to `latest`.
+
+## [Reusable python linter workflow](_python.yml)
+
+This sub workflow runs Python linters and tests against the codebase.
+
+It is composed of one job:
+
+1. **python** - This job is composed of thirty-one steps:
+    1. **Check out latest commit** - Checks out the latest commit on the current branch of the repository using GitHub's [**checkout**](https://github.com/actions/checkout) action.
+    2. **Set up Python** - Sets up Python on the runner machine using GitHub's [**setup-python**](https://github.com/actions/setup-python) action with the following parameter:
+        1. **python-version** - Which Python version to use - It is set according to the *python_versions* input variable.
+    3. **Inject stuff to environment** - This step adds a few environment variables to the system's environment. Specifically:
+        1. If *django_settings_module* is set, **PYTHONPATH** and **DJANGO_SETTINGS_MODULE** will be added to the runner's environment.
+        2. If *run_codeql* is true, **CODEQL_PYTHON** will be added to the runner's environment.
+    4. **Restore APT cache related to PR event** - This step will try to restore the APT cache related to the PR event using [**restore_apt_cache**](../actions/apt_requirements/restore_apt_cache/README.md) with the following parameter:
+        1. **apt_requirements_file_path** - Path to the APT requirements file - It is set to the *packages_path* input variable.
+    5. **Restore APT cache related to target branch** - This step will try to restore the APT cache related to the target branch (of the PR) using [**restore_apt_cache**](../actions/apt_requirements/restore_apt_cache/README.md) only if **Restore APT cache related to PR event** produces a cache miss. It is run with the following parameters:
+        1. **apt_requirements_file_path** - Path to the APT requirements file - It is set to the *packages_path* input variable.
+        2. **git_reference** - A git reference (name of the branch, reference to the PR) that will be used to build the cache key - It is set to the target branch.
+    6. **Refresh APT repositories** - If both the PR event and target branch APT cache restore attempts resulted in a cache miss, the APT repositories list is refreshed using `sudo apt-get update`.
+    7. **Install APT requirements** - This step installs the APT requirements listed in the *packages_path* requirements file. **Recommended packages are not installed, since they are not required**.
+    8. **Save APT cache related to PR event** - When the attempt to restore the APT cache related to the PR event results in a cache miss, the newly populated APT cache is saved to GitHub. This is performed using the [**save_apt_cache**](../actions/apt_requirements/save_apt_cache/README.md) action with the following parameter:
+        1. **apt_requirements_file_path** - Path to the APT requirements file - It is set to the *packages_path* input variable.
+    9. **Create linter requirements file** - This step creates the linter requirements file using the [**create_linter_requirements_file**](../actions/python_requirements/create_linter_requirements_file/README.md) action.
+    10. **Create dev requirements file** - This step creates the development requirements file using the [**create_dev_requirements_file**](../actions/python_requirements/create_dev_requirements_file/README.md) action.
+    11. **Create docs requirements file** - This step creates the documentation requirements file using the [**create_docs_requirements_file**](../actions/python_requirements/create_docs_requirements_file/README.md) action.
+    12. **Restore Python virtual environment related to PR event** - This step attempts to restore the Python virtual environment for the PR using the [**restore_python_virtualenv**](../actions/python_requirements/restore_virtualenv/README.md) action.
+    13. **Restore Python virtual environment related to target branch** - If the attempt to restore the Python virtual environment for the PR results in a cache miss, an attempt to restore the Python virtual environment for the target branch is made using the [**restore_python_virtualenv**](../actions/python_requirements/restore_virtualenv/README.md) action.
+    14. **Create Python virtual environment** - If both attempts to restore the Python virtual environment (for the PR and for the target branch) result in a cache miss, a Python virtual environment is created using the [**create_virtualenv**](../actions/python_requirements/create_virtualenv/README.md) action.
+    15. **Restore pip cache related to PR event** - If both attempts to restore the Python virtual environment (for the PR and for the target branch) result in a cache miss, an attempt to restore the pip cache for the PR event is made using the [**restore_pip_cache**](../actions/python_requirements/restore_pip_cache/README.md) action.
+    16. **Restore pip cache related to target branch** - If both attempts to restore the Python virtual environment (for the PR and for the target branch), as well as the pip cache for the PR, result in a cache miss, an attempt to restore the pip cache for the target branch is made using the [**restore_pip_cache**](../actions/python_requirements/restore_pip_cache/README.md) action.
+    17. **Install project requirements** - If both attempts to restore the Python virtual environment (for the PR event and for the target branch) result in a cache miss, the project requirements are installed from the working directory specified by the *install_from* input variable.
+    18. **Install other requirements** - If the attempt to restore the Python virtual environment for the PR event results in a cache miss, the development, linter and documentation requirements are installed from the working directory specified by the *install_from* input variable.
+    19. **Check requirements licenses** - If the input variable *check_requirements_licenses* is set to true and the attempt to restore the Python virtual environment related to the PR event results in a cache miss, this step performs the requirements license check using [**pilosus/action-pip-license-checker**](https://github.com/pilosus/action-pip-license-checker).
+    20. **Print wrong licenses** - If the output of **Check requirements licenses** is `failure`, the list of licenses for which the check failed will be returned.
+    21. **Save Python virtual environment related to PR event** - If the attempt to restore the Python virtual environment resulted in a cache miss, the Python virtual environment is saved for the PR event using the [**save_virtualenv**](../actions/python_requirements/save_virtualenv/README.md) action with the following parameter:
+        1. **requirements_paths** - A space separated list of requirements file paths - It is set to the combination of *requirements_path*, `requirements-linters.txt`, `requirements-dev.txt` and `requirements-docs.txt`, joined by spaces.
+    22. **Save pip cache related to PR event** - If both attempts to restore the Python virtual environment and the pip cache related to the PR resulted in a cache miss, the pip cache is saved for the PR event using the [**save_pip_cache**](../actions/python_requirements/save_pip_cache/README.md) action.
+    23. **Run linters** - If at least one of the input variables *use_black*, *use_isort*, *use_flake8*, *use_pylint*, *use_bandit*, *use_autoflake*, *use_ruff_formatter* or *use_ruff_linter* is true, this step executes the linters against the codebase in the working directory specified by the *working_directory* variable.
+    24. **Run CodeQL** - If the *run_codeql* input variable is true, this step runs CodeQL against the codebase using the [**codeql**](../actions/codeql/action.yml) action in the working directory specified by the *working_directory* variable.
+    25. **Build Docs** - If the *check_docs_directory* input variable is set, this step executes `rstcheck` to ensure that the documentation in *check_docs_directory* is valid. Finally, the documentation is built using `sphinx`.
+    26. **Start services** - If one or more of the input variables *use_postgres*, *use_elastic_search*, *use_memcached*, *use_redis*, *use_rabbitmq* and *use_mongo* are true, this step creates the Docker containers for the corresponding services using the [**services**](../actions/services/action.yml) action. Additional parameters, such as *postgres_db* or *elasticsearch_version*, can also be provided to the aforementioned action.
+    27. **Start celery worker** - If the *use_celery* input variable is true, a Celery worker is created for the *celery_app* application. The `celery` command is executed in the working directory specified by the *working_directory* input variable.
+    28. **Run custom command** - If the *custom_command* input variable is not empty, the command defined by the variable is executed in the working directory specified by the *working_directory* input variable.
+    29. **Check migrations** - If *check_migrations* is true and *django_settings_module* is not empty, this step will perform a dry run of `django-admin makemigrations` to ensure that the migrations are valid.
+    30. **Run unittest** - This step runs the Python tests against the codebase in the directory described by the *working_directory* input variable. Additionally, according to the *tags_for_manual_tests* and *tags_for_slow_tests* variables, some tests will be excluded from the run.
+    31. **Create coverage output** - If *use_coverage* and *upload_coverage* are set to true, this step produces a coverage report of the codebase and uploads it to GitHub. The *working_directory* input variable is used to determine the directory in which coverage should be run.
+
+### Documentation
+
+#### Inputs
+
+* **python_versions** - Required - Python versions used by this workflow, in the form of a JSON array.
+* **ubuntu_version** - Optional - Ubuntu version to run the workflow against. By default, it is set to `latest`.
+* **working_directory** - Required - Directory in which to run linters.
+* **requirements_path** - Required - Path to the requirements file of the Python project.
+* **install_from** - Optional - Directory where all installation commands will be run. By default, it is set to `.`.
+* **packages_path** - Optional - Path to the APT requirements file of the Python project. By default, it is set to an empty string.
+* **env** - Optional - A JSON object containing a set of environment variables to be added to the system's environment. By default, it is set to an empty JSON object `{}`.
+* **max_timeout** - Optional - Maximum amount of time (in minutes) the workflow is allowed to run. By default, it is set to `30`.
+* **use_black** - Optional - Whether to use the black formatter. By default, it is set to `false`.
+* **use_isort** - Optional - Whether to use the isort formatter. By default, it is set to `false`.
+* **use_ruff_formatter** - Optional - Whether to use the ruff formatter. By default, it is set to `false`.
+* **use_autoflake** - Optional - Whether to use the autoflake linter. By default, it is set to `false`.
+* **use_bandit** - Optional - Whether to use the bandit linter. By default, it is set to `false`.
+* **use_flake8** - Optional - Whether to use the flake8 linter. By default, it is set to `false`.
+* **use_pylint** - Optional - Whether to use the pylint linter. By default, it is set to `false`.
+* **use_ruff_linter** - Optional - Whether to use the ruff linter. By default, it is set to `false`.
+* **use_coverage** - Optional - Whether to use coverage. By default, it is set to `false`.
+* **coverage_config_path** - Optional - Path to the coverage configuration file. By default, it is set to `.coveragerc`.
+* **upload_coverage** - Optional - Whether to upload the coverage report to GitHub. Requires *use_coverage* to be true. By default, it is set to `false`.
+* **run_codeql** - Optional - Whether to run CodeQL against the codebase. By default, it is set to `false`.
+* **use_celery** - Optional - Whether to create a Celery container. By default, it is set to `false`.
+* **use_elastic_search** - Optional - Whether to create an Elasticsearch container. By default, it is set to `false`.
+* **use_memcached** - Optional - Whether to create a Memcached container. By default, it is set to `false`.
+* **use_mongo** - Optional - Whether to create a MongoDB container. By default, it is set to `false`.
+* **use_postgres** - Optional - Whether to create a PostgresDB container. By default, it is set to `false`.
+* **use_rabbitmq** - Optional - Whether to create a RabbitMQ container. By default, it is set to `false`.
+* **use_redis** - Optional - Whether to create a Redis container. By default, it is set to `false`.
+* **celery_app** - Optional - A Celery application name. Requires *use_celery* to be true. By default, it is set to an empty string.
+* **celery_queues** - Optional - A comma separated list of Celery queues. Requires *use_celery* to be true. By default, it is set to `default`.
+* **elasticsearch_version** - Optional - Elasticsearch's container version. By default, it is set to `latest`.
+* **elasticsearch_port** - Optional - Elasticsearch's container exposed port. By default, it is set to `9200`.
+* **memcached_version** - Optional - Memcached's container version. By default, it is set to `latest`.
+* **mongo_version** - Optional - MongoDB's container version. By default, it is set to `latest`.
+* **postgres_db** - Optional - PostgresDB database name. Requires *use_postgres* to be true. By default, it is set to `db`.
+* **postgres_user** - Optional - PostgresDB user name. Requires *use_postgres* to be true. By default, it is set to `user`.
+* **postgres_password** - Optional - PostgresDB password. Requires *use_postgres* to be true. By default, it is set to `password`.
+* **postgres_version** - Optional - PostgresDB's container version. Requires *use_postgres* to be true. By default, it is set to `latest`.
+* **rabbitmq_version** - Optional - RabbitMQ's container version. Requires *use_rabbitmq* to be true. By default, it is set to `latest`.
+* **redis_version** - Optional - Redis' container version. Requires *use_redis* to be true. By default, it is set to `latest`.
+* **django_settings_module** - Optional - Path to the Django settings file. By default, it is set to an empty string.
+* **check_migrations** - Optional - Whether to check that the project's migrations are valid. Requires *django_settings_module* to be set. By default, it is set to `false`.
+* **check_requirements_licenses** - Optional - Whether to check that the requirements licenses are valid. Requires *django_settings_module* to be set. By default, it is set to `true`.
+* **ignore_requirements_licenses_regex** - Optional - A regex that describes which requirements should be ignored when checking the validity of their licenses. By default, it is set to `uWSGI.*|lunardate.*|.*QuokkaClient.*|pyquokka.*`.
+* **tags_for_slow_tests** - Optional - A space separated list of tags for tests that will only be run on the master/main branch. **Works only for Django projects**. By default, it is set to `slow`.
+* **tags_for_manual_tests** - Optional - A space separated list of tags for tests that will only be run **manually** (the CI will ignore them). **Works only for Django projects**. By default, it is set to `manual`.
+* **custom_command** - Optional - A custom bash command to run. By default, it is set to an empty string.
+* **check_docs_directory** - Optional - Path to the documentation directory in which `rstcheck` will be run to check the documentation files. By default, it is set to an empty string.
+* **check_dockerfile** - Optional - Path to a Dockerfile to be checked. **Warning: if set, it may significantly increase the action time**. By default, it is set to an empty string.
+
+## [Create APT cache](create_apt_cache.yaml)
+
+This workflow is run in the event of **a push on branches *main*, *master*, *develop*, *dev***. Specifically, it is triggered only when the APT requirements file is updated.
+
+The workflow is composed of a single job:
+
+1. **Create cache for APT dependencies** - This job, as described by its name, creates a cache for APT dependencies and stores it on GitHub. It is composed of three steps:
+    1. **Check out latest commit on current branch** - This step checks out the latest commit on the current branch of the repository.
+    2. **Install APT dependencies** - This step refreshes the APT repositories and then installs the project dependencies. This is required to produce the APT cache that will be saved later.
+    3. **Save APT cache** - This step saves the APT cache on GitHub using the [**save_apt_cache**](../actions/apt_requirements/save_apt_cache/README.md) action.
+
+## [Create Python cache](create_python_cache.yaml)
+
+This workflow is run in the event of **a push on branches *main*, *master*, *develop*, *dev***. Specifically, it is triggered only when the Python requirements file is updated.
+
+The workflow is composed of a single job:
+
+1. **Create cache for Python dependencies** - This job, as described by its name, creates a cache for Python dependencies and stores it on GitHub. It is composed of seven steps:
+    1. **Check out latest commit** - This step checks out the latest commit on the current branch of the repository.
+    2. **Install system dependencies required by Python Packages** - **OPTIONAL** - Sometimes, Python packages require one or more system dependencies. For instance, the `python-ldap` Python package requires the `libldap2-dev` and `libsasl2-dev` system dependencies for a successful installation. This step allows the user to install the system dependencies required by Python packages.
+    3. **Set up Python** - This step installs Python on the runner.
+    4. **Set up Python virtual environment** - This step uses the [**create_virtualenv**](../actions/python_requirements/create_virtualenv/README.md) action to create a Python virtual environment.
+    5. **Install Python dependencies** - This step installs the Python requirements to produce the final virtual environment that will be cached. Installing the Python dependencies also populates the pip cache.
+    6. **Save pip cache** - This step uses the [**save_pip_cache**](../actions/python_requirements/save_pip_cache/README.md) action to save pip's download cache on GitHub.
+    7.
**Create virtual environment cache** - This step uses [**save_virtualenv**](../actions/python_requirements/save_virtualenv/README.md) action to save virtual environment on GitHub's cache. + +## [CI](pull_request_automation.yml) + +This workflow runs in the case of a **pull request on branches *master*, *main*, *develop*, *dev*** and it's the core CI workflow. + +It is composed of three jobs: + +1. **detect-changes** - This job detects and enumerates changes to backend and/or frontend files. To do so, it uses the [**_detect_changes**](_detect_changes.yml) workflow. +2. **node** - If any changes to the frontend files are found, [**_node**](_node.yml) workflow is run. +3. **python** - If any changes to the backend files are found, [**_python**](_python.yml) workflow is run. + +## [Release and publish](release.yml) + +TODO + +## [Reusable release and tag workflow](_release_and_tag.yml) + +TODO diff --git a/.github/workflows/_detect_changes.yml b/.github/workflows/_detect_changes.yml index efaffa05..9e9b8a65 100644 --- a/.github/workflows/_detect_changes.yml +++ b/.github/workflows/_detect_changes.yml @@ -3,16 +3,24 @@ on: workflow_call: inputs: backend_directories: - description: Backend directories separated by spaces + description: Space separated list of backend directories + required: false + type: string + + backend_exclusions: + description: Space separated list of Backend directories or files to be excluded required: false type: string - default: '' frontend_directories: - description: Frontend directories separated by spaces + description: Space separated list of frontend directories + required: false + type: string + + frontend_exclusions: + description: Space separated list of frontend directories or files to be excluded required: false type: string - default: '' ubuntu_version: description: Ubuntu version to use @@ -37,13 +45,16 @@ jobs: backend: ${{steps.diff_check_backend.outputs.backend}} frontend: ${{steps.diff_check_frontend.outputs.frontend}} steps: - - uses: actions/checkout@v4 + - name: Check out PR target branch + uses: actions/checkout@v4 with: ref: ${{ github.base_ref }} - - uses: actions/checkout@v4 + - name: Check out source branch latest commit + uses: actions/checkout@v4 with: clean: false + - name: Generate summary if: ${{inputs.backend_directories != ''}} | ${{inputs.frontend_directories != ''}} run: | @@ -54,18 +65,34 @@ jobs: if: ${{inputs.backend_directories != ''}} id: diff_check_backend run: | - BACKEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} | head -n -1 | wc -l) + BACKEND_EXCLUSIONS="" + if ${{ inputs.backend_exclusions != ''}}; then + for exclusion in ${{ inputs.backend_exclusions }}; do + BACKEND_EXCLUSIONS+=":(glob,exclude)$exclusion " + done + fi + # No need to add other quotes since they will already be added. 
+ BACKEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $BACKEND_EXCLUSIONS | head -n -1 | wc -l) echo "backend=$BACKEND_CHANGES" >> $GITHUB_OUTPUT echo "Backend Changes: $BACKEND_CHANGES" >> $GITHUB_STEP_SUMMARY + echo "::debug::diff command:git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $BACKEND_EXCLUSIONS" + echo "::debug::diff command results: $(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $BACKEND_EXCLUSIONS | head -n -1 )" echo "backend $BACKEND_CHANGES" - - name: Generate diffs for frontend if: ${{inputs.frontend_directories != ''}} id: diff_check_frontend run: | - FRONTEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.frontend_directories }} | head -n -1 | wc -l) + FRONTEND_EXCLUSIONS="" + if ${{ inputs.frontend_exclusions != ''}}; then + for exclusion in ${{ inputs.frontend_exclusions }}; do + FRONTEND_EXCLUSIONS+=":(glob,exclude)$exclusion " + done + fi + FRONTEND_CHANGES=$(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.frontend_directories }} $FRONTEND_EXCLUSIONS | head -n -1 | wc -l) echo "frontend=$FRONTEND_CHANGES" >> $GITHUB_OUTPUT echo "Frontend Changes: $FRONTEND_CHANGES" >> $GITHUB_STEP_SUMMARY + echo "::debug::diff command:git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $FRONTEND_EXCLUSIONS" + echo "::debug::diff command results: $(git diff --compact-summary origin/${{ github.base_ref }} -- ${{ inputs.backend_directories }} $FRONTEND_EXCLUSIONS | head -n -1 )" echo "frontend $FRONTEND_CHANGES" diff --git a/.github/workflows/_node.yml b/.github/workflows/_node.yml index 97d8980d..a164c844 100644 --- a/.github/workflows/_node.yml +++ b/.github/workflows/_node.yml @@ -82,13 +82,18 @@ jobs: node: name: Run node.js tests runs-on: ubuntu-${{ inputs.ubuntu_version }} + permissions: + actions: read + contents: read + security-events: write timeout-minutes: ${{ inputs.max_timeout }} strategy: matrix: node_version: ${{ fromJson(inputs.node_versions) }} language: ['javascript'] steps: - - uses: actions/checkout@v4 + - name: Check out latest commit for current branch + uses: actions/checkout@v4 - name: Set up Node.js uses: actions/setup-node@v4 @@ -102,7 +107,7 @@ jobs: if [[ '${{ inputs.use_jest }}' != 'false' ]]; then npm i -D --package-lock-only jest @testing-library/jest-dom babel-jest @babel/core @babel/preset-env if [[ '${{ inputs.use_react }}' != 'false' ]]; then - npm i -D --package-lock-only @testing-library/react @testing-library/jest-dom + npm i -D --package-lock-only @testing-library/react fi fi if [[ '${{ inputs.use_eslint }}' != 'false' ]]; then @@ -124,10 +129,10 @@ jobs: uses: ./.github/actions/node_linter with: working_directory: ${{ inputs.working_directory }} - use_eslint: ${{ inputs.use_eslint == true }} - use_prettier: ${{ inputs.use_prettier == true }} - use_stylelint: ${{ inputs.use_stylelint == true }} - check_packages_licenses: ${{ inputs.check_packages_licenses == true }} + use_eslint: ${{ inputs.use_eslint }} + use_prettier: ${{ inputs.use_prettier }} + use_stylelint: ${{ inputs.use_stylelint }} + check_packages_licenses: ${{ inputs.check_packages_licenses }} - name: Check packages licenses if: ${{ inputs.check_packages_licenses }} @@ -138,7 +143,7 @@ jobs: requirements: ${{ inputs.requirements_path }} external: ${{ inputs.working_directory }}/licenses.csv external-format: csv - table-headers: true + 
headers: true fail: 'StrongCopyleft,NetworkCopyleft,Error' fails-only: true diff --git a/.github/workflows/_python.yml b/.github/workflows/_python.yml index d24cd1b4..28fe71bb 100644 --- a/.github/workflows/_python.yml +++ b/.github/workflows/_python.yml @@ -2,168 +2,171 @@ name: Reusable python linter workflow on: workflow_call: inputs: + # Base configs python_versions: - description: Python versions to use + description: Python versions to use (in the form of a JSON array) type: string required: true + ubuntu_version: + description: Ubuntu version to use + type: string + default: latest + required: false working_directory: description: Directory that must be run against the linters type: string required: true - - use_autoflake: - description: Use autoflake linter - default: false - type: boolean + requirements_path: + description: Path to the requirements.txt file + type: string + required: true + project_dev_requirements_file: + description: Path to an additional project dev requirements file + type: string + required: false + install_from: + description: Directory that must be used to install the packages + type: string + required: false + default: . + packages_path: + description: Path to the packages.txt file (APT requirements) + type: string + required: false + env: + description: Environment variables to set + type: string required: false + default: >- + {} + max_timeout: + description: Max time that the CI can be run + type: number + required: false + default: 30 + + # Formatters use_black: description: Use black formatter - default: false type: boolean required: false use_isort: description: Use isort formatter - default: false type: boolean required: false - use_flake8: - description: Use flake8 linter - default: false + use_ruff_formatter: + description: Use ruff formatter type: boolean required: false - use_pylint: - description: Use pylint linter - default: false + + # Linters + use_autoflake: + description: Use autoflake linter type: boolean required: false use_bandit: description: Use bandit linter - default: false type: boolean required: false - - run_codeql: - description: Run codeql - default: false + use_flake8: + description: Use flake8 linter type: boolean required: false - - requirements_path: - description: Path to the requirements.txt file - type: string - required: true - - install_from: - description: Directory that must be used to install the packages - type: string + use_pylint: + description: Use pylint linter + type: boolean required: false - default: . - - packages_path: - description: Path to the packages.txt file - type: string + use_ruff_linter: + description: Use ruff linter + type: boolean required: false - custom_command: - description: String of custom command to run - type: string + # Coverage configs + use_coverage: + description: Use coverage.py. + type: boolean required: false - django_settings_module: - description: Path to the django settings file + coverage_config_path: + description: Path to the coverage.py config file type: string required: false - default: '' - - check_migrations: - description: Check if migrations are valid. Require django_settings_module to be set. - type: boolean - required: false - default: false - check_requirements_licenses: - description: Check if requirements have a valid license. Require django_settings_module to be set. 
+ default: .coveragerc + upload_coverage: + description: Upload coverage.py report to github type: boolean required: false - default: true - ignore_requirements_licenses_regex: - description: Regex of repositories of which ignore license - type: string - required: false - default: uWSGI.*|lunardate.*|.*QuokkaClient.*|pyquokka.* - check_docs_directory: - description: Check docs using rstcheck inside this directory - type: string - required: false - default: '' - check_dockerfile: - description: Check dockerfile build. WARNING action total time may increase significantly - type: string + # CodeQL configs + run_codeql: + description: Run codeql + type: boolean required: false - default: '' - - use_postgres: - description: Use postgres service - default: false + + # Services + use_celery: + description: Create a celery worker type: boolean required: false use_elastic_search: description: Use elastic_search service - default: false type: boolean required: false use_memcached: description: Use memcached service - default: false type: boolean required: false - use_redis: - description: Use redis service - default: false + use_mongo: + description: Use mongo service type: boolean required: false - use_rabbitmq: - description: Use rabbitmq service - default: false + use_postgres: + description: Use postgres service type: boolean required: false - use_mongo: - description: Use mongo service - default: false + use_rabbitmq: + description: Use rabbitmq service type: boolean required: false - use_celery: - description: Create a celery worker - default: false + use_redis: + description: Use redis service type: boolean required: false - use_coverage: - description: Use coverage.py. - default: false - type: boolean + # Services configs + ## Celery service configs + celery_app: + description: Celery app name. Requires use_celery to be true + type: string required: false - coverage_config_path: - description: Path to the coverage.py config file + celery_queues: + description: Celery queues separated by ,. Requires use_celery to be true type: string required: false - default: .coveragerc - upload_coverage: - description: Upload coverage.py report to github - default: false - type: boolean + default: default + ## Elasticsearch service configs + elasticsearch_version: + description: Elasticsearch container version + type: string required: false - - tags_for_slow_tests: - description: Tags for tests that will be run only on master/main branch, space separated. Can be used only for django projects. - default: slow + default: latest + elasticsearch_port: + description: Elasticsearch container port type: string required: false - tags_for_manual_tests: - description: Tags for tests that will not be run on the CI, space separated. Can be used only for django projects. - default: manual + default: 9200 + ## Memcached service configs + memcached_version: + description: Memcached alpine container version type: string required: false - + default: latest + ## Mongo service configs + mongo_version: + description: Mongo container version + type: string + required: false + default: latest + ## Postgres service configs postgres_db: description: Postgres service db. 
Requires use_postgres to be true type: string @@ -184,66 +187,63 @@ on: type: string required: false default: latest - - mongo_version: - description: Mongo container version + ## RabbitMQ service configs + rabbitmq_version: + description: RabbitMQ management-alpine container version type: string required: false default: latest - elasticsearch_version: - description: Elasticsearch container version + ## Redis service configs + redis_version: + description: Redis alpine container version type: string required: false - default: 8.11.1 - elasticsearch_port: - description: Elasticsearch container port + default: latest + + + # Django configs + django_settings_module: + description: Path to the django settings file type: string required: false - default: 9200 - memcached_version: - description: Memcached alpine container version - type: string + check_migrations: + description: Check if migrations are valid. Require django_settings_module to be set. + type: boolean required: false - default: latest - redis_version: - description: Redis alpine container version - type: string + check_requirements_licenses: + description: Check if requirements have a valid license. Require django_settings_module to be set. + type: boolean required: false - default: latest - rabbitmq_version: - description: RabbitMQ management-alpine container version + default: true + ignore_requirements_licenses_regex: + description: Regex of repositories of which ignore license type: string required: false - default: 3 - - celery_app: - description: Celery app name. Requires use_celery to be true + default: uWSGI.*|lunardate.*|.*QuokkaClient.*|pyquokka.* + tags_for_slow_tests: + description: Tags for tests that will be run only on master/main branch, space separated. Can be used only for django projects. + default: slow type: string required: false - - celery_queues: - description: Celery queues separated by ,. Requires use_celery to be true + tags_for_manual_tests: + description: Tags for tests that will not be run on the CI, space separated. Can be used only for django projects. + default: manual type: string required: false - default: default - env: - description: Environment variables to set + # Misc configs + custom_command: + description: String of custom command to run type: string required: false - default: >- - {} - max_timeout: - description: Max time that the CI can be run - type: number + check_docs_directory: + description: Check docs using rstcheck inside this directory + type: string required: false - default: 30 - - ubuntu_version: - description: Ubuntu version to use + check_dockerfile: + description: Check dockerfile build. 
WARNING action total time may increase significantly type: string - default: latest required: false jobs: @@ -257,10 +257,11 @@ jobs: language: ['python'] env: ${{ fromJson(inputs.env) }} steps: - - uses: actions/checkout@v4 + - name: Check out latest commit + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python_version }} @@ -276,21 +277,133 @@ jobs: shell: bash - - name: Install apt requirements - if: inputs.packages_path - uses: ./.github/actions/apt_requirements + - name: Restore APT cache related to PR event + id: restore_apt_cache_pr + uses: ./.github/actions/apt_requirements/restore_apt_cache + with: + apt_requirements_file_path: ${{ inputs.packages_path }} + + - name: Restore APT cache related to target branch + id: restore_apt_cache_target_branch + if: steps.restore_apt_cache_pr.outputs.cache-hit != 'true' + uses: ./.github/actions/apt_requirements/restore_apt_cache + with: + apt_requirements_file_path: ${{ inputs.packages_path }} + git_reference: ${{ github.base_ref }} + + - name: Refresh APT repositories + if: > + steps.restore_apt_cache_pr.outputs.cache-hit != 'true' && + steps.restore_apt_cache_target_branch.outputs.cache-hit != 'true' + run: | + sudo apt-get update + shell: bash + + - name: Install APT requirements + run: | + sudo apt-get install -y --no-install-recommends $(tr '\n' ' ' < ${{ inputs.packages_path }}) + shell: bash + + - name: Save APT cache related to PR event + if: > + steps.restore_apt_cache_pr.outputs.cache-hit != 'true' + uses: ./.github/actions/apt_requirements/save_apt_cache + with: + apt_requirements_file_path: ${{ inputs.packages_path }} + + - name: Create linter requirements file + uses: ./.github/actions/python_requirements/create_linter_requirements_file + with: + install_from: ${{ inputs.install_from }} + django_settings_module: ${{ inputs.django_settings_module }} + use_autoflake: ${{ inputs.use_autoflake }} + use_bandit: ${{ inputs.use_bandit }} + use_black: ${{ inputs.use_black }} + use_flake8: ${{ inputs.use_flake8 }} + use_isort: ${{ inputs.use_isort }} + use_pylint: ${{ inputs.use_pylint }} + use_ruff_formatter: ${{ inputs.use_ruff_formatter }} + use_ruff_linter: ${{ inputs.use_ruff_linter }} + + - name: Create dev requirements file + uses: ./.github/actions/python_requirements/create_dev_requirements_file + with: + install_from: ${{ inputs.install_from }} + project_dev_requirements_file: ${{ inputs.project_dev_requirements_file }} + + - name: Create docs requirements file + uses: ./.github/actions/python_requirements/create_docs_requirements_file + with: + install_from: ${{ inputs.install_from }} + check_docs_directory: ${{ inputs.check_docs_directory }} + django_settings_module: ${{ inputs.django_settings_module }} + + - name: Restore Python virtual environment related to PR event + id: restore_python_virtual_environment_pr + uses: ./.github/actions/python_requirements/restore_virtualenv/ with: - requirements_file: ${{ inputs.packages_path }} + requirements_paths: "${{ inputs.requirements_path }} requirements-linters.txt requirements-dev.txt requirements-docs.txt" + + - name: Restore Python virtual environment related to target branch + id: restore_python_virtual_environment_target_branch + if: steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' + uses: ./.github/actions/python_requirements/restore_virtualenv/ + with: + requirements_paths: ${{ inputs.requirements_path }} + git_reference: ${{ github.base_ref }} + + - name: 
Create Python virtual environment + if: > + steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' && + steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true' + uses: ./.github/actions/python_requirements/create_virtualenv + + - name: Restore pip cache related to PR event + id: restore_pip_cache_pr + if: > + steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' && + steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true' + uses: ./.github/actions/python_requirements/restore_pip_cache + + - name: Restore pip cache related to target branch + id: restore_pip_cache_target_branch + if: > + steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' && + steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true' && + steps.restore_pip_cache_pr.outputs.cache-hit != 'true' + uses: ./.github/actions/python_requirements/restore_pip_cache + with: + git_reference: ${{ github.base_ref }} + + - name: Install project requirements + if: > + steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' && + steps.restore_python_virtual_environment_target_branch.outputs.cache-hit != 'true' + run: pip install -r ${{ inputs.requirements_path }} + shell: bash + working-directory: ${{ inputs.install_from }} + + - name: Install other requirements + if: > + steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' + run: | + pip install -r requirements-dev.txt + pip install -r requirements-linters.txt + pip install -r requirements-docs.txt + shell: bash + working-directory: ${{ inputs.install_from }} - name: Check requirements licenses - if: inputs.check_requirements_licenses && steps.cache-virtualenv.outputs.cache-hit != 'true' + if: > + inputs.check_requirements_licenses && + steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' id: license_check_report continue-on-error: true uses: pilosus/action-pip-license-checker@v2 with: requirements: ${{ inputs.install_from }}/${{ inputs.requirements_path }} exclude: ${{ inputs.ignore_requirements_licenses_regex }} - table-headers: true + headers: true fail: 'StrongCopyleft,NetworkCopyleft,Error' fails-only: true @@ -304,102 +417,30 @@ jobs: exit 1 shell: bash - # not the best solution because i do not think that dependabot supports this - - name: Create requirements-linters.txt - run: | - echo > requirements-linters.txt - - if [[ '${{ inputs.use_black}}' != 'false' ]]; then - echo "black==23.11.0" >> requirements-linters.txt - fi - - if [[ '${{ inputs.use_isort}}' != 'false' ]]; then - echo "isort==5.12.0" >> requirements-linters.txt - fi - - if [[ '${{ inputs.use_flake8}}' != 'false' ]]; then - echo "flake8==6.1.0" >> requirements-linters.txt - if [[ -n '${{ inputs.django_settings_module }}' ]]; then - echo "flake8-django==1.4" >> requirements-linters.txt - fi - fi - - if [[ '${{ inputs.use_pylint}}' != 'false' ]]; then - echo "pylint==2.17.7" >> requirements-linters.txt - if [[ -n '${{ inputs.django_settings_module }}' ]]; then - echo "pylint-django==2.5.5" >> requirements-linters.txt - fi - fi - - if [[ '${{ inputs.use_bandit}}' != 'false' ]]; then - echo "bandit==1.7.5" >> requirements-linters.txt - fi - if [[ '${{ inputs.use_autoflake}}' != 'false' ]]; then - echo "autoflake==2.2.1" >> requirements-linters.txt - fi - cat $(echo ${{ inputs.requirements_path }} | sed -e 's/.txt/-linter.txt/') >> requirements-linters.txt 2>/dev/null || exit 0 - shell: bash - working-directory: ${{ inputs.install_from }} 
- - - name: Create requirements-dev.txt - run: | - echo > requirements-dev.txt - if [[ '${{ inputs.use_coverage }}' != 'false' ]]; then - echo "coverage>=7.3.2" >> requirements-dev.txt - fi - cat $(echo ${{ inputs.requirements_path }} | sed -e 's/.txt/-dev.txt/') >> requirements-dev.txt 2>/dev/null || exit 0 - shell: bash - working-directory: ${{ inputs.install_from }} - - - name: Create requirements-docs.txt - run: | - echo > requirements-docs.txt - if [[ -n '${{ inputs.check_docs_directory }}' ]]; then - echo "rstcheck[sphinx]" >> requirements-docs.txt - echo "sphinx==7.2.6" >> requirements-docs.txt - echo "sphinx_rtd_theme==1.3.0" >> requirements-docs.txt - echo "sphinxcontrib-spelling==8.0.0" >> requirements-docs.txt - if [[ -n '${{ inputs.django_settings_module }}' ]]; then - echo "sphinxcontrib-django2==1.9" >> requirements-docs.txt - fi - cat $(echo ${{ inputs.requirements_path }} | sed -e 's/.txt/-docs.txt/') >> requirements-docs.txt 2>/dev/null || exit 0 - fi - shell: bash - working-directory: ${{ inputs.install_from }} - - - name: Check virtualenv cache - uses: syphar/restore-virtualenv@v1 - id: cache-virtualenv + - name: Save Python virtual environment related to PR event + if: > + steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' + uses: ./.github/actions/python_requirements/save_virtualenv with: - requirement_files: | - ${{ inputs.install_from }}/${{ inputs.requirements_path }} - ${{ inputs.install_from }}/requirements-dev.txt - ${{ inputs.install_from }}/requirements-linters.txt - ${{ inputs.install_from }}/requirements-docs.txt - - - name: Check pip cache - uses: syphar/restore-pip-download-cache@v1 - if: steps.cache-virtualenv.outputs.cache-hit != 'true' - with: - requirement_files: | - ${{ inputs.install_from }}/${{ inputs.requirements_path }} - ${{ inputs.install_from }}/requirements-dev.txt - ${{ inputs.install_from }}/requirements-linters.txt - ${{ inputs.install_from }}/requirements-docs.txt - - - name: Install requirements - if: steps.cache-virtualenv.outputs.cache-hit != 'true' - run: | - pip install -r ${{ inputs.requirements_path }} - pip install -r requirements-dev.txt - pip install -r requirements-linters.txt - pip install -r requirements-docs.txt - shell: bash - working-directory: ${{ inputs.install_from }} + requirements_paths: "${{ inputs.requirements_path }} requirements-linters.txt requirements-dev.txt requirements-docs.txt" + + - name: Save pip cache related to PR event + if: > + steps.restore_python_virtual_environment_pr.outputs.cache-hit != 'true' && + steps.restore_pip_cache_pr.outputs.cache-hit != 'true' + uses: ./.github/actions/python_requirements/save_pip_cache - name: Run linters uses: ./.github/actions/python_linter - if: inputs.use_black || inputs.use_isort || inputs.use_flake8 || inputs.use_pylint || inputs.use_bandit || inputs.use_autoflake + if: > + inputs.use_black || + inputs.use_isort || + inputs.use_flake8 || + inputs.use_pylint || + inputs.use_bandit || + inputs.use_autoflake || + inputs.use_ruff_formatter || + inputs.use_ruff_linter with: working_directory: ${{ inputs.working_directory }} use_black: ${{ inputs.use_black }} @@ -408,6 +449,8 @@ jobs: use_pylint: ${{ inputs.use_pylint }} use_bandit: ${{ inputs.use_bandit }} use_autoflake: ${{ inputs.use_autoflake }} + use_ruff_formatter: ${{ inputs.use_ruff_formatter }} + use_ruff_linter: ${{ inputs.use_ruff_linter }} - name: Run CodeQL if: inputs.run_codeql @@ -424,12 +467,6 @@ jobs: shell: bash working-directory: ${{ inputs.check_docs_directory }} - - name: Build 
DockerFile - if: inputs.check_dockerfile - run: | - docker build -f ${{ inputs.check_dockerfile }} . - working-directory: ${{ inputs.working_directory }} - - name: Start services uses: ./.github/actions/services if: inputs.use_postgres || inputs.use_elastic_search || inputs.use_memcached || inputs.use_redis || inputs.use_rabbitmq || inputs.use_mongo @@ -510,6 +547,7 @@ jobs: env: ${{ secrets }} shell: bash + - name: Create coverage output if: inputs.use_coverage && inputs.upload_coverage id: coverage-output @@ -518,3 +556,17 @@ jobs: echo "## Coverage.py report" >> $GITHUB_STEP_SUMMARY echo "$(coverage report -m --format=markdown)" >> $GITHUB_STEP_SUMMARY working-directory: ${{ inputs.working_directory }} + + - name: Generate coverage XML + if: inputs.use_coverage && inputs.upload_coverage + run: | + coverage xml + working-directory: ${{ inputs.working_directory }} + + - name: Upload coverage report as artifact + if: inputs.use_coverage && inputs.upload_coverage + uses: actions/upload-artifact@v4 + with: + name: coverage-report-py${{ matrix.python_version }} + path: ${{ inputs.working_directory }}/coverage.xml + retention-days: 30 diff --git a/.github/workflows/_release_and_tag.yml b/.github/workflows/_release_and_tag.yml index acc8181f..92bf029e 100644 --- a/.github/workflows/_release_and_tag.yml +++ b/.github/workflows/_release_and_tag.yml @@ -31,11 +31,40 @@ on: required: false default: #CyberSecurity + publish_on_ecr: + description: Publish on ecr + type: boolean + required: false + default: false + repository: + description: Repository name + type: string + required: false + default: ${{ github.event.repository.name }} + + dockerfiles: + description: Path for dockerfiles from working directory + type: string + required: false + working_directory: + description: Docker build context + type: string + required: false + default: . 
+ aws_region: + description: Aws region + type: string + required: false + default: eu-central-1 + + jobs: release_and_tag: name: Create release and tag runs-on: ubuntu-latest - if: github.event.pull_request.merged == true && ( github.base_ref == 'master' || github.base_ref == 'main' ) + if: github.event.pull_request.merged == true + outputs: + match: ${{ steps.check-tag.outputs.match }} steps: - uses: actions/checkout@v4 with: @@ -43,6 +72,7 @@ jobs: - name: Check Tag id: check-tag + if: github.base_ref == 'master' || github.base_ref == 'main' run: | if [[ "${{ github.event.pull_request.title }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then echo "match=true" >> $GITHUB_OUTPUT @@ -51,7 +81,7 @@ jobs: - name: Create Tag and Release id: create-release if: steps.check-tag.outputs.match == 'true' - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 with: tag_name: ${{ github.event.pull_request.title }} name: Version ${{ github.event.pull_request.title }} @@ -67,7 +97,7 @@ jobs: with: fetch-depth: 0 # otherwise, you do not retrieve the tags - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 if: steps.check-tag.outputs.match == 'true' && (inputs.publish_on_pypi || inputs.publish_on_test_pypi) with: python-version: "3.x" @@ -115,4 +145,42 @@ jobs: api_key: ${{ secrets.TWITTER_API_KEY }} api_key_secret: ${{ secrets.TWITTER_API_KEY_SECRET }} access_token: ${{ secrets.TWITTER_ACCESS_TOKEN }} - access_token_secret: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} \ No newline at end of file + access_token_secret: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + + push_on_ecr: + runs-on: ubuntu-latest + needs: release_and_tag + if: github.event.pull_request.merged == true && inputs.publish_on_ecr == true + strategy: + matrix: + dockerfile: ${{ fromJson(inputs.dockerfiles) }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # otherwise, you do not retrieve the tags + - name: Push on ecr branch + uses: ./.github/actions/push_on_ecr + if: github.base_ref == 'master' || github.base_ref == 'main' || github.base_ref == 'develop' || github.base_ref == 'dev' + with: + repository: ${{ inputs.repository }} + aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} + aws_access_key: ${{ secrets.AWS_ACCESS_KEY}} + aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + dockerfile: ${{ matrix.dockerfile }} + image_tag: ${{ ( github.base_ref == 'main' || github.base_ref == 'master' ) && 'prod' || 'stag' }} + aws_region: ${{ inputs.aws_region }} + working_directory: ${{ inputs.working_directory }} + + - name: Push on ecr new release + if: needs.release_and_tag.outputs.match == 'true' && (github.base_ref == 'master' || github.base_ref == 'main' ) + uses: ./.github/actions/push_on_ecr + with: + repository: ${{ inputs.repository }} + aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} + aws_access_key: ${{ secrets.AWS_ACCESS_KEY}} + aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + dockerfile: ${{ inputs.dockerfile }} + image_tag: ${{ github.event.pull_request.title }} + aws_region: ${{ inputs.aws_region }} + working_directory: ${{ inputs.working_directory }} \ No newline at end of file diff --git a/.github/workflows/create_apt_cache.yaml b/.github/workflows/create_apt_cache.yaml new file mode 100644 index 00000000..9bd5fd73 --- /dev/null +++ b/.github/workflows/create_apt_cache.yaml @@ -0,0 +1,38 @@ +name: Create APT cache + +# GitHub will remove any cache entries that have not been accessed in over 7 days. 
+ +on: + push: + branches: + - main + - master + - develop + - dev + paths: + # Path to APT requirements file + - '.github/test/python_test/packages.txt' + +# discard previous execution if you commit to a branch that is already running +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + create-cache: + name: Create cache for APT dependencies + runs-on: ubuntu-latest + steps: + - name: Check out latest commit on current branch + uses: actions/checkout@v4 + + # Remember to set the same APT requirements file path set before! + - name: Install APT dependencies + run: | + sudo apt-get update + sudo apt-get -y install --no-install-recommends $(tr '\n' ' ' < .github/test/python_test/packages.txt) + + - name: Save APT cache + uses: ./.github/actions/apt_requirements/save_apt_cache + with: + apt_requirements_file_path: .github/test/python_test/packages.txt diff --git a/.github/workflows/create_python_cache.yaml b/.github/workflows/create_python_cache.yaml new file mode 100644 index 00000000..8db85f48 --- /dev/null +++ b/.github/workflows/create_python_cache.yaml @@ -0,0 +1,55 @@ +name: Create Python cache + +# GitHub will remove any cache entries that have not been accessed in over 7 days. + +# Only project dependencies will be cached here + +on: + push: + branches: + - main + - master + - develop + - dev + paths: + - '.github/test/python_test/requirements.txt' + +# discard previous execution if you commit to a branch that is already running +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + create-python-cache: + name: Create cache for Python dependencies + runs-on: ubuntu-latest + steps: + - name: Check out latest commit + uses: actions/checkout@v4 + + # Uncomment only if necessary + #- name: Install system dependencies required by Python packages + # run: | + # sudo apt-get update && sudo apt install ... + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Set up Python virtual environment + uses: ./.github/actions/python_requirements/create_virtualenv + + - name: Install Python dependencies + run: | + pip install -r .github/test/python_test/requirements.txt + working-directory: "." + + - name: Save pip cache + uses: ./.github/actions/python_requirements/save_pip_cache + + - name: Create virtual environment cache + uses: ./.github/actions/python_requirements/save_virtualenv + with: + requirements_paths: .github/test/python_test/requirements.txt + diff --git a/.github/workflows/pull_request_automation.yml b/.github/workflows/pull_request_automation.yml index 1ff60d09..96c45d92 100644 --- a/.github/workflows/pull_request_automation.yml +++ b/.github/workflows/pull_request_automation.yml @@ -15,10 +15,11 @@ jobs: detect-changes: uses: ./.github/workflows/_detect_changes.yml with: - backend_directories: api greedybear + backend_directories: api greedybear tests frontend_directories: frontend ubuntu_version: latest + frontend-tests: needs: detect-changes if: ${{ needs.detect-changes.outputs.frontend > 0 }} @@ -39,9 +40,9 @@ jobs: use_coverage: true upload_coverage: true max_timeout: 15 - ubuntu_version: 22.04 node_versions: >- ["20"] + ubuntu_version: latest backend-tests: @@ -52,14 +53,18 @@ jobs: with: working_directory: . 
- use_black: true - use_isort: true - use_flake8: true + use_black: false + use_isort: false + use_flake8: false use_pylint: false use_bandit: false use_autoflake: false + use_ruff_formatter: true + use_ruff_linter: true requirements_path: requirements/project-requirements.txt + project_dev_requirements_file: requirements/dev-requirements.txt + packages_path: packages.txt django_settings_module: greedybear.settings check_migrations: true @@ -74,6 +79,7 @@ jobs: use_memcached: false use_elastic_search: false use_rabbitmq: true + rabbitmq_version: "4" use_mongo: false use_celery: false @@ -81,6 +87,8 @@ jobs: upload_coverage: true tags_for_slow_tests: main + custom_command: python manage.py test --tag=migration --failfast + env: >- { "ENVIRONMENT": "ci", diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4987b889..7288a8c9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,3 +20,9 @@ jobs: publish_on_test_pypi: false publish_on_npm: false publish_on_twitter: false + publish_on_ecr: false + repository: certego-test + working_directory: .github/test/python_test + dockerfiles: >- + ["Dockerfile"] + aws_region: eu-central-1 diff --git a/.gitignore b/.gitignore index 46689f15..3d05fec9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,12 @@ __pycache__/ mlmodels/ # JetBrains IDEs (PyCharm, IntelliJ, etc.) .idea/ +# Ruff cache +.ruff_cache/ +# Coverage reports +htmlcov/ +.coverage +coverage.xml +*.cover +.coverage.* + diff --git a/EXTRACTION_PROCESS.md b/EXTRACTION_PROCESS.md new file mode 100644 index 00000000..c3eee43e --- /dev/null +++ b/EXTRACTION_PROCESS.md @@ -0,0 +1,40 @@ +# Extraction Process + +This file offers an overview of how GreedyBear extracts and processes T-Pot data. The ExtractionJob shown in the diagram runs every 10 minutes by default. + +```mermaid +sequenceDiagram + participant Job as ExtractionJob + participant Pipeline as ExtractionPipeline + participant Elastic as ElasticRepository + participant Factory as StrategyFactory + participant Strategy as ExtractionStrategy + participant Processor as IocProcessor + participant Repo as IocRepository + + Job->>Pipeline: execute() + Pipeline->>Elastic: search(minutes_back) + Elastic-->>Pipeline: hits[] + + loop Each honeypot + Pipeline->>Factory: get_strategy(honeypot) + Factory-->>Pipeline: strategy + Pipeline->>Strategy: extract_from_hits(hits) + Strategy->>Strategy: iocs_from_hits(hits) + + loop Each IOC + Strategy->>Processor: add_ioc(ioc) + Processor->>Repo: get_ioc_by_name(name) + alt IOC exists + Processor->>Processor: merge_iocs() + Processor->>Repo: save(ioc) + else New IOC + Processor->>Repo: save(ioc) + end + end + end + + Pipeline->>Pipeline: UpdateScores() +``` + +A single ExtractionPipeline instance orchestrates the extraction of all available honeypots. Is uses the ElasticRepository to receive a list of all honeypot hits from a certain time window. For each honeypot it gets the corresponding ExtractionStrategy, which contains all the extraction logic that is specific for a certain type of honeypot (e.g. Cowrie). The ExtractionStrategy uses this logic to create IOC objects and hands them to the IocProcessor, which is responsible for processing them so they can be written to the database via the IocRepository. 
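To make the flow described above concrete, here is a minimal, self-contained sketch of the same orchestration. The class names mirror the sequence diagram, but every field, signature and the dict-based repository are illustrative assumptions, not GreedyBear's actual implementation.

```python
from collections import defaultdict


class CowrieStrategy:
    """Stand-in for a honeypot-specific ExtractionStrategy."""

    def extract_from_hits(self, hits):
        # A real strategy parses honeypot-specific fields; here we only keep source IPs.
        for hit in hits:
            yield {"name": hit["src_ip"], "honeypot": "cowrie"}


class IocProcessor:
    """Merges repeated observations of the same IOC before persisting it."""

    def __init__(self, repository):
        self.repository = repository  # plain dict standing in for IocRepository

    def add_ioc(self, ioc):
        existing = self.repository.get(ioc["name"])
        if existing:  # IOC exists -> merge
            existing["attack_count"] += 1
        else:  # new IOC -> create
            self.repository[ioc["name"]] = ioc | {"attack_count": 1}


def run_extraction(hits, strategies, processor):
    """One pipeline run: group hits per honeypot and delegate to its strategy."""
    grouped = defaultdict(list)
    for hit in hits:
        grouped[hit["honeypot"]].append(hit)
    for honeypot, honeypot_hits in grouped.items():
        strategy = strategies[honeypot]  # StrategyFactory equivalent
        for ioc in strategy.extract_from_hits(honeypot_hits):
            processor.add_ioc(ioc)


hits = [
    {"honeypot": "cowrie", "src_ip": "203.0.113.5"},
    {"honeypot": "cowrie", "src_ip": "203.0.113.5"},
]
repository = {}
run_extraction(hits, {"cowrie": CowrieStrategy()}, IocProcessor(repository))
print(repository)  # one IOC with attack_count == 2
```

Keeping the honeypot-specific parsing inside a strategy object is what allows new honeypot types to be added without touching the orchestration code.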
\ No newline at end of file
diff --git a/README.md b/README.md
index aca875b9..b1452bf9 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,7 @@
 [![Twitter Follow](https://img.shields.io/twitter/follow/intel_owl?style=social)](https://twitter.com/intel_owl)
 [![Linkedin](https://img.shields.io/badge/LinkedIn-0077B5?style=flat&logo=linkedin&logoColor=white)](https://www.linkedin.com/company/intelowl/)
-[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
-[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 [![CodeQL](https://github.com/intelowlproject/GreedyBear/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/intelowlproject/GreedyBear/actions/workflows/codeql-analysis.yml)
 [![Dependency Review](https://github.com/intelowlproject/GreedyBear/actions/workflows/dependency_review.yml/badge.svg)](https://github.com/intelowlproject/GreedyBear/actions/workflows/dependency_review.yml)
 [![Pull request automation](https://github.com/intelowlproject/GreedyBear/actions/workflows/pull_request_automation.yml/badge.svg)](https://github.com/intelowlproject/GreedyBear/actions/workflows/pull_request_automation.yml)
@@ -22,7 +21,7 @@ Documentation about GreedyBear installation, usage, configuration and contributi
 
 ## Public feeds
 
-There are public feeds provided by [The Honeynet Project](https://www.honeynet.org) in this [site](https://greedybear.honeynet.org). [Example](https://greedybear.honeynet.org/api/feeds/log4j/all/recent.txt)
+There are public feeds provided by [The Honeynet Project](https://www.honeynet.org) on this [site](https://greedybear.honeynet.org). [Example](https://greedybear.honeynet.org/api/feeds/cowrie/all/recent.txt)
 
 Please do not perform too many requests to extract feeds or you will be banned.
 
@@ -65,6 +64,6 @@ In 2022 we joined the official [DigitalOcean Open Source Program](https://www.di
 This project was started as a personal Christmas project by [Matteo Lodi](https://twitter.com/matte_lodi) in 2021.
 
 Special thanks to:
-* [Tim Leonhard](https://github.com/regulartim) for having greatly improved the project and added Machine Learning Models during his master thesis.
+* [Tim Leonhard](https://github.com/regulartim) for having greatly improved the project and added Machine Learning Models during his master thesis. He is now the Principal Maintainer.
 * [Martina Carella](https://github.com/carellamartina) for having created the GUI during her master thesis.
 * [Daniele Rosetti](https://github.com/drosetti) for helping maintaining the Frontend.
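For anyone consuming the public feeds linked in the README above, a minimal client sketch follows. The URL is the README's example link; the comment-stripping and everything else are assumptions about a plain-text feed that starts with a license comment line.

```python
import requests

# Public text feed from the README's example link (cowrie, all attack types, recent IOCs).
FEED_URL = "https://greedybear.honeynet.org/api/feeds/cowrie/all/recent.txt"

response = requests.get(FEED_URL, timeout=30)
response.raise_for_status()

# Skip leading "#" license/comment lines and keep one indicator per line.
iocs = [line for line in response.text.splitlines() if line and not line.startswith("#")]
print(f"Fetched {len(iocs)} IOCs")
```

As the README asks, keep the request volume low and cache results locally rather than polling aggressively.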
diff --git a/api/enums.py b/api/enums.py deleted file mode 100644 index a4a536dc..00000000 --- a/api/enums.py +++ /dev/null @@ -1,6 +0,0 @@ -import enum - - -class Honeypots(enum.Enum): - LOG4J = "log4j" - COWRIE = "cowrie" diff --git a/api/serializers.py b/api/serializers.py index 917a0f44..ba2b0ce9 100644 --- a/api/serializers.py +++ b/api/serializers.py @@ -3,9 +3,10 @@ from functools import cache from django.core.exceptions import FieldDoesNotExist +from rest_framework import serializers + from greedybear.consts import REGEX_DOMAIN, REGEX_IP from greedybear.models import IOC, GeneralHoneypot -from rest_framework import serializers logger = logging.getLogger(__name__) @@ -38,7 +39,9 @@ def validate(self, data): Check a given observable against regex expression """ observable = data["query"] - if not re.match(REGEX_IP, observable) or not re.match(REGEX_DOMAIN, observable): + if re.match(r"^[\d\.]+$", observable) and not re.match(REGEX_IP, observable): + raise serializers.ValidationError("Observable is not a valid IP") + if not re.match(REGEX_IP, observable) and not re.match(REGEX_DOMAIN, observable): raise serializers.ValidationError("Observable is not a valid IP or domain") try: required_object = IOC.objects.get(name=observable) @@ -95,6 +98,7 @@ def ordering_validation(ordering: str) -> str: class FeedsRequestSerializer(serializers.Serializer): feed_type = serializers.CharField(max_length=120) attack_type = serializers.ChoiceField(choices=["scanner", "payload_request", "all"]) + ioc_type = serializers.ChoiceField(choices=["ip", "domain", "all"]) max_age = serializers.IntegerField(min_value=1) min_days_seen = serializers.IntegerField(min_value=1) include_reputation = serializers.ListField(child=serializers.CharField(max_length=120)) @@ -114,7 +118,56 @@ def validate_ordering(self, ordering): return ordering_validation(ordering) +class ASNFeedsOrderingSerializer(FeedsRequestSerializer): + ALLOWED_ORDERING_FIELDS = frozenset( + { + "asn", + "ioc_count", + "total_attack_count", + "total_interaction_count", + "total_login_attempts", + "expected_ioc_count", + "expected_interactions", + "first_seen", + "last_seen", + } + ) + + def validate_ordering(self, ordering): + field_name = ordering.lstrip("-").strip() + + if field_name not in self.ALLOWED_ORDERING_FIELDS: + raise serializers.ValidationError( + f"Invalid ordering field for ASN aggregated feed: '{field_name}'. Allowed fields: {', '.join(sorted(self.ALLOWED_ORDERING_FIELDS))}" + ) + + return ordering + + class FeedsResponseSerializer(serializers.Serializer): + """ + Serializer for feed response data structure. + + NOTE: This serializer is currently NOT used in production code (as of #629). + It has been kept in the codebase for the following reasons: + + 1. **Documentation**: Serves as a clear schema definition for the API response contract + 2. **Testing**: Validates the expected response structure through unit tests + 3. **Future-proofing**: Allows easy re-enabling of validation if security requirements change + 4. **Reference**: Useful for API consumers and developers to understand the response format + + Performance Optimization Context: + Previously, this serializer was instantiated and validated for each IOC in the response + (up to 5000 times per request), causing significant overhead (~1.8s for 5000 IOCs). + The optimization removed this per-item validation since the data is constructed internally + in api/views/utils.py::feeds_response() and guaranteed to match this schema. 
+ + The response is now built directly without serializer validation, reducing response time + to ~0.03s (50-90x speedup) while maintaining the exact same API contract defined here. + + See: #629 for benchmarking details and discussion. + """ + feed_type = serializers.ListField(child=serializers.CharField(max_length=120)) value = serializers.CharField(max_length=256) scanner = serializers.BooleanField() @@ -124,6 +177,7 @@ class FeedsResponseSerializer(serializers.Serializer): attack_count = serializers.IntegerField(min_value=1) interaction_count = serializers.IntegerField(min_value=1) ip_reputation = serializers.CharField(allow_blank=True, max_length=32) + firehol_categories = serializers.ListField(child=serializers.CharField(max_length=64), allow_empty=True) asn = serializers.IntegerField(allow_null=True, min_value=1) destination_port_count = serializers.IntegerField(min_value=0) login_attempts = serializers.IntegerField(min_value=0) diff --git a/api/urls.py b/api/urls.py index ec341bcd..f426151e 100644 --- a/api/urls.py +++ b/api/urls.py @@ -1,5 +1,8 @@ # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear # See the file 'LICENSE' for copying permission. +from django.urls import include, path +from rest_framework import routers + from api.views import ( StatisticsViewSet, command_sequence_view, @@ -7,11 +10,10 @@ enrichment_view, feeds, feeds_advanced, + feeds_asn, feeds_pagination, general_honeypot_list, ) -from django.urls import include, path -from rest_framework import routers # Routers provide an easy way of automatically determining the URL conf. router = routers.DefaultRouter(trailing_slash=False) @@ -21,6 +23,7 @@ urlpatterns = [ path("feeds/", feeds_pagination), path("feeds/advanced/", feeds_advanced), + path("feeds/asn/", feeds_asn), path("feeds///.", feeds), path("enrichment", enrichment_view), path("cowrie_session", cowrie_session_view), diff --git a/api/views/command_sequence.py b/api/views/command_sequence.py index efbf550f..a5137241 100644 --- a/api/views/command_sequence.py +++ b/api/views/command_sequence.py @@ -2,16 +2,22 @@ # See the file 'LICENSE' for copying permission. 
import logging -from api.views.utils import is_ip_address, is_sha256hash from certego_saas.apps.auth.backend import CookieTokenAuthentication +from django.conf import settings from django.http import Http404, HttpResponseBadRequest -from greedybear.consts import FEEDS_LICENSE, GET -from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType from rest_framework import status -from rest_framework.decorators import api_view, authentication_classes, permission_classes +from rest_framework.decorators import ( + api_view, + authentication_classes, + permission_classes, +) from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response +from api.views.utils import is_ip_address, is_sha256hash +from greedybear.consts import GET +from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, ViewType + logger = logging.getLogger(__name__) @@ -42,7 +48,7 @@ def command_sequence_view(request): include_similar = request.query_params.get("include_similar") is not None logger.info(f"Command Sequence view requested by {request.user} for {observable}") source_ip = str(request.META["REMOTE_ADDR"]) - request_source = Statistics(source=source_ip, view=viewType.COMMAND_SEQUENCE_VIEW.value) + request_source = Statistics(source=source_ip, view=ViewType.COMMAND_SEQUENCE_VIEW.value) request_source.save() if not observable: @@ -50,7 +56,7 @@ def command_sequence_view(request): if is_ip_address(observable): sessions = CowrieSession.objects.filter(source__name=observable, start_time__isnull=False, commands__isnull=False) - sequences = set(s.commands for s in sessions) + sequences = {s.commands for s in sessions} seqs = [ { "time": s.start_time, @@ -61,15 +67,16 @@ def command_sequence_view(request): ] related_iocs = IOC.objects.filter(cowriesession__commands__in=sequences).distinct().only("name") if include_similar: - related_clusters = set(s.cluster for s in sequences if s.cluster is not None) + related_clusters = {s.cluster for s in sequences if s.cluster is not None} related_iocs = IOC.objects.filter(cowriesession__commands__cluster__in=related_clusters).distinct().only("name") if not seqs: raise Http404(f"No command sequences found for IP: {observable}") data = { - "license": FEEDS_LICENSE, "executed_commands": seqs, "executed_by": sorted([ioc.name for ioc in related_iocs]), } + if settings.FEEDS_LICENSE: + data["license"] = settings.FEEDS_LICENSE return Response(data, status=status.HTTP_200_OK) if is_sha256hash(observable): @@ -86,10 +93,11 @@ def command_sequence_view(request): for s in sessions ] data = { - "license": FEEDS_LICENSE, "commands": commands, "iocs": sorted(iocs, key=lambda d: d["time"], reverse=True), } + if settings.FEEDS_LICENSE: + data["license"] = settings.FEEDS_LICENSE return Response(data, status=status.HTTP_200_OK) except CommandSequence.DoesNotExist as exc: raise Http404(f"No command sequences found with hash: {observable}") from exc diff --git a/api/views/cowrie_session.py b/api/views/cowrie_session.py index 9ddb0b4c..ed7c9bf8 100644 --- a/api/views/cowrie_session.py +++ b/api/views/cowrie_session.py @@ -4,16 +4,22 @@ import logging import socket -from api.views.utils import is_ip_address, is_sha256hash from certego_saas.apps.auth.backend import CookieTokenAuthentication +from django.conf import settings from django.http import Http404, HttpResponseBadRequest -from greedybear.consts import FEEDS_LICENSE, GET -from greedybear.models import IOC, CommandSequence, CowrieSession, Statistics, viewType from 
rest_framework import status -from rest_framework.decorators import api_view, authentication_classes, permission_classes +from rest_framework.decorators import ( + api_view, + authentication_classes, + permission_classes, +) from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response +from api.views.utils import is_ip_address, is_sha256hash +from greedybear.consts import GET +from greedybear.models import CommandSequence, CowrieSession, Statistics, ViewType + logger = logging.getLogger(__name__) @@ -67,7 +73,7 @@ def cowrie_session_view(request): logger.info(f"Cowrie view requested by {request.user} for {observable}") source_ip = str(request.META["REMOTE_ADDR"]) - request_source = Statistics(source=source_ip, view=viewType.COWRIE_SESSION_VIEW.value) + request_source = Statistics(source=source_ip, view=ViewType.COWRIE_SESSION_VIEW.value) request_source.save() if not observable: @@ -88,19 +94,20 @@ def cowrie_session_view(request): return HttpResponseBadRequest("Query must be a valid IP address or SHA-256 hash") if include_similar: - commands = set(s.commands for s in sessions if s.commands) - clusters = set(cmd.cluster for cmd in commands if cmd.cluster is not None) + commands = {s.commands for s in sessions if s.commands} + clusters = {cmd.cluster for cmd in commands if cmd.cluster is not None} related_sessions = CowrieSession.objects.filter(commands__cluster__in=clusters).prefetch_related("source", "commands") sessions = sessions.union(related_sessions) response_data = { - "license": FEEDS_LICENSE, "query": observable, } + if settings.FEEDS_LICENSE: + response_data["license"] = settings.FEEDS_LICENSE - unique_commands = set(s.commands for s in sessions if s.commands) + unique_commands = {s.commands for s in sessions if s.commands} response_data["commands"] = sorted("\n".join(cmd.commands) for cmd in unique_commands) - response_data["sources"] = sorted(set(s.source.name for s in sessions), key=socket.inet_aton) + response_data["sources"] = sorted({s.source.name for s in sessions}, key=socket.inet_aton) if include_credentials: response_data["credentials"] = sorted(set(itertools.chain(*[s.credentials for s in sessions]))) if include_session_data: diff --git a/api/views/enrichment.py b/api/views/enrichment.py index 1c49e5d9..b0b4ee16 100644 --- a/api/views/enrichment.py +++ b/api/views/enrichment.py @@ -2,15 +2,20 @@ # See the file 'LICENSE' for copying permission. 
import logging -from api.serializers import EnrichmentSerializer from certego_saas.apps.auth.backend import CookieTokenAuthentication -from greedybear.consts import GET -from greedybear.models import Statistics, viewType from rest_framework import status -from rest_framework.decorators import api_view, authentication_classes, permission_classes +from rest_framework.decorators import ( + api_view, + authentication_classes, + permission_classes, +) from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response +from api.serializers import EnrichmentSerializer +from greedybear.consts import GET +from greedybear.models import Statistics, ViewType + logger = logging.getLogger(__name__) @@ -34,7 +39,7 @@ def enrichment_view(request): serializer.is_valid(raise_exception=True) source_ip = str(request.META["REMOTE_ADDR"]) - request_source = Statistics(source=source_ip, view=viewType.ENRICHMENT_VIEW.value) + request_source = Statistics(source=source_ip, view=ViewType.ENRICHMENT_VIEW.value) request_source.save() return Response(serializer.data, status=status.HTTP_200_OK) diff --git a/api/views/feeds.py b/api/views/feeds.py index 5e309d11..37890f53 100644 --- a/api/views/feeds.py +++ b/api/views/feeds.py @@ -2,17 +2,28 @@ # See the file 'LICENSE' for copying permission. import logging -from api.views.utils import FeedRequestParams, feeds_response, get_queryset, get_valid_feed_types from certego_saas.apps.auth.backend import CookieTokenAuthentication from certego_saas.ext.pagination import CustomPageNumberPagination -from greedybear.consts import GET -from rest_framework.decorators import api_view, authentication_classes, permission_classes +from rest_framework.decorators import ( + api_view, + authentication_classes, + permission_classes, +) from rest_framework.permissions import IsAuthenticated +from rest_framework.response import Response + +from api.serializers import ASNFeedsOrderingSerializer +from api.views.utils import ( + FeedRequestParams, + asn_aggregated_queryset, + feeds_response, + get_queryset, + get_valid_feed_types, +) +from greedybear.consts import GET logger = logging.getLogger(__name__) -api_view([GET]) - @api_view([GET]) def feeds(request, feed_type, attack_type, prioritize, format_): @@ -21,7 +32,7 @@ def feeds(request, feed_type, attack_type, prioritize, format_): Args: request: The incoming request object. - feed_type (str): Type of feed (e.g., log4j, cowrie, etc.). + feed_type (str): Type of feed (e.g. cowrie, honeytrap, etc.). attack_type (str): Type of attack (e.g., all, specific attack types). prioritize (str): Prioritization mechanism to use (e.g., recent, persistent). format_ (str): Desired format of the response (e.g., json, csv, txt). @@ -31,9 +42,11 @@ def feeds(request, feed_type, attack_type, prioritize, format_): Returns: Response: The HTTP response with formatted IOC data. 
""" - logger.info(f"request /api/feeds with params: feed type: {feed_type}, " f"attack_type: {attack_type}, prioritization: {prioritize}, format: {format_}") + logger.info(f"request /api/feeds with params: feed type: {feed_type}, attack_type: {attack_type}, prioritization: {prioritize}, format: {format_}") - feed_params = FeedRequestParams({"feed_type": feed_type, "attack_type": attack_type, "format_": format_}) + feed_params_data = request.query_params.dict() + feed_params_data.update({"feed_type": feed_type, "attack_type": attack_type, "format_": format_}) + feed_params = FeedRequestParams(feed_params_data) feed_params.apply_default_filters(request.query_params) feed_params.set_prioritization(prioritize) @@ -78,7 +91,7 @@ def feeds_advanced(request): Args: request: The incoming request object. - feed_type (str): Type of feed to retrieve. (supported: `cowrie`, `log4j`, etc.; default: `all`) + feed_type (str): Type of feed to retrieve. (supported: `cowrie`, `honeytrap`, etc.; default: `all`) attack_type (str): Type of attack to filter. (supported: `scanner`, `payload_request`, `all`; default: `all`) max_age (int): Maximum number of days since last occurrence. E.g. an IOC that was last seen 4 days ago is excluded by default. (default: 3) min_days_seen (int): Minimum number of days on which an IOC must have been seen. (default: 1) @@ -106,3 +119,45 @@ def feeds_advanced(request): resp_data = feeds_response(iocs, feed_params, valid_feed_types, dict_only=True, verbose=verbose) return paginator.get_paginated_response(resp_data) return feeds_response(iocs_queryset, feed_params, valid_feed_types, verbose=verbose) + + +@api_view(["GET"]) +@authentication_classes([CookieTokenAuthentication]) +@permission_classes([IsAuthenticated]) +def feeds_asn(request): + """ + Retrieve aggregated IOC feed data grouped by ASN (Autonomous System Number). + + Args: + request: The HTTP request object. + feed_type (str): Filter by feed type (e.g. 'cowrie', 'honeytrap'). Default: 'all'. + attack_type (str): Filter by attack type (e.g., 'scanner', 'payload_request'). Default: 'all'. + max_age (int): Maximum age of IOCs in days. Default: 3. + min_days_seen (int): Minimum days an IOC must have been observed. Default: 1. + exclude_reputation (str): ';'-separated reputations to exclude (e.g., 'mass scanner'). Default: none. + ordering (str): Aggregation ordering field (e.g., 'total_attack_count', 'asn'). Default: '-ioc_count'. + asn (str, optional): Filter results to a single ASN. + + Returns: + Response: HTTP response with a JSON list of ASN aggregation objects. + Each object contains: + asn (int): ASN number. + ioc_count (int): Number of IOCs for this ASN. + total_attack_count (int): Sum of attack_count for all IOCs. + total_interaction_count (int): Sum of interaction_count for all IOCs. + total_login_attempts (int): Sum of login_attempts for all IOCs. + honeypots (List[str]): Sorted list of unique honeypots that observed these IOCs. + expected_ioc_count (float): Sum of recurrence_probability for all IOCs, rounded to 4 decimals. + expected_interactions (float): Sum of expected_interactions for all IOCs, rounded to 4 decimals. + first_seen (DateTime): Earliest first_seen timestamp among IOCs. + last_seen (DateTime): Latest last_seen timestamp among IOCs. 
+ """ + logger.info(f"request /api/feeds/asn/ with params: {request.query_params}") + feed_params = FeedRequestParams(request.query_params) + valid_feed_types = get_valid_feed_types() + + iocs_qs = get_queryset(request, feed_params, valid_feed_types, is_aggregated=True, serializer_class=ASNFeedsOrderingSerializer) + + asn_aggregates = asn_aggregated_queryset(iocs_qs, request, feed_params) + data = list(asn_aggregates) + return Response(data) diff --git a/api/views/general_honeypot.py b/api/views/general_honeypot.py index 146ded21..7679eb04 100644 --- a/api/views/general_honeypot.py +++ b/api/views/general_honeypot.py @@ -2,11 +2,12 @@ # See the file 'LICENSE' for copying permission. import logging -from greedybear.consts import GET -from greedybear.models import GeneralHoneypot from rest_framework.decorators import api_view from rest_framework.response import Response +from greedybear.consts import GET +from greedybear.models import GeneralHoneypot + logger = logging.getLogger(__name__) @@ -25,11 +26,11 @@ def general_honeypot_list(request): logger.info(f"Requested general honeypots list from {request.user}.") active = request.query_params.get("onlyActive") honeypots = [] - generalHoneypots = GeneralHoneypot.objects.all() + general_honeypots = GeneralHoneypot.objects.all() if active == "true": - generalHoneypots = generalHoneypots.filter(active=True) + general_honeypots = general_honeypots.filter(active=True) logger.info(f"Requested only active general honeypots from {request.user}") - honeypots.extend([hp.name for hp in generalHoneypots]) + honeypots.extend([hp.name for hp in general_honeypots]) logger.info(f"General honeypots: {honeypots} given back to user {request.user}") return Response(honeypots) diff --git a/api/views/statistics.py b/api/views/statistics.py index 042ab6c6..347dfa1a 100644 --- a/api/views/statistics.py +++ b/api/views/statistics.py @@ -6,11 +6,12 @@ from django.db.models import Count, Q from django.db.models.functions import Trunc from django.http import HttpResponseServerError -from greedybear.models import IOC, GeneralHoneypot, Statistics, viewType from rest_framework import viewsets from rest_framework.decorators import action from rest_framework.response import Response +from greedybear.models import IOC, GeneralHoneypot, Statistics, ViewType + logger = logging.getLogger(__name__) @@ -39,11 +40,11 @@ def feeds(self, request, pk=None): "Sources": Count( "source", distinct=True, - filter=Q(view=viewType.FEEDS_VIEW.value), + filter=Q(view=ViewType.FEEDS_VIEW.value), ) } elif pk == "downloads": - annotations = {"Downloads": Count("source", filter=Q(view=viewType.FEEDS_VIEW.value))} + annotations = {"Downloads": Count("source", filter=Q(view=ViewType.FEEDS_VIEW.value))} else: logger.error("this is impossible. check the code") return HttpResponseServerError() @@ -66,11 +67,11 @@ def enrichment(self, request, pk=None): "Sources": Count( "source", distinct=True, - filter=Q(view=viewType.ENRICHMENT_VIEW.value), + filter=Q(view=ViewType.ENRICHMENT_VIEW.value), ) } elif pk == "requests": - annotations = {"Requests": Count("source", filter=Q(view=viewType.ENRICHMENT_VIEW.value))} + annotations = {"Requests": Count("source", filter=Q(view=ViewType.ENRICHMENT_VIEW.value))} else: logger.error("this is impossible. 
check the code") return HttpResponseServerError() @@ -79,8 +80,7 @@ def enrichment(self, request, pk=None): @action(detail=False, methods=["get"]) def feeds_types(self, request): """ - Retrieve statistics for different types of feeds, including Log4j, Cowrie, - and general honeypots. + Retrieve statistics for different types of feeds using GeneralHoneypot M2M relationship. Args: request: The incoming request object. @@ -88,15 +88,12 @@ def feeds_types(self, request): Returns: Response: A JSON response containing the feed type statistics. """ - # FEEDS - annotations = { - "Log4j": Count("name", distinct=True, filter=Q(log4j=True)), - "Cowrie": Count("name", distinct=True, filter=Q(cowrie=True)), - } - # feed_type for each general honeypot in the list - generalHoneypots = GeneralHoneypot.objects.all().filter(active=True) - for hp in generalHoneypots: - annotations[hp.name] = Count("name", Q(general_honeypot__name__iexact=hp.name.lower())) + # Build annotations for each active general honeypot + annotations = {} + general_honeypots = GeneralHoneypot.objects.all().filter(active=True) + for hp in general_honeypots: + # Use M2M relationship instead of boolean fields + annotations[hp.name] = Count("name", distinct=True, filter=Q(general_honeypot__name__iexact=hp.name)) return self.__aggregation_response_static_ioc(annotations) def __aggregation_response_static_statistics(self, annotations: dict) -> Response: diff --git a/api/views/utils.py b/api/views/utils.py index 39c2ae1c..bc4742c8 100644 --- a/api/views/utils.py +++ b/api/views/utils.py @@ -6,17 +6,16 @@ from datetime import datetime, timedelta from ipaddress import ip_address -from api.enums import Honeypots -from api.serializers import FeedsRequestSerializer, FeedsResponseSerializer +from django.conf import settings from django.contrib.postgres.aggregates import ArrayAgg -from django.db.models import F, Q +from django.db.models import Count, F, Max, Min, Sum from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse -from greedybear.consts import FEEDS_LICENSE -from greedybear.models import IOC, GeneralHoneypot, Statistics -from greedybear.settings import EXTRACTION_INTERVAL from rest_framework import status from rest_framework.response import Response +from api.serializers import FeedsRequestSerializer +from greedybear.models import IOC, GeneralHoneypot, Statistics + logger = logging.getLogger(__name__) @@ -46,6 +45,7 @@ class FeedRequestParams: Attributes: feed_type (str): Type of feed to retrieve (default: "all") attack_type (str): Type of attack to filter (default: "all") + ioc_type (str): Type of IOC to filter - 'ip', 'domain', or 'all' (default: "all") max_age (str): Maximum number of days since last occurrence (default: "3") min_days_seen (str): Minimum number of days on which an IOC must have been seen (default: "1") include_reputation (list): List of reputation values to include (default: []) @@ -65,6 +65,7 @@ def __init__(self, query_params: dict): """ self.feed_type = query_params.get("feed_type", "all").lower() self.attack_type = query_params.get("attack_type", "all").lower() + self.ioc_type = query_params.get("ioc_type", "all").lower() self.max_age = query_params.get("max_age", "3") self.min_days_seen = query_params.get("min_days_seen", "1") self.include_reputation = query_params["include_reputation"].split(";") if "include_reputation" in query_params else [] @@ -78,7 +79,7 @@ def __init__(self, query_params: dict): def apply_default_filters(self, query_params): if not query_params: - query_params = 
dict() + query_params = {} if "include_mass_scanners" not in query_params: self.exclude_reputation.append("mass scanner") if "include_tor_exit_nodes" not in query_params: @@ -115,11 +116,12 @@ def get_valid_feed_types() -> frozenset[str]: Returns: frozenset[str]: An immutable set of valid feed type strings """ - general_honeypots = GeneralHoneypot.objects.all().filter(active=True) - return frozenset([Honeypots.LOG4J.value, Honeypots.COWRIE.value, "all"] + [hp.name.lower() for hp in general_honeypots]) + general_honeypots = GeneralHoneypot.objects.filter(active=True) + feed_types = ["all"] + [hp.name.lower() for hp in general_honeypots] + return frozenset(feed_types) -def get_queryset(request, feed_params, valid_feed_types): +def get_queryset(request, feed_params, valid_feed_types, is_aggregated=False, serializer_class=FeedsRequestSerializer): """ Build a queryset to filter IOC data based on the request parameters. @@ -127,6 +129,15 @@ def get_queryset(request, feed_params, valid_feed_types): request: The incoming request object. feed_params: A FeedRequestParams instance. valid_feed_types (frozenset): The set of all valid feed types. + is_aggregated (bool, optional): + - If True, disables slicing (`feed_size`) and model-level ordering. + - Ensures full dataset is available for aggregation or specialized computation. + - Default: False. + serializer_class (class, optional): + - Serializer class used to validate request parameters. + - Allows injecting a custom serializer to enforce rules for specific feed types + (e.g., to restrict ordering fields or validation for specialized feeds). + - Default: `FeedsRequestSerializer`. Returns: QuerySet: The filtered queryset of IOC data. @@ -137,7 +148,7 @@ def get_queryset(request, feed_params, valid_feed_types): f"Age: {feed_params.max_age}, format: {feed_params.format}" ) - serializer = FeedsRequestSerializer( + serializer = serializer_class( data=vars(feed_params), context={"valid_feed_types": valid_feed_types}, ) @@ -145,29 +156,28 @@ def get_queryset(request, feed_params, valid_feed_types): query_dict = {} if feed_params.feed_type != "all": - if feed_params.feed_type in (Honeypots.LOG4J.value, Honeypots.COWRIE.value): - query_dict[feed_params.feed_type] = True - else: - # accept feed_type if it is in the general honeypots list - query_dict["general_honeypot__name__iexact"] = feed_params.feed_type + query_dict["general_honeypot__name__iexact"] = feed_params.feed_type if feed_params.attack_type != "all": query_dict[feed_params.attack_type] = True + if feed_params.ioc_type != "all": + query_dict["type"] = feed_params.ioc_type + query_dict["last_seen__gte"] = datetime.now() - timedelta(days=int(feed_params.max_age)) if int(feed_params.min_days_seen) > 1: query_dict["number_of_days_seen__gte"] = int(feed_params.min_days_seen) if feed_params.include_reputation: query_dict["ip_reputation__in"] = feed_params.include_reputation - iocs = ( - IOC.objects.filter(**query_dict) - .filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True)) - .exclude(ip_reputation__in=feed_params.exclude_reputation) - .annotate(value=F("name")) - .annotate(honeypots=ArrayAgg("general_honeypot__name")) - .order_by(feed_params.ordering)[: int(feed_params.feed_size)] - ) + iocs = IOC.objects.filter(**query_dict).exclude(ip_reputation__in=feed_params.exclude_reputation).annotate(value=F("name")).distinct() + + # aggregated feeds calculate metrics differently and need all rows to be accurate. 
+ if not is_aggregated: + iocs = iocs.filter(general_honeypot__active=True) + iocs = iocs.annotate(honeypots=ArrayAgg("general_honeypot__name")) + iocs = iocs.order_by(feed_params.ordering) + iocs = iocs[: int(feed_params.feed_size)] # save request source for statistics source_ip = str(request.META["REMOTE_ADDR"]) @@ -195,28 +205,24 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose Format the IOC data into the requested format (e.g., JSON, CSV, TXT). Args: - request: The incoming request object. iocs (QuerySet): The filtered queryset of IOC data. - feed_type (str): Type of feed (e.g., log4j, cowrie, etc.). + feed_params (FeedRequestParams): Request parameters including format. valid_feed_types (frozenset): The set of all valid feed types. - format_ (str): Desired format of the response (e.g., json, csv, txt). dict_only (bool): Return IOC dictionary instead of Response object. - verbose (bool): Include IOC properties that may contain a lot of data. + verbose (bool): Include verbose fields (days_seen, destination_ports, honeypots, firehol_categories). Returns: Response: The HTTP response containing formatted IOC data. """ logger.info(f"Format feeds in: {feed_params.format}") - license_text = ( - f"# These feeds are generated by The Honeynet Project once every {EXTRACTION_INTERVAL} minutes " - f"and are protected by the following license: {FEEDS_LICENSE}" - ) match feed_params.format: case "txt": - text_lines = [license_text] + [ioc[0] for ioc in iocs.values_list("name")] + text_lines = [f"# {settings.FEEDS_LICENSE}"] if settings.FEEDS_LICENSE else [] + text_lines += [ioc[0] for ioc in iocs.values_list("name")] return HttpResponse("\n".join(text_lines), content_type="text/plain") case "csv": - rows = [[license_text]] + [list(ioc) for ioc in iocs.values_list("name")] + rows = [[f"# {settings.FEEDS_LICENSE}"]] if settings.FEEDS_LICENSE else [] + rows += [list(ioc) for ioc in iocs.values_list("name")] pseudo_buffer = Echo() writer = csv.writer(pseudo_buffer, quoting=csv.QUOTE_NONE) return StreamingHttpResponse( @@ -227,60 +233,70 @@ def feeds_response(iocs, feed_params, valid_feed_types, dict_only=False, verbose ) case "json": json_list = [] - required_fields = { + + # Base fields always returned + base_fields = { "value", "first_seen", "last_seen", "attack_count", "interaction_count", - "log4j", - "cowrie", "scanner", "payload_request", "ip_reputation", "asn", - "destination_ports", "login_attempts", - "honeypots", - "days_seen", "recurrence_probability", "expected_interactions", + "honeypots", # Always needed to calculate feed_type + "destination_ports", # Always needed to calculate destination_port_count + } + + # Additional verbose fields + verbose_only_fields = { + "days_seen", + "firehol_categories", } + + # Fetch fields from database (always include honeypots and destination_ports) + required_fields = base_fields | verbose_only_fields if verbose else base_fields + + # Collect values; `honeypots` will contain the list of associated honeypot names iocs = (ioc_as_dict(ioc, required_fields) for ioc in iocs) if isinstance(iocs, list) else iocs.values(*required_fields) for ioc in iocs: - ioc_feed_type = [] - if ioc[Honeypots.LOG4J.value]: - ioc_feed_type.append(Honeypots.LOG4J.value) - if ioc[Honeypots.COWRIE.value]: - ioc_feed_type.append(Honeypots.COWRIE.value) - if len(ioc["honeypots"]): - ioc_feed_type.extend([hp.lower() for hp in ioc["honeypots"] if hp is not None]) + ioc_feed_type = [hp.lower() for hp in ioc.get("honeypots", []) if hp] data_ = ioc | { 
"first_seen": ioc["first_seen"].strftime("%Y-%m-%d"), "last_seen": ioc["last_seen"].strftime("%Y-%m-%d"), "feed_type": ioc_feed_type, - "destination_port_count": len(ioc["destination_ports"]), + "destination_port_count": len(ioc.get("destination_ports", [])), } - if verbose: - json_list.append(data_) - continue + # Remove verbose-only fields from response when not in verbose mode + if not verbose: + # Remove destination_ports array from response + data_.pop("destination_ports", None) - serializer_item = FeedsResponseSerializer( - data=data_, - context={"valid_feed_types": valid_feed_types}, - ) - serializer_item.is_valid(raise_exception=True) - json_list.append(serializer_item.data) + # Always remove honeypots field as it's redundant with feed_type + data_.pop("honeypots", None) + + # Skip validation - data_ is constructed internally and matches the API contract + json_list.append(data_) # check if sorting the results by feed_type if feed_params.feed_type_sorting is not None: logger.info("Return feeds sorted by feed_type field") - json_list = sorted(json_list, key=lambda k: k["feed_type"], reverse=feed_params.feed_type_sorting == "-feed_type") + json_list = sorted( + json_list, + key=lambda k: k["feed_type"], + reverse=feed_params.feed_type_sorting == "-feed_type", + ) logger.info(f"Number of feeds returned: {len(json_list)}") - resp_data = {"license": FEEDS_LICENSE, "iocs": json_list} + resp_data = {"iocs": json_list} + if settings.FEEDS_LICENSE: + resp_data["license"] = settings.FEEDS_LICENSE if dict_only: return resp_data else: @@ -324,3 +340,64 @@ def is_sha256hash(string: str) -> bool: bool: True if the string is a valid SHA-256 hash, False otherwise """ return bool(re.fullmatch(r"^[A-Fa-f0-9]{64}$", string)) + + +def asn_aggregated_queryset(iocs_qs, request, feed_params): + """ + Perform DB-level aggregation grouped by ASN. + + Args + iocs_qs (QuerySet): Filtered IOC queryset from get_queryset; + request (Request): The API request object; + feed_params (FeedRequestParams): Validated parameter object + + Returns: A values-grouped queryset with annotated metrics and honeypot arrays. 
+ """ + asn_filter = request.query_params.get("asn") + if asn_filter: + iocs_qs = iocs_qs.filter(asn=asn_filter) + + # default ordering is overridden here because of serializer default(-last-seen) behaviour + ordering = feed_params.ordering + if not ordering or ordering.strip() in {"", "-last_seen", "last_seen"}: + ordering = "-ioc_count" + + numeric_agg = ( + iocs_qs.exclude(asn__isnull=True) + .values("asn") + .annotate( + ioc_count=Count("id"), + total_attack_count=Sum("attack_count"), + total_interaction_count=Sum("interaction_count"), + total_login_attempts=Sum("login_attempts"), + expected_ioc_count=Sum("recurrence_probability"), + expected_interactions=Sum("expected_interactions"), + first_seen=Min("first_seen"), + last_seen=Max("last_seen"), + ) + .order_by(ordering) + ) + + honeypot_agg = ( + iocs_qs.exclude(asn__isnull=True) + .filter(general_honeypot__active=True) + .values("asn") + .annotate( + honeypots=ArrayAgg( + "general_honeypot__name", + distinct=True, + ) + ) + ) + + hp_lookup = {row["asn"]: row["honeypots"] or [] for row in honeypot_agg} + + # merging numeric aggregate with honeypot names for each asn + result = [] + for row in numeric_agg: + asn = row["asn"] + row_dict = dict(row) + row_dict["honeypots"] = sorted(hp_lookup.get(asn, [])) + result.append(row_dict) + + return result diff --git a/authentication/admin.py b/authentication/admin.py index fdd1e3bd..21eb7775 100644 --- a/authentication/admin.py +++ b/authentication/admin.py @@ -1,6 +1,5 @@ # This file is a part of GreedyBear https://github.com/honeynet/GreedyBear # See the file 'LICENSE' for copying permission. -from typing import Optional import email_utils from certego_saas.apps.user.admin import AbstractUserAdmin @@ -38,7 +37,7 @@ class UserAdminView(AbstractUserAdmin): actions = ["accept_users", "decline_users"] @admin.display(boolean=True) - def is_email_verified(self, obj: User) -> Optional[bool]: + def is_email_verified(self, obj: User) -> bool | None: return obj.is_email_verified @admin.action(description="Decline selected users") @@ -124,7 +123,7 @@ def user_is_active(self, obj: UserProfile) -> bool: return obj.user.is_active @admin.display(boolean=True) - def user_is_approved(self, obj: UserProfile) -> Optional[bool]: + def user_is_approved(self, obj: UserProfile) -> bool | None: return obj.user.approved diff --git a/authentication/migrations/0001_initial.py b/authentication/migrations/0001_initial.py index 42f67e22..da2dc841 100644 --- a/authentication/migrations/0001_initial.py +++ b/authentication/migrations/0001_initial.py @@ -1,13 +1,12 @@ # Generated by Django 3.2.18 on 2023-03-22 16:14 -from django.conf import settings import django.core.validators -from django.db import migrations, models import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models class Migration(migrations.Migration): - initial = True dependencies = [ @@ -18,15 +17,46 @@ class Migration(migrations.Migration): migrations.CreateModel( name="UserProfile", fields=[ - ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), - ("company_name", models.CharField(max_length=32, validators=[django.core.validators.MinLengthValidator(3)])), - ("company_role", models.CharField(max_length=32, validators=[django.core.validators.MinLengthValidator(3)])), - ("twitter_handle", models.CharField(blank=True, default="", max_length=16, validators=[django.core.validators.MinLengthValidator(3)])), + ( + "id", + models.BigAutoField( + auto_created=True, + 
primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "company_name", + models.CharField( + max_length=32, + validators=[django.core.validators.MinLengthValidator(3)], + ), + ), + ( + "company_role", + models.CharField( + max_length=32, + validators=[django.core.validators.MinLengthValidator(3)], + ), + ), + ( + "twitter_handle", + models.CharField( + blank=True, + default="", + max_length=16, + validators=[django.core.validators.MinLengthValidator(3)], + ), + ), ( "discover_from", models.CharField( choices=[ - ("search_engine", "Search Engine (Google, DuckDuckGo, etc.)"), + ( + "search_engine", + "Search Engine (Google, DuckDuckGo, etc.)", + ), ("was_recommended", "Recommended by friend or colleague"), ("social_media", "Social media"), ("blog_or_publication", "Blog or Publication"), @@ -36,7 +66,14 @@ class Migration(migrations.Migration): max_length=32, ), ), - ("user", models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name="user_profile", to=settings.AUTH_USER_MODEL)), + ( + "user", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="user_profile", + to=settings.AUTH_USER_MODEL, + ), + ), ], options={ "verbose_name_plural": "User Profiles", diff --git a/authentication/models.py b/authentication/models.py index f9806f2a..ec6f6bf7 100644 --- a/authentication/models.py +++ b/authentication/models.py @@ -18,15 +18,10 @@ class DiscoverFromChoices(models.TextChoices): # models class UserProfile(models.Model): - # meta - class Meta: - verbose_name_plural = "User Profiles" - - # contants + # constants DiscoverFromChoices = DiscoverFromChoices # fields - user = models.OneToOneField( settings.AUTH_USER_MODEL, on_delete=models.CASCADE, @@ -40,3 +35,10 @@ class Meta: choices=DiscoverFromChoices.choices, default=DiscoverFromChoices.OTHER, ) + + # meta + class Meta: + verbose_name_plural = "User Profiles" + + def __str__(self): + return f"{self.user.username} - {self.company_name}" diff --git a/authentication/serializers.py b/authentication/serializers.py index 78107c9c..df86986f 100644 --- a/authentication/serializers.py +++ b/authentication/serializers.py @@ -8,15 +8,15 @@ from certego_saas.models import User from certego_saas.settings import certego_apps_settings from django.conf import settings -from django.contrib.auth import password_validation from django.core.exceptions import ValidationError from django.db import DatabaseError, transaction -from django.utils.translation import gettext_lazy as _ -from greedybear.consts import REGEX_PASSWORD +from django.db.models import Q from rest_framework import serializers as rfs from rest_framework.authtoken.serializers import AuthTokenSerializer from slack_sdk.errors import SlackApiError +from greedybear.consts import REGEX_PASSWORD + from .models import UserProfile logger = logging.getLogger(__name__) @@ -103,9 +103,9 @@ def validate_key(self, key): # custom error messages err_str = str(exc.detail) if "invalid" in err_str: - exc.detail = "The provided verification key" " is invalid or your email address is already verified." + exc.detail = "The provided verification key is invalid or your email address is already verified." if "expired" in err_str: - exc.detail = "The provided verification key" " has expired or your email address is already verified." + exc.detail = "The provided verification key has expired or your email address is already verified." 
raise exc def save(self): @@ -122,7 +122,7 @@ def save(self): try: userprofile = user.user_profile user_admin_link = f"{settings.HOST_URI}/admin/certego_saas_user/user/{user.pk}" - userprofile_admin_link = f"{settings.HOST_URI}" f"/admin/authentication/userprofile/{userprofile.pk}" + userprofile_admin_link = f"{settings.HOST_URI}/admin/authentication/userprofile/{userprofile.pk}" slack = Slack() slack.send_message( title="Newly registered user!!", @@ -141,15 +141,25 @@ def save(self): class LoginSerializer(AuthTokenSerializer): def validate(self, attrs): + login_value = attrs.get("username") + # If user has entered email we try email->username mapping + try: + user = User.objects.get(email__iexact=login_value) + attrs["username"] = user.username + except User.DoesNotExist: + # Either user has entered username, or email entered doesn't exist + pass + try: return super().validate(attrs) except rfs.ValidationError as exc: try: - user = User.objects.get(username=attrs["username"]) + # Check if either of the two, username or email exists + user = User.objects.get(Q(username=login_value) | Q(email__iexact=login_value)) except User.DoesNotExist: # we do not want to leak info - # so just raise the original exception - raise exc + # so just raise the original exception without context + raise exc from None else: # custom error messages if not user.is_active: @@ -160,5 +170,4 @@ def validate(self, attrs): elif user.approved is False: exc.detail = "Your account was declined." logger.info(f"User {user} is not active. Error message: {exc.detail}") - # else - raise exc + raise exc from None diff --git a/authentication/urls.py b/authentication/urls.py index 37563947..47c9c02f 100644 --- a/authentication/urls.py +++ b/authentication/urls.py @@ -13,8 +13,8 @@ RegistrationView, ResendVerificationView, TokenSessionsViewSet, - checkAuthentication, - checkConfiguration, + check_authentication, + check_configuration, ) router = routers.DefaultRouter(trailing_slash=False) @@ -44,10 +44,10 @@ ), path("reset-password", PasswordResetView.as_view(), name="auth_reset-password"), path("login", LoginView.as_view(), name="auth_login"), - path("configuration", checkConfiguration), + path("configuration", check_configuration), # auth path("", include("certego_saas.apps.auth.urls")), path("apiaccess", APIAccessTokenView.as_view(), name="auth_apiaccess"), - path("authentication", checkAuthentication), + path("authentication", check_authentication), path("", include(router.urls)), ] diff --git a/authentication/views.py b/authentication/views.py index 80d5e65d..8e69bfda 100644 --- a/authentication/views.py +++ b/authentication/views.py @@ -1,5 +1,4 @@ import logging -from typing import List import rest_email_auth.views from certego_saas.apps.auth import views as certego_views @@ -9,15 +8,24 @@ from django.contrib.auth import get_user_model, login from django.core.cache import cache from durin import views as durin_views -from greedybear.consts import GET -from greedybear.enums import FrontendPage -from greedybear.settings import AUTH_USER_MODEL from rest_framework import status -from rest_framework.decorators import api_view, authentication_classes, permission_classes +from rest_framework.decorators import ( + api_view, + authentication_classes, + permission_classes, +) from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response -from .serializers import EmailVerificationSerializer, LoginSerializer, RegistrationSerializer +from greedybear.consts import GET +from greedybear.enums import 
diff --git a/authentication/urls.py b/authentication/urls.py
index 37563947..47c9c02f 100644
--- a/authentication/urls.py
+++ b/authentication/urls.py
@@ -13,8 +13,8 @@
     RegistrationView,
     ResendVerificationView,
     TokenSessionsViewSet,
-    checkAuthentication,
-    checkConfiguration,
+    check_authentication,
+    check_configuration,
 )
 
 router = routers.DefaultRouter(trailing_slash=False)
@@ -44,10 +44,10 @@
     ),
     path("reset-password", PasswordResetView.as_view(), name="auth_reset-password"),
     path("login", LoginView.as_view(), name="auth_login"),
-    path("configuration", checkConfiguration),
+    path("configuration", check_configuration),
     # auth
     path("", include("certego_saas.apps.auth.urls")),
     path("apiaccess", APIAccessTokenView.as_view(), name="auth_apiaccess"),
-    path("authentication", checkAuthentication),
+    path("authentication", check_authentication),
     path("", include(router.urls)),
 ]
diff --git a/authentication/views.py b/authentication/views.py
index 80d5e65d..8e69bfda 100644
--- a/authentication/views.py
+++ b/authentication/views.py
@@ -1,5 +1,4 @@
 import logging
-from typing import List
 
 import rest_email_auth.views
 from certego_saas.apps.auth import views as certego_views
@@ -9,15 +8,24 @@
 from django.contrib.auth import get_user_model, login
 from django.core.cache import cache
 from durin import views as durin_views
-from greedybear.consts import GET
-from greedybear.enums import FrontendPage
-from greedybear.settings import AUTH_USER_MODEL
 from rest_framework import status
-from rest_framework.decorators import api_view, authentication_classes, permission_classes
+from rest_framework.decorators import (
+    api_view,
+    authentication_classes,
+    permission_classes,
+)
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 
-from .serializers import EmailVerificationSerializer, LoginSerializer, RegistrationSerializer
+from greedybear.consts import GET
+from greedybear.enums import FrontendPage
+from greedybear.settings import AUTH_USER_MODEL
+
+from .serializers import (
+    EmailVerificationSerializer,
+    LoginSerializer,
+    RegistrationSerializer,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -27,47 +35,47 @@
 
 
 class PasswordResetRequestView(rest_email_auth.views.PasswordResetRequestView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
 
 
 class PasswordResetView(rest_email_auth.views.PasswordResetView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
 
 
 class EmailVerificationView(rest_email_auth.views.EmailVerificationView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
     serializer_class = EmailVerificationSerializer
 
 
 class RegistrationView(rest_email_auth.views.RegistrationView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
     serializer_class = RegistrationSerializer
 
 
 class ResendVerificationView(rest_email_auth.views.ResendVerificationView):
-    authentication_classes: List = []
-    permission_classes: List = []
-    throttle_classes: List = [POSTUserRateThrottle]
+    authentication_classes: list = []
+    permission_classes: list = []
+    throttle_classes: list = [POSTUserRateThrottle]
 
 
 @api_view([GET])
 @authentication_classes([CookieTokenAuthentication])
 @permission_classes([IsAuthenticated])
-def checkAuthentication(request):
+def check_authentication(request):
     logger.info(f"User: {request.user}, Administrator: {request.user.is_superuser}")
     return Response({"is_superuser": request.user.is_superuser}, status=status.HTTP_200_OK)
 
 
 @api_view([GET])
-def checkConfiguration(request):
+def check_configuration(request):
     logger.info(f"Requested checking configuration from {request.user}.")
     page = request.query_params.get("page")
     errors = {}
@@ -87,7 +95,12 @@ def checkConfiguration(request):
                 errors["AWS SES backend"] = "configuration required"
     else:
         # SMTP backend
-        required_variables = [settings.EMAIL_HOST, settings.EMAIL_HOST_USER, settings.EMAIL_HOST_PASSWORD, settings.EMAIL_PORT]
+        required_variables = [
+            settings.EMAIL_HOST,
+            settings.EMAIL_HOST_USER,
+            settings.EMAIL_HOST_PASSWORD,
+            settings.EMAIL_PORT,
+        ]
         for variable in required_variables:
             if not variable:
                 errors["SMTP backend"] = "configuration required"
diff --git a/configuration/ml_config.json b/configuration/ml_config.json
new file mode 100644
index 00000000..b649f252
--- /dev/null
+++ b/configuration/ml_config.json
@@ -0,0 +1,22 @@
+{
+    "RFClassifier": {
+        "class_weight": {
+            "false": 1,
+            "true": 4
+        },
+        "criterion": "entropy",
+        "max_depth": 10,
+        "max_features": "log2",
+        "min_samples_leaf": 6,
+        "min_samples_split": 3,
+        "n_estimators": 241
+    },
+    "RFRegressor": {
+        "criterion": "squared_error",
+        "max_depth": 11,
+        "max_features": "sqrt",
+        "min_samples_leaf": 3,
+        "min_samples_split": 8,
+        "n_estimators": 70
+    }
+}
\ No newline at end of file
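The new configuration/ml_config.json holds tuned hyperparameters for a random-forest classifier and regressor. A hedged sketch of how such a file could be mapped onto scikit-learn estimators (the loader, the path handling, and the boolean class-weight mapping are assumptions, not GreedyBear's actual code):

```python
# Illustrative only: feeding ml_config.json into scikit-learn estimators.
# load_ml_config() and the boolean class-weight mapping are assumptions.
import json
from pathlib import Path

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor


def load_ml_config(path: str = "configuration/ml_config.json") -> dict:
    return json.loads(Path(path).read_text())


config = load_ml_config()

# The JSON stores class-weight keys as the strings "false"/"true";
# scikit-learn expects the actual class labels, assumed boolean here.
clf_params = dict(config["RFClassifier"])
clf_params["class_weight"] = {
    False: clf_params["class_weight"]["false"],
    True: clf_params["class_weight"]["true"],
}

classifier = RandomForestClassifier(**clf_params)
regressor = RandomForestRegressor(**config["RFRegressor"])
```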
diff --git a/docker/.version b/docker/.version
index f32f3526..37ece384 100644
--- a/docker/.version
+++ b/docker/.version
@@ -1 +1 @@
-REACT_APP_GREEDYBEAR_VERSION="2.1.0"
\ No newline at end of file
+REACT_APP_GREEDYBEAR_VERSION="3.0.0"
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
index a4dee9aa..c98f60c2 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -31,9 +31,16 @@ RUN mkdir -p ${LOG_PATH} \
     && pip3 install --no-cache-dir --upgrade pip
 
 COPY requirements/project-requirements.txt $PYTHONPATH/project-requirements.txt
+COPY requirements/dev-requirements.txt $PYTHONPATH/dev-requirements.txt
 WORKDIR $PYTHONPATH
 
 RUN pip3 install --no-cache-dir -r $PYTHONPATH/project-requirements.txt
 
+# Conditionally install dev requirements (coverage, etc.)
+ARG INSTALL_DEV=false
+RUN if [ "$INSTALL_DEV" = "true" ]; then \
+    pip3 install --no-cache-dir -r $PYTHONPATH/dev-requirements.txt; \
+    fi
+
 COPY . $PYTHONPATH
 COPY --from=frontend-build /build /var/www/reactapp
diff --git a/docker/Dockerfile_nginx b/docker/Dockerfile_nginx
index c1a53bdf..049ea8c9 100644
--- a/docker/Dockerfile_nginx
+++ b/docker/Dockerfile_nginx
@@ -1,4 +1,4 @@
-FROM library/nginx:1.29.3-alpine
+FROM library/nginx:1.29.4-alpine
 RUN mkdir -p /var/cache/nginx /var/cache/nginx/feeds
 RUN apk update && apk upgrade && apk add bash
 ENV NGINX_LOG_DIR=/var/log/nginx
diff --git a/docker/default.yml b/docker/default.yml
index 0907c88d..f38b1d31 100644
--- a/docker/default.yml
+++ b/docker/default.yml
@@ -52,6 +52,12 @@ services:
       driver: none
     depends_on:
       - postgres
+    healthcheck:
+      test: ["CMD", "rabbitmq-diagnostics", "check_running"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+      start_period: 10s
 
   celery_beat:
     image: intelowlproject/greedybear:prod
@@ -63,9 +69,12 @@ services:
     env_file:
       - env_file
     depends_on:
-      - rabbitmq
-      - postgres
-      - uwsgi
+      rabbitmq:
+        condition: service_healthy
+      postgres:
+        condition: service_started
+      uwsgi:
+        condition: service_started
     <<: *no-healthcheck
 
   celery_worker_default:
@@ -80,15 +89,17 @@ services:
     env_file:
       - env_file
     depends_on:
-      - rabbitmq
-      - postgres
-      - uwsgi
+      rabbitmq:
+        condition: service_healthy
+      postgres:
+        condition: service_started
+      uwsgi:
+        condition: service_started
     <<: *no-healthcheck
 
-
 volumes:
   postgres_data:
   nginx_logs:
   generic_logs:
   static_content:
-  mlmodels:
+  mlmodels:
\ No newline at end of file
diff --git a/docker/elasticsearch.yml b/docker/elasticsearch.yml
index deadb139..054c7449 100644
--- a/docker/elasticsearch.yml
+++ b/docker/elasticsearch.yml
@@ -4,7 +4,7 @@ services:
       - elasticsearch
 
   elasticsearch:
-    image: docker.elastic.co/elasticsearch/elasticsearch:8.15.0
+    image: docker.elastic.co/elasticsearch/elasticsearch:9.2.3
     environment:
       - "discovery.type=single-node"
 
diff --git a/docker/env_file_template b/docker/env_file_template
index d7622bf4..b8363a06 100644
--- a/docker/env_file_template
+++ b/docker/env_file_template
@@ -35,6 +35,10 @@ ELASTIC_ENDPOINT=
 SLACK_TOKEN=
 DEFAULT_SLACK_CHANNEL=
 
+NTFY_URL=
+# URL of the ntfy topic to receive error alerts
+# Example: https://ntfy.sh/your_topic
+
 STAGE="production"
 DEBUG=False
 MOCK_CONNECTIONS=False
@@ -42,11 +46,12 @@ MOCK_CONNECTIONS=False
 
 # True for public deployment, False for internal deployment
 PUBLIC_DEPLOYMENT=False
-# Set True for use with TPot instances prior to version 24.04
-LEGACY_EXTRACTION=False
 
 # Interval for the honeypot data extraction in minutes (only choose divisors of 60)
 EXTRACTION_INTERVAL=10
+# Lookback time for the first extraction run in minutes (default: 1 day)
+INITIAL_EXTRACTION_TIMESPAN = 1440
+
 # Set True to cluster command sequences recorded by Cowrie once a day
 # This might be computationaly expensive on large Databases
 CLUSTER_COWRIE_COMMAND_SEQUENCES=False
@@ -63,4 +68,9 @@ COMMAND_SEQUENCE_RETENTION = 365
 
 # ThreatFox API key.
 # Once added, your payload request domains will be submitted to ThreatFox
-THREATFOX_API_KEY =
\ No newline at end of file
+THREATFOX_API_KEY =
+
+# Optional feed license URL to include in API responses
+# If not set, no license information will be included in feeds
+# Example: https://github.com/honeynet/GreedyBear/blob/main/FEEDS_LICENSE.md
+FEEDS_LICENSE=
\ No newline at end of file
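The new NTFY_URL variable points error alerts at an ntfy topic. A rough illustration of publishing an alert to such a topic (the helper below is a sketch, not the project's notification code):

```python
# Sketch of publishing an error alert to the configured ntfy topic
# (e.g. NTFY_URL=https://ntfy.sh/your_topic); send_error_alert() is hypothetical.
import os

import requests


def send_error_alert(message: str) -> None:
    ntfy_url = os.environ.get("NTFY_URL")
    if not ntfy_url:
        return  # alerting is optional; do nothing when the variable is unset
    requests.post(
        ntfy_url,
        data=message.encode("utf-8"),
        headers={"Title": "GreedyBear error", "Priority": "high"},
        timeout=10,
    )
```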
diff --git a/docker/local.override.yml b/docker/local.override.yml
index 46c616c0..426b6534 100644
--- a/docker/local.override.yml
+++ b/docker/local.override.yml
@@ -5,6 +5,7 @@ services:
       dockerfile: docker/Dockerfile
       args:
         WATCHMAN: "true"
+        INSTALL_DEV: "true"
     image: intelowlproject/greedybear:test
     volumes:
       - ../:/opt/deploy/greedybear
diff --git a/frontend/public/logo192.png b/frontend/public/logo192.png
new file mode 100644
index 00000000..86942b10
Binary files /dev/null and b/frontend/public/logo192.png differ
diff --git a/frontend/src/components/feeds/Feeds.jsx b/frontend/src/components/feeds/Feeds.jsx
index 314d3b3c..f81c276c 100644
--- a/frontend/src/components/feeds/Feeds.jsx
+++ b/frontend/src/components/feeds/Feeds.jsx
@@ -14,11 +14,7 @@ import { feedsTableColumns } from "./tableColumns";
 import { FEEDS_LICENSE } from "../../constants";
 
 // costants
-const feedTypeChoices = [
-  { label: "All", value: "all" },
-  { label: "Log4j", value: "log4j" },
-  { label: "Cowrie", value: "cowrie" },
-];
+const feedTypeChoices = [{ label: "All", value: "all" }];
 
 const attackTypeChoices = [
   { label: "All", value: "all" },
@@ -26,6 +22,12 @@
   { label: "Payload request", value: "payload_request" },
 ];
 
+const iocTypeChoices = [
+  { label: "All", value: "all" },
+  { label: "IP addresses", value: "ip" },
+  { label: "Domains", value: "domain" },
+];
+
 const prioritizationChoices = [
   { label: "Recent", value: "recent" },
   { label: "Persistent", value: "persistent" },
@@ -36,6 +38,7 @@ const prioritizationChoices = [
 const initialValues = {
   feeds_type: "all",
   attack_type: "all",
+  ioc_type: "all",
   prioritize: "recent",
 };
 
@@ -87,6 +90,7 @@ export default function Feeds() {
     params: {
       feed_type: initialValues.feeds_type,
       attack_type: initialValues.attack_type,
+      ioc_type: initialValues.ioc_type,
       prioritize: initialValues.prioritize,
     },
     initialParams: {
@@ -102,10 +106,11 @@ export default function Feeds() {
     (values) => {
       try {
         setUrl(
-          `${FEEDS_BASE_URI}/${values.feeds_type}/${values.attack_type}/${values.prioritize}.json`
+          `${FEEDS_BASE_URI}/${values.feeds_type}/${values.attack_type}/${values.prioritize}.json?ioc_type=${values.ioc_type}`
        );
         initialValues.feeds_type = values.feeds_type;
         initialValues.attack_type = values.attack_type;
+        initialValues.ioc_type = values.ioc_type;
         initialValues.prioritize = values.prioritize;
 
         const resetPage = {
@@ -148,7 +153,7 @@ export default function Feeds() {
       {(formik) => (
- +
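The frontend above now appends an ioc_type query parameter when it builds the feeds URL. A hedged sketch of consuming the same endpoint from a script (the host is a placeholder, the /api/feeds path mirrors the frontend's FEEDS_BASE_URI usage, and the response shape is an assumption):

```python
# Illustrative feeds client: host placeholder, path and JSON shape assumed
# from the frontend code above; a token may be required on some deployments.
import requests

GREEDYBEAR_HOST = "https://greedybear.example.org"  # hypothetical instance


def fetch_feeds(feed_type="all", attack_type="all", prioritize="recent", ioc_type="all"):
    """Fetch a feed, filtered by the new ioc_type query parameter."""
    url = f"{GREEDYBEAR_HOST}/api/feeds/{feed_type}/{attack_type}/{prioritize}.json"
    response = requests.get(url, params={"ioc_type": ioc_type}, timeout=30)
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    payload = fetch_feeds(ioc_type="ip")
    print(f"Retrieved {len(payload.get('iocs', []))} IOCs")
```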