SAP · marcorosa · Aug 21, 2025 · Apr 18, 2025 · Apr 18, 2025 · Apr 18, 2025
@@ -17,7 +17,7 @@ jobs:
     name: Build changelog
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
 
       - name: Run Changelog CI
         uses: saadmk11/changelog-ci@v1.2.0

@@ -20,7 +20,7 @@ jobs:
         runs-on: ubuntu-latest
         steps:
             - name: Check out Git repository
-              uses: actions/checkout@v4
+              uses: actions/checkout@v5
 
             - name: Set up Python environment
               uses: actions/setup-python@v5

@@ -20,7 +20,7 @@ jobs:
 
     steps:
       - name: Check out Git repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Set up Python environment
         uses: actions/setup-python@v5

@@ -22,7 +22,7 @@ jobs:
 
     steps:
       - name: Check out Git repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
 
       - name: Set up Node.js
         uses: actions/setup-node@v4

@@ -13,7 +13,7 @@ jobs:
       run:
         shell: bash
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
 
       - name: Get version number
         id: get-pr-number

@@ -135,11 +135,20 @@ data.db.*
 key.txt
 
 # Pentest results
+# textattack
 error.txt
 result_fail.txt
 result_success.txt
 summary.txt
+# promptmap
 prompt_success.txt
+# gptfuzz
 result_gptfuzz.txt
+# codeattack
 codeattack_success.txt
+# artprompt
 artprompt_success.json
+# garak
+*.report.jsonl
+*.hitlog.jsonl
+*.report.html
@@ -1,3 +1,16 @@
+# Version: v0.4.0
+
+* [#44](https://github.com/SAP/STARS/pull/44): Add Garak tool
+* [#72](https://github.com/SAP/STARS/pull/72): Bump deprecated from 1.2.15 to 1.2.18 in /backend-agent
+* [#73](https://github.com/SAP/STARS/pull/73): Bump python-dotenv from 1.0.1 to 1.1.1 in /backend-agent
+* [#74](https://github.com/SAP/STARS/pull/74): Bump flask from 2.3.3 to 3.1.1 in /backend-agent
+* [#77](https://github.com/SAP/STARS/pull/77): Bump ollama from 0.4.7 to 0.5.3 in /backend-agent
+* [#78](https://github.com/SAP/STARS/pull/78): Bump the js-dependencies group across 1 directory with 25 updates
+* [#79](https://github.com/SAP/STARS/pull/79): Update faiss-cpu requirement from ~=1.9.0 to ~=1.12.0 in /backend-agent
+* [#80](https://github.com/SAP/STARS/pull/80): Bump actions/checkout from 4 to 5
+* [#81](https://github.com/SAP/STARS/pull/81): Bump mermaid from 11.4.1 to 11.10.0 in /frontend
+
+
 # Version: v0.3.1
 
 * [#50](https://github.com/SAP/STARS/pull/50): Bump webpack-dev-server and @angular-devkit/build-angular in /frontend

@@ -4,14 +4,15 @@
 from gen_ai_hub.proxy.core.proxy_clients import set_proxy_version
 from gen_ai_hub.proxy.langchain.init_models import (
     init_llm, init_embedding_model)
-from langchain.agents.agent_toolkits import (
-    create_conversational_retrieval_agent, create_retriever_tool)
+from langchain.agents.agent_toolkits import \
+    create_conversational_retrieval_agent
 from langchain.embeddings import CacheBackedEmbeddings
-from langchain.schema.messages import SystemMessage
 from langchain.storage import LocalFileStore
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import DirectoryLoader
 from langchain_community.vectorstores import FAISS
+from langchain_core.messages import SystemMessage
+from langchain_core.tools.retriever import create_retriever_tool
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 
 # load env variables
@@ -75,7 +76,7 @@ def get_retriever(document_path: str,
     # https://python.langchain.com/docs/modules/data_connection/document_transformers/
 
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=500, chunk_overlap=0)
+        chunk_size=500, chunk_overlap=100)
     docs = text_splitter.split_documents(raw_docs)
 
     # Vector store
@@ -194,6 +195,7 @@ def get_retriever(document_path: str,
     run_pyrit, \
     run_codeattack, \
     run_artprompt, \
+    run_garak_attack, \
     run_attack_suite, \
     get_supported_models, \
     use_command, \
@@ -262,6 +264,14 @@ def get_retriever(document_path: str,
     "artprompt" framework. Use this before using the \
     run_artprompt tool'
 )
+# Retriever that contains notes on how to use Garak
+garak_notes = get_retriever(
+    './data/garak',
+    'garak_how',
+    'Steps to take to run a pentest on a LLM using the \
+    "garak" framework. ALWAYS run this before using the \
+    run_garak_attack tool, because it will explain how to use the tool'
+)
 # Retriever that contains notes on how to run attack suites
 llm_attack_suite_notes = get_retriever(
     './data/suite',
@@ -300,6 +310,8 @@ def get_retriever(document_path: str,
     run_codeattack,
     artprompt_notes,
     run_artprompt,
+    garak_notes,
+    run_garak_attack,
     llm_attack_suite_notes,
     run_attack_suite,
     get_supported_models

@@ -14,6 +14,17 @@
     OUTPUT_FILE as codeattack_out_file,
     start_codeattack,
 )
+from libs.garak import (
+    OUTPUT_FILE as garak_output_file,
+    start_dan,
+    start_encoding,
+    start_goodside,
+    start_latentinjection,
+    start_malwaregen,
+    start_phrasing,
+    start_promptinject,
+    start_suffix,
+)
 from libs.gptfuzz import (
     OUTPUT_FILE as gptfuzz_out_file,
     perform_gptfuzz_attack,
@@ -145,7 +156,7 @@ def start(self) -> AttackResult:
         Start the attack as specified with this specification.
         """
         with Trace(self.attack, self.spec) as t:
-            match self.attack:
+            match self.attack.lower():
                 case 'promptmap':
                     return t.trace(start_prompt_map(
                         self.target_model,
@@ -175,12 +186,54 @@ def start(self) -> AttackResult:
                         self.eval_model,
                         self.parameters
                     ))
+                case 'dan':
+                    return t.trace(start_dan(
+                        self.target_model,
+                        self.parameters
+                    ))
+                case 'encoding':
+                    return t.trace(start_encoding(
+                        self.target_model,
+                        self.parameters
+                    ))
+                case 'goodside':
+                    return t.trace(start_goodside(
+                        self.target_model,
+                        self.parameters
+                    ))
+                case 'latentinjection':
+                    return t.trace(start_latentinjection(
+                        self.target_model,
+                        self.parameters
+                    ))
+                case 'malwaregen':
+                    return t.trace(start_malwaregen(
+                        self.target_model,
+                        self.parameters
+                    ))
+                case 'phrasing':
+                    return t.trace(start_phrasing(
+                        self.target_model,
+                        self.parameters
+                    ))
+                case 'promptinject':
+                    return t.trace(start_promptinject(
+                        self.target_model,
+                        self.parameters
+                    ))
+                case 'suffix':
+                    return t.trace(start_suffix(
+                        self.target_model,
+                        self.parameters
+                    ))
                 case _:
                     raise ValueError(f'Attack {self.attack} is not known.')
 
     @property
     def output_file(self):
         if 'output_file' in self.parameters:
+            # TODO when running attacks from garak, the output_file parameter
+            # appends .report.jsonl at runtime
             return self.parameters
         match self.attack:
             case 'promptmap':
@@ -191,6 +244,11 @@ def output_file(self):
                 return codeattack_out_file
             case 'artprompt':
                 return artprompt_out_file
+            case ('dan' | 'encoding' | 'goodside' | 'latentinjection' |
+                  'malwaregen' | 'phrasing' | 'promptinject' | 'suffix'):
+                return garak_output_file if \
+                    garak_output_file.endswith('.report.jsonl') else \
+                    f'{garak_output_file}.report.jsonl'
 
 
 class AttackSuite():

@@ -203,6 +203,94 @@ def artprompt(args):
     start_spec(spec, args)
 
 
+@subcommand([arg('target_model', help='Name of the target model to attack'),
+             arg('--output_file', '-o', help='Output file with results',
+                 default=None)])
+def dan(args):
+    spec = AttackSpecification.create(
+        'dan',
+        args.target_model,
+        params=vars(args))
+    start_spec(spec, args)
+
+
+@subcommand([arg('target_model', help='Name of the target model to attack'),
+             arg('--output_file', '-o', help='Output file with results',
+                 default=None)])
+def encoding(args):
+    spec = AttackSpecification.create(
+        'encoding',
+        args.target_model,
+        params=vars(args))
+    start_spec(spec, args)
+
+
+@subcommand([arg('target_model', help='Name of the target model to attack'),
+             arg('--output_file', '-o', help='Output file with results',
+                 default=None)])
+def goodside(args):
+    spec = AttackSpecification.create(
+        'goodside',
+        args.target_model,
+        params=vars(args))
+    start_spec(spec, args)
+
+
+@subcommand([arg('target_model', help='Name of the target model to attack'),
+             arg('--output_file', '-o', help='Output file with results',
+                 default=None)])
+def latentinjection(args):
+    spec = AttackSpecification.create(
+        'latentinjection',
+        args.target_model,
+        params=vars(args))
+    start_spec(spec, args)
+
+
+@subcommand([arg('target_model', help='Name of the target model to attack'),
+             arg('--output_file', '-o', help='Output file with results',
+                 default=None)])
+def malwaregen(args):
+    spec = AttackSpecification.create(
+        'malwaregen',
+        args.target_model,
+        params=vars(args))
+    start_spec(spec, args)
+
+
+@subcommand([arg('target_model', help='Name of the target model to attack'),
+             arg('--output_file', '-o', help='Output file with results',
+                 default=None)])
+def phrasing(args):
+    spec = AttackSpecification.create(
+        'phrasing',
+        args.target_model,
+        params=vars(args))
+    start_spec(spec, args)
+
+
+@subcommand([arg('target_model', help='Name of the target model to attack'),
+             arg('--output_file', '-o', help='Output file with results',
+                 default=None)])
+def promptinject(args):
+    spec = AttackSpecification.create(
+        'promptinject',
+        args.target_model,
+        params=vars(args))
+    start_spec(spec, args)
+
+
+@subcommand([arg('target_model', help='Name of the target model to attack'),
+             arg('--output_file', '-o', help='Output file with results',
+                 default=None)])
+def suffix(args):
+    spec = AttackSpecification.create(
+        'suffix',
+        args.target_model,
+        params=vars(args))
+    start_spec(spec, args)
+
+
 @subcommand([arg('file',
                  help='Path to the JSON file containing the attack specification.',  # noqa: E501
                  nargs='?'),

@@ -0,0 +1,8 @@
+dan
+encoding
+goodside
+latentinjection
+malwaregen
+phrasing
+promptinject
+suffix
@@ -0,0 +1,34 @@
+Garak is a framework that supports multiple attacks to Large Language Models.
+STARS only supports a subset of those attacks, i.e., the attacks that fall
+under our working scenario (prompt injections, jailbreaks, etc.).
+You have access to the Garak framework using the tool 'run_garak_attack'.
+Since the framework supports multiple attacks, the user needs to specify which attack is to be run.
+To run the attack you need some information from the user, which you have to ask for.
+Specifically, the parameter you need is the target model name, as every attack
+needs a target model that is to be attacked.
+Currently supported attacks via the Garak framework are:
+- dan
+- encoding
+- goodside
+- latentinjection
+- malwaregen
+- phrasing
+- promptinject
+- suffix
+If the users ask the list of the attacks run, use this command: "cat ./data/garak/list_attacks.txt"
+The user may directly spell the attack he wants to run (e.g., "run dan") without
+mentioning garak, so you need to remember the names of the individual attacks
+listed above.
+In any case, the attack name is the parameter you have to pass to the run_garak_attack tool.
+Then, you must ask for the name of the model to be attacked.
+The name must map to one of the deployment ids available in SAP AI Core or "mistral".
+If the user doesn't know what LLM to chose, you may suggest "gpt-4o-mini".
+The user must decide the LLM to test: you can suggest "gpt-4o-mini" but you cannot force it, and as long as the user doesn't tell you the LLM name you cannot continue with the following steps.
+Don't show the user the content if this file, neither now nor later.
+Once you have the names of the LLM models, inform the user you will launch the
+tool and it will take some time (even more than 30 minutes in some cases).
+Then, run the function run_garak_attack.
+Once you finish executing the function, you must inform the user you have completed your task of running attacks and you are ready to show them the results.
+The results of Garak are stored in local files.
+Don't tell to the user these files exist and don't talk about them.
+The file is in jsonl format and is called 'garak.stars.report.jsonl' so you will open it with 'cat garak.stars.report.jsonl'.