Merged
Changes from all commits
37 commits
eda9cc9
Add frontend, docs and backend files
marcorosa Dec 4, 2024
f7b4878
Add textattack attack
marcorosa Dec 4, 2024
0cbb8f9
Add CodeAttack attack
marcorosa Dec 4, 2024
46429cc
Add requirements.txt
marcorosa Dec 4, 2024
fbc3274
Add instructions
marcorosa Dec 4, 2024
82514fb
Add github actions
marcorosa Dec 4, 2024
cf68639
Add renovate bot
marcorosa Dec 4, 2024
b40b3be
Add badge for changelog-ci action
marcorosa Dec 4, 2024
54821ab
Add image
marcorosa Dec 4, 2024
d81d176
Replace renovate bot with dependabot
marcorosa Dec 5, 2024
e7f44a4
Configure pep8speaks bot
marcorosa Dec 5, 2024
c20969a
Fix project naming
marcorosa Dec 5, 2024
7d3d708
Bump dompurify from 3.1.0 to 3.2.2 in /frontend
dependabot[bot] Dec 4, 2024
45085d2
Bump rollup from 4.14.1 to 4.28.0 in /frontend
dependabot[bot] Dec 4, 2024
22e0458
Bump mermaid from 10.9.0 to 10.9.3 in /frontend
dependabot[bot] Dec 4, 2024
596f3f0
Bump cross-spawn from 7.0.3 to 7.0.6 in /frontend
dependabot[bot] Dec 4, 2024
1c0a92b
Bump express from 4.19.2 to 4.21.1 in /frontend
dependabot[bot] Dec 4, 2024
7995f7b
Bump cookie, socket.io and express in /frontend
dependabot[bot] Dec 4, 2024
25528d0
Bump serve-static and express in /frontend
dependabot[bot] Dec 4, 2024
2c9722b
Add frontend Dockerfile location
marcorosa Dec 6, 2024
b894d07
Add reuse license information
marcorosa Dec 17, 2024
9fde947
Add licenses as per reuse
marcorosa Dec 17, 2024
6c640e2
Fix wording for reuse parser
marcorosa Dec 17, 2024
ab28530
Bump ws and socket.io-adapter in /frontend
dependabot[bot] Dec 17, 2024
6f8321f
Bump nanoid from 3.3.7 to 3.3.8 in /frontend
dependabot[bot] Dec 17, 2024
868e559
Bump braces from 3.0.2 to 3.0.3 in /frontend
dependabot[bot] Dec 17, 2024
e9db788
Bump micromatch from 4.0.5 to 4.0.8 in /frontend
dependabot[bot] Dec 17, 2024
368f6c1
Bump path-to-regexp and express in /frontend
dependabot[bot] Dec 17, 2024
0664d2a
Support IBM models
marcorosa Dec 20, 2024
1e5b28b
Support titan models
marcorosa Dec 19, 2024
a6147e0
Merge branch 'main' into develop
marcorosa Jan 6, 2025
bce36c5
Use codeattack in suite
marcorosa Jan 7, 2025
fc858a9
Fix typos
marcorosa Jan 7, 2025
918c41b
Limit codeattack to 20 prompts
marcorosa Jan 7, 2025
ab68528
Improve definition of codeattack
marcorosa Jan 7, 2025
aac0e72
Fix TypeError in empty responses
marcorosa Jan 7, 2025
8f02acc
[Changelog CI] Add Changelog for Version v0.1.0
github-actions[bot] Jan 7, 2025
1 change: 0 additions & 1 deletion .gitignore
@@ -139,7 +139,6 @@ result_success.txt
summary.txt
prompt_success.txt
result_gptfuzz.txt
EasyJailbreak_result.jsonl
codeattack_success.txt

# Frontend Environments
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,20 @@
# Version: v0.1.0

* [#2](https://github.com/SAP/STARS/pull/2): Bump body-parser and express in /frontend
* [#3](https://github.com/SAP/STARS/pull/3): Bump send and express in /frontend
* [#4](https://github.com/SAP/STARS/pull/4): Bump serve-static and express in /frontend
* [#5](https://github.com/SAP/STARS/pull/5): Bump cookie, socket.io and express in /frontend
* [#6](https://github.com/SAP/STARS/pull/6): Bump express from 4.19.2 to 4.21.1 in /frontend
* [#7](https://github.com/SAP/STARS/pull/7): Bump cross-spawn from 7.0.3 to 7.0.6 in /frontend
* [#8](https://github.com/SAP/STARS/pull/8): Bump mermaid from 10.9.0 to 10.9.3 in /frontend
* [#9](https://github.com/SAP/STARS/pull/9): Bump rollup from 4.14.1 to 4.28.0 in /frontend
* [#10](https://github.com/SAP/STARS/pull/10): Bump dompurify from 3.1.0 to 3.2.2 in /frontend
* [#11](https://github.com/SAP/STARS/pull/11): Minor fixes
* [#12](https://github.com/SAP/STARS/pull/12): Fix reuse compliance
* [#13](https://github.com/SAP/STARS/pull/13): Bump micromatch from 4.0.5 to 4.0.8 in /frontend
* [#14](https://github.com/SAP/STARS/pull/14): Bump braces from 3.0.2 to 3.0.3 in /frontend
* [#15](https://github.com/SAP/STARS/pull/15): Bump path-to-regexp and express in /frontend
* [#16](https://github.com/SAP/STARS/pull/16): Bump nanoid from 3.3.7 to 3.3.8 in /frontend
* [#17](https://github.com/SAP/STARS/pull/17): Bump ws and socket.io-adapter in /frontend
* [#18](https://github.com/SAP/STARS/pull/18): Support IBM models
* [#19](https://github.com/SAP/STARS/pull/19): Support Bedrock models
37 changes: 0 additions & 37 deletions backend-agent/attack.py
@@ -4,7 +4,6 @@
import os
import logging


from attack_result import AttackResult, SuiteResult
from libs.codeattack import start_codeattack, \
OUTPUT_FILE as codeattack_out_file
@@ -159,42 +158,6 @@ def start(self) -> AttackResult:
self.eval_model,
self.parameters
))
case 'cipher':
return t.trace(Cipher(
self.target_model,
self.eval_model).perform_attack(
self.parameters))
case 'codechameleon':
return t.trace(CodeChameleon(
self.target_model,
self.eval_model).perform_attack(
self.parameters))
case 'deepinception':
return t.trace(DeepInception(
self.target_model,
self.eval_model).perform_attack(
self.parameters))
case 'ica':
return t.trace(ICA(
self.target_model).perform_attack(
self.parameters))
case 'jailbroken':
return t.trace(Jailbroken(
self.target_model,
self.attack_model,
self.eval_model).perform_attack(
self.parameters))
case 'multilingual':
return t.trace(Multilingual(
self.target_model,
self.eval_model).perform_attack(
self.parameters))
case 'renellm':
return t.trace(ReNeLLM(
self.target_model,
self.attack_model,
self.eval_model).perform_attack(
self.parameters))
case _:
raise ValueError(f'Attack {self.attack} is not known.')

15 changes: 6 additions & 9 deletions backend-agent/data/intro.txt
@@ -6,8 +6,12 @@ I can help you with assessing security of **Large Language Models**.
I can run a **vulnerability scan** against a Large Language Model of your choice.
For that I will run a number of attacks against the Large Language Model:
- promptmap to identify Prompt Injections
- PyRIT to try to leak the System Prompt and to get the model to generate malicious content, e.g. writing a phishing email
- Jailbreak tools such as "cipher" and "multilingual". These will attempt to get the model to generate malicious content.
- PyRIT to try to leak the System Prompt and to get the model to generate
malicious content, e.g., writing a phishing email
- CodeAttack to run a Prompt Injection attack hidden in a code completion task.
As CodeAttack includes hundreds of test prompts, a quick version (i.e., running
only 20 test prompts) will be run as part of the vulnerability scan.

To start the vulnerability scan, simply answer *Start the vulnerability scan*.

### Individual attacks
@@ -19,13 +23,6 @@ Supported attacks are:
- gptfuzz
- PyRIT
- CodeAttack
- cipher
- codechameleon
- deepinception
- ica
- jailbroken
- multilingual
- renellm

### Attacks against Natural language processing models

12 changes: 5 additions & 7 deletions backend-agent/data/suite/default.json
@@ -38,14 +38,12 @@
}
},
{
"attack": "cipher",
"attack": "codeattack",
"target-model": "<target>",
"eval-model": "mistralai--mixtral-8x7b-instruct-v01"
},
{
"attack": "multilingual",
"target-model": "<target>",
"eval-model": "mistralai--mixtral-8x7b-instruct-v01"
"eval-model": "gpt-4",
"parameters": {
"num_prompts": 20
}
}
]
}
9 changes: 6 additions & 3 deletions backend-agent/libs/codeattack.py
@@ -168,8 +168,8 @@ def start_codeattack(target_model: LLM,
# incompatibility with the following < and > clauses
try:
judge_score = int(prompt_attack_result['judge_score'])
except ValueError:
# Sometimes the LLM replies with a score="N/A"
except (ValueError, TypeError):
# Sometimes the LLM replies with a score="N/A" or =None
logger.warning('Judge replied with a non-integer score: '
f'{judge_score}')
judge_score = -1
@@ -229,7 +229,10 @@ def _prompt_attack(data, target_llm, post_processor, judge_llm, data_key=''):
# As we call our LLM generate method instead of a direct completion
# invocation (i.e., CodeAttack original implementation), we need to get the
# textual response from a LLMResponse class with the unwrap method
target_response = target_response.unwrap_first()
# This may introduce a TypeError later (in post_processor.core call) in
# case the target_response is None, so we treat this case by assigning an
# empty string
target_response = target_response.unwrap_first() or ''
logger.debug(target_response)

logger.debug('*' * 20)
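For context on the judge-score change above, a minimal sketch (not part of the diff): Python's `int()` raises `ValueError` for non-numeric strings such as `"N/A"` and `TypeError` for `None`, so widening the `except` clause lets both cases fall back to the same sentinel score. The helper name below is hypothetical.

```python
# Illustrative sketch only; parse_judge_score is a hypothetical helper, not
# code from this PR. It mirrors the widened except clause in codeattack.py.
def parse_judge_score(raw) -> int:
    try:
        return int(raw)
    except (ValueError, TypeError):
        # int('N/A') raises ValueError, int(None) raises TypeError
        return -1  # sentinel used when the judge gives no usable score


assert parse_judge_score('7') == 7
assert parse_judge_score('N/A') == -1
assert parse_judge_score(None) == -1
```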
134 changes: 129 additions & 5 deletions backend-agent/llm.py
@@ -1,15 +1,16 @@
from typing import Any, Dict, List
import abc
import logging
import os

from gen_ai_hub.proxy.core.proxy_clients import set_proxy_version

from gen_ai_hub.proxy.core.proxy_clients import get_proxy_client
from gen_ai_hub.proxy.core.proxy_clients import set_proxy_version
from gen_ai_hub.proxy.native.openai import OpenAI as ProxyOpenAI
from gen_ai_hub.proxy.native.google_vertexai.clients import GenerativeModel
import httpx
from gen_ai_hub.proxy.native.amazon.clients import Session
from openai import OpenAI as OfficialOpenAI
from openai import InternalServerError
import httpx
import ollama

from llm_response import Error, Filtered, LLMResponse, Success
@@ -24,12 +25,12 @@
AICORE_MODELS = {
'openai':
[
'gpt-4o',
'gpt-4',
'gpt-35-turbo',
'gpt-35-turbo-0125',
'gpt-35-turbo-16k',
'gpt-4',
'gpt-4-32k',
'gpt-4o',
'gpt-4o-mini'
],
'opensource':
@@ -45,6 +46,22 @@
'gemini-1.0-pro',
'gemini-1.5-pro',
'gemini-1.5-flash'
],
'ibm':
[
'ibm--granite-13b-chat'
],
'bedrock':
[
'amazon--titan-text-lite',
'amazon--titan-text-express',
'anthropic--claude-3-haiku',
'anthropic--claude-3-sonnet',
'anthropic--claude-3-opus',
'anthropic--claude-3.5-sonnet',
'amazon--nova-pro',
'amazon--nova-lite',
'amazon--nova-micro'
]
}

@@ -70,8 +87,17 @@ def from_model_name(cls, model_name: str) -> 'LLM':
return AICoreOpenAILLM(model_name)
if model_name in AICORE_MODELS['opensource']:
return AICoreOpenAILLM(model_name, False)
if model_name in AICORE_MODELS['ibm']:
# IBM models are compatible with OpenAI completion API
return AICoreOpenAILLM(model_name)
if model_name in AICORE_MODELS['vertexai']:
return AICoreGoogleVertexLLM(model_name)
if model_name in AICORE_MODELS['bedrock']:
if 'titan' in model_name:
# Titan models don't support system prompts
return AICoreAmazonBedrockLLM(model_name, False)
else:
return AICoreAmazonBedrockLLM(model_name)
if model_name == 'mistral':
return LocalOpenAILLM(
os.getenv('MISTRAL_MODEL_NAME', ''),
@@ -442,3 +468,101 @@ def generate_completions_for_messages(
return Success(responses)
except ValueError as v:
return Error(str(v))


class AICoreAmazonBedrockLLM(LLM):

def __init__(self, model_name: str, uses_system_prompt: bool = True):
self.model_name = model_name
proxy_client = get_proxy_client('gen-ai-hub')
self.model = Session().client(
proxy_client=proxy_client,
model_name=self.model_name
)
self.uses_system_prompt = uses_system_prompt

def __str__(self) -> str:
return f'{self.model_name}/Amazon Bedrock'

def generate(self,
system_prompt: str,
prompt: str,
temperature: float = 1,
max_tokens: int = 1024,
n: int = 1) -> LLMResponse:

# Declare types for messages and kwargs to avoid mypy errors
messages: List[Dict[str, Any]] = []
kwargs: Dict[str, Any] = {
'inferenceConfig': {
'temperature': temperature,
'maxTokens': max_tokens
}
}
if not system_prompt:
messages.append(
{'role': 'user', 'content': [{'text': prompt}]}
)
else:
if self.uses_system_prompt:
messages.append(
{'role': 'user', 'content': [{'text': prompt}]}
)
kwargs['system'] = [{'text': system_prompt}]
else:
# As with the Mistral model, some Bedrock models (e.g., Titan
# models) do not support a system prompt.
messages.append(
{'role': 'user',
'content': [{'text': f'{system_prompt}{prompt}'}]},
)
try:
responses = [self.model.converse(
messages=messages,
**kwargs # arguments supported by converse API
)['output']['message']['content'][0]['text'] for _ in range(n)]
if not all(responses):
return Filtered(
'One of the generations resulted in an empty response')
return Success(responses)
except ValueError as v:
return Error(str(v))

def generate_completions_for_messages(
self,
messages: list,
temperature: float = 1,
max_tokens: int = 1024,
top_p: int = 1,
frequency_penalty: float = 0.5,
presence_penalty: float = 0.5,
n: int = 1) -> LLMResponse:
contents = []
# TODO: manage system prompt
for message in messages:
contents.append(
{
'role': 'user',
'content': [{'text': message['content']}]
}
)
try:
responses = [self.model.converse(
messages=contents,
inferenceConfig={
'temperature': temperature,
'maxTokens': max_tokens,
'topP': top_p
# Frequency penalty and Presence penalty are not supported
# by Amazon.
# 'frequency_penalty': frequency_penalty,
# 'presence_penalty': presence_penalty,
})['output']['message']['content'][0]['text']
for _ in range(n)]
if not all(responses):
return Filtered(
'One of the generations resulted in an empty response')
return Success(responses)
except ValueError as v:
return Error(str(v))
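For reviewers, a minimal usage sketch of the new Bedrock path (illustrative only, not part of the diff). `LLM.from_model_name`, the `generate` signature, the model identifiers, and the `Success`/`Filtered`/`Error` response classes all come from the patch; the standalone script context and already-configured gen-ai-hub credentials are assumptions.

```python
# Assumes this runs inside backend-agent with gen-ai-hub / AI Core
# credentials already configured in the environment.
from llm import LLM
from llm_response import Error, Filtered, Success

# Per from_model_name, Titan models are constructed with
# uses_system_prompt=False, so the system prompt is folded into the user
# message before the converse() call.
model = LLM.from_model_name('amazon--titan-text-lite')

response = model.generate(
    system_prompt='You are a helpful assistant.',
    prompt='Explain in one sentence what a prompt injection attack is.',
    temperature=0.7,
    max_tokens=256,
    n=1,
)

if isinstance(response, Success):
    print(response.unwrap_first())   # first generated completion
elif isinstance(response, Filtered):
    print('Generation was empty or filtered.')
elif isinstance(response, Error):
    print('Generation failed.')
```

Anthropic and Nova models take the other branch of `from_model_name` and keep the system prompt in the `system` field passed to `converse`.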