Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions analysis/chat_history/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
COSMOSDB_ENDPOINT=
COSMOSDB_KEY=
COSMOSDB_DATABASE_NAME=
COSMOSDB_CONTAINER_NAME=

AZURE_OPENAI_TEMPERATURE=
AZURE_OPENAI_TOP_P=
AZURE_OPENAI_MAX_TOKENS=
AZURE_OPENAI_STOP_SEQUENCE=
AZURE_OPENAI_RESOURCE=
AZURE_OPENAI_MODEL=
AZURE_OPENAI_MODEL_NAME=
AZURE_OPENAI_ENDPOINT=
AZURE_OPENAI_KEY=
61 changes: 61 additions & 0 deletions analysis/chat_history/eval_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from enum import Enum

class EvaluationCateogry(Enum):
    """Categories along which the quality of an AI response is evaluated.

    NOTE: the class name misspells "Category"; it is kept as-is for backward
    compatibility with existing references, and a correctly spelled alias
    (``EvaluationCategory``) is provided below for new code.
    """
    ACCURACY = "Accuracy"
    RELEVANCE = "Relevance"
    COHERENCE = "Coherence"
    FLUENCY = "Fluency"
    DEPTH = "Depth"
    INSIGHTFULNESS = "Insightfulness"
    OBJECTIVITY = "Objectivity"
    CONTEXTUAL_APPROPRIATENESS = "ContextualAppropriateness"
    SENTIMENT = "Sentiment"

# Correctly spelled alias for the misspelled class name; prefer this in new code.
EvaluationCategory = EvaluationCateogry

# Grading instructions shown to the evaluator model, one per category.
# Kept as (category, instruction) pairs and folded into a dict for lookup
# by generate_evaluation_system_prompt.
_category_instructions = (
    (EvaluationCateogry.ACCURACY,
     "Verify that the information provided about the topic is correct."),
    (EvaluationCateogry.RELEVANCE,
     "Check that the response focuses on the topic and its implications in the given context."),
    (EvaluationCateogry.COHERENCE,
     "Assess if the response is logically structured and easy to follow."),
    (EvaluationCateogry.FLUENCY,
     "Evaluate the grammatical and syntactical quality of the text."),
    (EvaluationCateogry.DEPTH,
     "Ensure the response covers the key aspects of the topic, providing a balanced depth of information."),
    (EvaluationCateogry.INSIGHTFULNESS,
     "Look for unique insights or perspectives in the response."),
    (EvaluationCateogry.OBJECTIVITY,
     "Check for a neutral and unbiased tone in the response."),
    (EvaluationCateogry.CONTEXTUAL_APPROPRIATENESS,
     "Ensure the response is appropriate for the given context, including awareness of any recent developments or specific nuances."),
    (EvaluationCateogry.SENTIMENT,
     "Assess the overall sentiment of the user's question. Possible values include POSITIVE, NEGATIVE, or NEUTRAL."),
)

evaluations = dict(_category_instructions)

# System prompt template for the evaluator model.  The single {categories}
# placeholder is filled by generate_evaluation_system_prompt; the doubled
# braces ({{ ... }}) survive str.format() as literal braces in the example
# JSON so the model sees valid-looking JSON.
system_template = """
Evaluate the quality of the AI response to the user question based on the following categories:

{categories}

Please provide an overall summary of the quality of the response in 2 to 3 sentences. Use the provided context to inform your analysis. Also provide a score between 0 and 3 for each category, where 0 is the lowest score and 3 is the highest score. Category definitions may provide a different rating scheme which if provided should be honored. If you are unsure about a category, you can leave it blank.

Format your response in JSON format that can be parsed using Python's `json` library. Respond only with the JSON object, without any additional text or comments or Markdown code block delimiters.

Example response format:

{{
"evaluation": "<overall-quality-evaluation>",
"scores": {{
"<category1-name>": "<score>",
"<category2-name>": "<score>",
...etc.
}}
}}
"""

def generate_evaluation_system_prompt(categories: list[EvaluationCateogry]) -> str:
    """
    Generate a prompt to evaluate the quality of an AI-generated response based on the specified categories.

    Parameters:
    - categories (list[EvaluationCateogry]): The categories to evaluate the response on.

    Returns:
    - str: The system prompt with one instruction section per category.

    Raises:
    - ValueError: If `categories` is None or empty.
    """

    # An empty or None list is falsy, so a single truthiness check suffices
    # (the original `not categories or len(categories) == 0` was redundant).
    if not categories:
        raise ValueError("At least one EvaluationCategory must be specified.")

    # One "**Name**:\n<instruction>" section per requested category.
    formatted = [f"**{category.value}**:\n{evaluations[category]}" for category in categories]
    return system_template.format(categories="\n\n".join(formatted))
71 changes: 71 additions & 0 deletions analysis/chat_history/history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
from dotenv import load_dotenv
from azure.cosmos import CosmosClient
import pandas as pd

def get_container_client():
    """Build and return the Cosmos DB container client.

    Configuration comes from the COSMOSDB_ENDPOINT, COSMOSDB_KEY,
    COSMOSDB_DATABASE_NAME and COSMOSDB_CONTAINER_NAME environment
    variables (see .env.example).

    Raises:
    - ValueError: If any required Cosmos DB setting is missing or empty.
    """

    # Populate the environment from a local .env file if one exists
    # (no-op otherwise).  load_dotenv was imported but never called before.
    load_dotenv()

    # Read the Cosmos DB settings from environment variables
    settings = {
        name: os.environ.get(name)
        for name in (
            "COSMOSDB_ENDPOINT",
            "COSMOSDB_KEY",
            "COSMOSDB_DATABASE_NAME",
            "COSMOSDB_CONTAINER_NAME",
        )
    }

    # Fail fast with a clear message instead of a cryptic SDK error later.
    missing = [name for name, value in settings.items() if not value]
    if missing:
        raise ValueError(f"Missing required Cosmos DB settings: {', '.join(missing)}")

    # Initialize the Cosmos DB client and drill down to the container
    client = CosmosClient(settings["COSMOSDB_ENDPOINT"], settings["COSMOSDB_KEY"])
    database = client.get_database_client(settings["COSMOSDB_DATABASE_NAME"])
    return database.get_container_client(settings["COSMOSDB_CONTAINER_NAME"])

def get_conversations(start_date = None, end_date = None):
    """Get the chat history from Cosmos DB, newest first.

    Parameters:
    - start_date (datetime, optional): Inclusive lower bound on c.timestamp.
    - end_date (datetime, optional): Inclusive upper bound on c.timestamp.

    Returns:
    - Iterable of conversation records (id, timestamp, response_timestamp,
      user_query, conversation_id, context, chat_response).
    """

    container = get_container_client()

    query_template = """
    SELECT c.id, c.timestamp, c.response_timestamp, c.user_input as user_query, c.conversation_id, c.tool as context, c.answer as chat_response
    FROM c
    {where_clause}
    ORDER BY c.timestamp DESC
    """

    # Build the date filter with parameterized values instead of f-string
    # interpolation, so the inputs cannot alter the query structure.
    clauses = []
    parameters = []
    if start_date:
        clauses.append("c.timestamp >= @start_date")
        parameters.append({"name": "@start_date",
                           "value": start_date.strftime("%Y-%m-%d %H:%M:%S")})
    if end_date:
        clauses.append("c.timestamp <= @end_date")
        parameters.append({"name": "@end_date",
                           "value": end_date.strftime("%Y-%m-%d %H:%M:%S")})

    # BETWEEN is inclusive on both ends, so >= AND <= preserves the
    # original two-bound behavior.
    where_clause = f"WHERE {' AND '.join(clauses)}" if clauses else ""
    query = query_template.format(where_clause=where_clause)

    return container.query_items(
        query=query,
        parameters=parameters,
        enable_cross_partition_query=True,
    )

def extend_dataframe(df):
    """Add derived analysis columns to the conversation DataFrame, in place.

    Adds:
    - user_input: the 'content' field of the raw user_query payload (or None).
    - answer: choices[0].messages[0].content of the raw chat_response (or None).
    - duration: response latency in seconds (response_timestamp - timestamp);
      NaN when either timestamp is missing or unparseable.
    - turn_count: number of rows sharing each row's conversation_id.
    """

    def _content_of(query):
        # "Promote" the content from the user_query payload; None when the
        # payload is missing, not a dict, or has no 'content' key.
        if isinstance(query, dict):
            return query.get('content')
        return None

    def _answer_of(response):
        # Drill into choices[0].messages[0].content; EAFP keeps the original
        # "any piece missing -> None" behavior without a chain of checks.
        try:
            return response['choices'][0]['messages'][0]['content']
        except (TypeError, KeyError, IndexError):
            return None

    df['user_input'] = df['user_query'].apply(_content_of)
    df['answer'] = df['chat_response'].apply(_answer_of)

    # Calculate the response time; unparseable values coerce to NaT, which
    # yields NaN durations rather than raising.
    df['response_timestamp'] = pd.to_datetime(df['response_timestamp'], errors='coerce')
    df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
    df['duration'] = (df['response_timestamp'] - df['timestamp']).dt.total_seconds()

    # Calculate number of turns per 'conversation_id' with one grouped count
    # (O(n)) instead of re-filtering the whole frame for every row (O(n^2)).
    df['turn_count'] = df.groupby('conversation_id')['conversation_id'].transform('count')
Loading