Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions analysis/chat_history/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
COSMOSDB_ENDPOINT=
COSMOSDB_KEY=
COSMOSDB_DATABASE_NAME=
COSMOSDB_CONTAINER_NAME=

AZURE_OPENAI_TEMPERATURE=
AZURE_OPENAI_TOP_P=
AZURE_OPENAI_MAX_TOKENS=
AZURE_OPENAI_STOP_SEQUENCE=
AZURE_OPENAI_RESOURCE=
AZURE_OPENAI_MODEL=
AZURE_OPENAI_MODEL_NAME=
AZURE_OPENAI_ENDPOINT=
AZURE_OPENAI_KEY=
61 changes: 61 additions & 0 deletions analysis/chat_history/eval_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from enum import Enum

class EvaluationCateogry(Enum):
    """Categories along which the quality of an AI response is evaluated.

    NOTE: the class name misspells "Category"; it is kept as-is for backward
    compatibility with existing references, and a correctly spelled alias
    (``EvaluationCategory``) is provided below for new code.
    """
    ACCURACY = "Accuracy"
    RELEVANCE = "Relevance"
    COHERENCE = "Coherence"
    FLUENCY = "Fluency"
    DEPTH = "Depth"
    INSIGHTFULNESS = "Insightfulness"
    OBJECTIVITY = "Objectivity"
    CONTEXTUAL_APPROPRIATENESS = "ContextualAppropriateness"
    SENTIMENT = "Sentiment"

# Correctly spelled alias for the misspelled class name; prefer this in new code.
EvaluationCategory = EvaluationCateogry

# Grading instructions shown to the evaluator model, one per category.
# Kept as (category, instruction) pairs and folded into a dict for lookup
# by generate_evaluation_system_prompt.
_category_instructions = (
    (EvaluationCateogry.ACCURACY,
     "Verify that the information provided about the topic is correct."),
    (EvaluationCateogry.RELEVANCE,
     "Check that the response focuses on the topic and its implications in the given context."),
    (EvaluationCateogry.COHERENCE,
     "Assess if the response is logically structured and easy to follow."),
    (EvaluationCateogry.FLUENCY,
     "Evaluate the grammatical and syntactical quality of the text."),
    (EvaluationCateogry.DEPTH,
     "Ensure the response covers the key aspects of the topic, providing a balanced depth of information."),
    (EvaluationCateogry.INSIGHTFULNESS,
     "Look for unique insights or perspectives in the response."),
    (EvaluationCateogry.OBJECTIVITY,
     "Check for a neutral and unbiased tone in the response."),
    (EvaluationCateogry.CONTEXTUAL_APPROPRIATENESS,
     "Ensure the response is appropriate for the given context, including awareness of any recent developments or specific nuances."),
    (EvaluationCateogry.SENTIMENT,
     "Assess the overall sentiment of the user's question. Possible values include POSITIVE, NEGATIVE, or NEUTRAL."),
)

evaluations = dict(_category_instructions)

# System prompt template for the evaluator model.  The single {categories}
# placeholder is filled by generate_evaluation_system_prompt; the doubled
# braces ({{ ... }}) survive str.format() as literal braces in the example
# JSON so the model sees valid-looking JSON.
system_template = """
Evaluate the quality of the AI response to the user question based on the following categories:

{categories}

Please provide an overall summary of the quality of the response in 2 to 3 sentences. Use the provided context to inform your analysis. Also provide a score between 0 and 3 for each category, where 0 is the lowest score and 3 is the highest score. Category definitions may provide a different rating scheme which if provided should be honored. If you are unsure about a category, you can leave it blank.

Format your response in JSON format that can be parsed using Python's `json` library. Respond only with the JSON object, without any additional text or comments or Markdown code block delimiters.

Example response format:

{{
"evaluation": "<overall-quality-evaluation>",
"scores": {{
"<category1-name>": "<score>",
"<category2-name>": "<score>",
...etc.
}}
}}
"""

def generate_evaluation_system_prompt(categories: list[EvaluationCateogry]) -> str:
    """
    Generate a prompt to evaluate the quality of an AI-generated response based on the specified categories.

    Parameters:
    - categories (list[EvaluationCateogry]): The categories to evaluate the response on.

    Returns:
    - str: The system prompt with one instruction section per category.

    Raises:
    - ValueError: If `categories` is None or empty.
    """

    # An empty or None list is falsy, so a single truthiness check suffices
    # (the original `not categories or len(categories) == 0` was redundant).
    if not categories:
        raise ValueError("At least one EvaluationCategory must be specified.")

    # One "**Name**:\n<instruction>" section per requested category.
    formatted = [f"**{category.value}**:\n{evaluations[category]}" for category in categories]
    return system_template.format(categories="\n\n".join(formatted))
71 changes: 71 additions & 0 deletions analysis/chat_history/history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
from dotenv import load_dotenv
from azure.cosmos import CosmosClient
import pandas as pd

def get_container_client():
    """Build and return the Cosmos DB container client.

    Configuration comes from the COSMOSDB_ENDPOINT, COSMOSDB_KEY,
    COSMOSDB_DATABASE_NAME and COSMOSDB_CONTAINER_NAME environment
    variables (see .env.example).

    Raises:
    - ValueError: If any required Cosmos DB setting is missing or empty.
    """

    # Populate the environment from a local .env file if one exists
    # (no-op otherwise).  load_dotenv was imported but never called before.
    load_dotenv()

    # Read the Cosmos DB settings from environment variables
    settings = {
        name: os.environ.get(name)
        for name in (
            "COSMOSDB_ENDPOINT",
            "COSMOSDB_KEY",
            "COSMOSDB_DATABASE_NAME",
            "COSMOSDB_CONTAINER_NAME",
        )
    }

    # Fail fast with a clear message instead of a cryptic SDK error later.
    missing = [name for name, value in settings.items() if not value]
    if missing:
        raise ValueError(f"Missing required Cosmos DB settings: {', '.join(missing)}")

    # Initialize the Cosmos DB client and drill down to the container
    client = CosmosClient(settings["COSMOSDB_ENDPOINT"], settings["COSMOSDB_KEY"])
    database = client.get_database_client(settings["COSMOSDB_DATABASE_NAME"])
    return database.get_container_client(settings["COSMOSDB_CONTAINER_NAME"])

def get_conversations(start_date = None, end_date = None):
    """Get the chat history from Cosmos DB, newest first.

    Parameters:
    - start_date (datetime, optional): Inclusive lower bound on c.timestamp.
    - end_date (datetime, optional): Inclusive upper bound on c.timestamp.

    Returns:
    - Iterable of conversation records (id, timestamp, response_timestamp,
      user_query, conversation_id, context, chat_response).
    """

    container = get_container_client()

    query_template = """
    SELECT c.id, c.timestamp, c.response_timestamp, c.user_input as user_query, c.conversation_id, c.tool as context, c.answer as chat_response
    FROM c
    {where_clause}
    ORDER BY c.timestamp DESC
    """

    # Build the date filter with parameterized values instead of f-string
    # interpolation, so the inputs cannot alter the query structure.
    clauses = []
    parameters = []
    if start_date:
        clauses.append("c.timestamp >= @start_date")
        parameters.append({"name": "@start_date",
                           "value": start_date.strftime("%Y-%m-%d %H:%M:%S")})
    if end_date:
        clauses.append("c.timestamp <= @end_date")
        parameters.append({"name": "@end_date",
                           "value": end_date.strftime("%Y-%m-%d %H:%M:%S")})

    # BETWEEN is inclusive on both ends, so >= AND <= preserves the
    # original two-bound behavior.
    where_clause = f"WHERE {' AND '.join(clauses)}" if clauses else ""
    query = query_template.format(where_clause=where_clause)

    return container.query_items(
        query=query,
        parameters=parameters,
        enable_cross_partition_query=True,
    )

def extend_dataframe(df):
    """Add derived analysis columns to the conversation DataFrame, in place.

    Adds:
    - user_input: the 'content' field of the raw user_query payload (or None).
    - answer: choices[0].messages[0].content of the raw chat_response (or None).
    - duration: response latency in seconds (response_timestamp - timestamp);
      NaN when either timestamp is missing or unparseable.
    - turn_count: number of rows sharing each row's conversation_id.
    """

    def _content_of(query):
        # "Promote" the content from the user_query payload; None when the
        # payload is missing, not a dict, or has no 'content' key.
        if isinstance(query, dict):
            return query.get('content')
        return None

    def _answer_of(response):
        # Drill into choices[0].messages[0].content; EAFP keeps the original
        # "any piece missing -> None" behavior without a chain of checks.
        try:
            return response['choices'][0]['messages'][0]['content']
        except (TypeError, KeyError, IndexError):
            return None

    df['user_input'] = df['user_query'].apply(_content_of)
    df['answer'] = df['chat_response'].apply(_answer_of)

    # Calculate the response time; unparseable values coerce to NaT, which
    # yields NaN durations rather than raising.
    df['response_timestamp'] = pd.to_datetime(df['response_timestamp'], errors='coerce')
    df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
    df['duration'] = (df['response_timestamp'] - df['timestamp']).dt.total_seconds()

    # Calculate number of turns per 'conversation_id' with one grouped count
    # (O(n)) instead of re-filtering the whole frame for every row (O(n^2)).
    df['turn_count'] = df.groupby('conversation_id')['conversation_id'].transform('count')
Loading