diff --git a/examples/social_graph_demo/__init__.py b/examples/social_graph_demo/__init__.py new file mode 100644 index 0000000..95e1449 --- /dev/null +++ b/examples/social_graph_demo/__init__.py @@ -0,0 +1,14 @@ +""" +Social Graph Knowledge Propagation Demo + +A benchmark for testing knowledge propagation in multi-agent social networks. +""" + +from .demo_knowledge_evaluator import DemoKnowledgeEvaluator +from .knowledge_agents import KnowledgeAgent + +__version__ = '1.0.0' +__all__ = [ + 'DemoKnowledgeEvaluator', + 'KnowledgeAgent', +] diff --git a/examples/social_graph_demo/demo_knowledge_evaluator.py b/examples/social_graph_demo/demo_knowledge_evaluator.py new file mode 100644 index 0000000..3b4c68a --- /dev/null +++ b/examples/social_graph_demo/demo_knowledge_evaluator.py @@ -0,0 +1,460 @@ +""" +LLM-based knowledge propagation evaluator using semantic understanding. +""" + +import json +import re +from typing import Any + +from tiny_chat.evaluator import Evaluator +from tiny_chat.messages import Message +from tiny_chat.server.config import ModelProviderConfig +from tiny_chat.server.providers import ModelProviderFactory + + +class DemoKnowledgeEvaluator(Evaluator): + """ + LLM-based evaluator that uses semantic understanding to assess + knowledge propagation in conversations. + """ + + def __init__( + self, + target_knowledge: str = 'GPT-5 will be released in May 2025', + knowledge_source: str = 'Sam Altman', + target_agents: list[str] | None = None, + model_name: str = 'gpt-4o-mini', + ): + """ + Initialize the knowledge evaluator. + + Args: + target_knowledge: The specific knowledge to track + knowledge_source: The original source agent + target_agents: List of agents that should receive the knowledge + model_name: LLM model to use for evaluation + """ + self.target_knowledge = target_knowledge + self.knowledge_source = knowledge_source + self.target_agents = target_agents or [] + + model_config = ModelProviderConfig( + name=model_name, type='openai', temperature=0.1 + ) + self.model_provider = ModelProviderFactory.create_provider(model_config) + + self.agents_with_knowledge = set() + self._evaluation_cache = {} + + async def _analyze_message_for_knowledge( + self, agent_name: str, message_text: str + ) -> dict[str, Any]: + """Analyze a message for knowledge content using LLM.""" + cache_key = f'{agent_name}:{hash(message_text)}' + if cache_key in self._evaluation_cache: + return self._evaluation_cache[cache_key] + + if len(message_text.strip()) < 10 or 'did nothing' in message_text.lower(): + result = { + 'agent': agent_name, + 'has_knowledge': False, + 'completeness': 0.0, + 'accuracy': 0.0, + 'confidence': 0.0, + 'reasoning': 'Message too short or no action taken', + 'key_information': [], + 'message_snippet': message_text[:50] + '...' + if len(message_text) > 50 + else message_text, + } + self._evaluation_cache[cache_key] = result + return result + + evaluation_prompt = f""" +Analyze whether the following message demonstrates knowledge about this specific information: +TARGET KNOWLEDGE: "{self.target_knowledge}" +MESSAGE FROM {agent_name}: +{message_text} + +Determine if this message shows that {agent_name} has learned or possesses the target knowledge. +Consider these factors: +1. Does the message explicitly mention the target knowledge? +2. Does the message show understanding of the key concepts? +3. Does the message indicate the agent has learned this information? +4. Even if not explicitly stated, does the context suggest knowledge acquisition? 
+ +Respond ONLY with a JSON object in this exact format: +{{ + "has_knowledge": true/false, + "completeness": 0-100 (percentage of target knowledge demonstrated), + "accuracy": 0-100 (accuracy of the information shared), + "confidence": 0-100 (confidence in this evaluation), + "reasoning": "detailed explanation of why this agent does/doesn't have the knowledge", + "key_information_found": ["list", "of", "key", "pieces", "found"] +}} +""" + + try: + from litellm import acompletion + + print(f' Evaluating {agent_name} message with LLM...') + response = await acompletion( + model=self.model_provider._get_agenerate_model_name(), + messages=[{'role': 'user', 'content': evaluation_prompt}], + temperature=0.1, + ) + print(f' LLM response received for {agent_name}') + + response_content = response.choices[0].message.content + + json_match = re.search(r'\{.*\}', response_content, re.DOTALL) + if json_match: + result = json.loads(json_match.group()) + + def normalize_percentage(value): + if value is None: + return 0.0 + if value <= 1.0: + return value * 100 + return value + + llm_result = { + 'agent': agent_name, + 'has_knowledge': result.get('has_knowledge', False), + 'completeness': normalize_percentage( + result.get('completeness', 0.0) + ), + 'accuracy': normalize_percentage(result.get('accuracy', 0.0)), + 'confidence': normalize_percentage(result.get('confidence', 0.0)), + 'reasoning': result.get('reasoning', ''), + 'key_information': result.get('key_information_found', []), + 'message_snippet': message_text[:100] + '...' + if len(message_text) > 100 + else message_text, + } + self._evaluation_cache[cache_key] = llm_result + return llm_result + else: + return self._fallback_analysis(agent_name, message_text) + + except Exception as e: + print(f'LLM evaluation failed for {agent_name}: {e}') + return self._fallback_analysis(agent_name, message_text) + + def _fallback_analysis(self, agent_name: str, message_text: str) -> dict[str, Any]: + """Fallback keyword-based analysis if LLM evaluation fails.""" + knowledge_keywords = ['gpt-5', 'gpt5', 'may 2025', 'release'] + + message_lower = message_text.lower() + keywords_found = [kw for kw in knowledge_keywords if kw in message_lower] + + has_knowledge = len(keywords_found) >= 2 + + return { + 'agent': agent_name, + 'has_knowledge': has_knowledge, + 'completeness': len(keywords_found) * 25.0, + 'accuracy': 80.0 if has_knowledge else 0.0, + 'confidence': 60.0, + 'reasoning': f'Fallback keyword analysis. Found keywords: {keywords_found}', + 'key_information': keywords_found, + 'message_snippet': message_text[:100] + '...' 
+ if len(message_text) > 100 + else message_text, + } + + def __call__( + self, turn_number: int, messages: list[tuple[str, Message]] + ) -> list[tuple[str, tuple[tuple[str, int | float | bool], str]]]: + """Synchronous evaluation using fallback analysis.""" + knowledge_analysis = [] + + for source, message in messages: + if source == 'Environment': + continue + + message_text = message.to_natural_language() + analysis = self._fallback_analysis(source, message_text) + + if analysis['has_knowledge']: + knowledge_analysis.append(analysis) + if source != self.knowledge_source: + self.agents_with_knowledge.add(source) + + total_target_agents = ( + len(self.target_agents) + if self.target_agents + else len( + set( + msg[0] + for msg in messages + if msg[0] != 'Environment' and msg[0] != self.knowledge_source + ) + ) + ) + agents_reached = len(self.agents_with_knowledge) + + if total_target_agents > 0: + propagation_rate = agents_reached / total_target_agents + else: + propagation_rate = 0.0 + + if knowledge_analysis: + accuracy_rate = sum(a['accuracy'] for a in knowledge_analysis) / len( + knowledge_analysis + ) + if accuracy_rate <= 1.0: + accuracy_rate = accuracy_rate * 100 + else: + accuracy_rate = 0.0 + + analysis_details = { + 'total_knowledge_mentions': len(knowledge_analysis), + 'agents_with_knowledge': list(self.agents_with_knowledge), + 'propagation_rate': propagation_rate, + 'accuracy_rate': accuracy_rate, + 'knowledge_analysis': knowledge_analysis, + } + + accuracy_for_score = ( + accuracy_rate / 100 if accuracy_rate > 1.0 else accuracy_rate + ) + overall_score = (propagation_rate * 0.6 + accuracy_for_score * 0.4) * 10 + + comments = self._generate_evaluation_comments(analysis_details) + + return [('environment', (('knowledge_propagation', overall_score), comments))] + + async def __acall__( + self, turn_number: int, messages: list[tuple[str, Message]] + ) -> list[tuple[str, tuple[tuple[str, int | float | bool], str]]]: + """Async version using LLM evaluation - analyzes both speaking and listening.""" + + knowledge_analysis = [] + + knowledge_messages = [] + + for source, message in messages: + if source == 'Environment': + continue + + message_text = message.to_natural_language() + analysis = await self._analyze_message_for_knowledge(source, message_text) + + if analysis['has_knowledge']: + knowledge_analysis.append(analysis) + knowledge_messages.append((source, message_text, analysis)) + if source != self.knowledge_source: + self.agents_with_knowledge.add(source) + + await self._analyze_knowledge_reception( + messages, knowledge_messages, knowledge_analysis + ) + + total_target_agents = ( + len(self.target_agents) + if self.target_agents + else len( + set( + msg[0] + for msg in messages + if msg[0] != 'Environment' and msg[0] != self.knowledge_source + ) + ) + ) + agents_reached = len(self.agents_with_knowledge) + + if total_target_agents > 0: + propagation_rate = agents_reached / total_target_agents + else: + propagation_rate = 0.0 + + if knowledge_analysis: + accuracy_rate = sum(a['accuracy'] for a in knowledge_analysis) / len( + knowledge_analysis + ) + if accuracy_rate <= 1.0: + accuracy_rate = accuracy_rate * 100 + else: + accuracy_rate = 0.0 + + analysis_details = { + 'total_knowledge_mentions': len(knowledge_analysis), + 'agents_with_knowledge': list(self.agents_with_knowledge), + 'propagation_rate': propagation_rate, + 'accuracy_rate': accuracy_rate, + 'knowledge_analysis': knowledge_analysis, + } + + accuracy_for_score = ( + accuracy_rate / 100 if accuracy_rate > 1.0 else 
accuracy_rate + ) + overall_score = (propagation_rate * 0.6 + accuracy_for_score * 0.4) * 10 + + comments = self._generate_evaluation_comments(analysis_details) + + return [('environment', (('knowledge_propagation', overall_score), comments))] + + def _generate_evaluation_comments(self, details: dict[str, Any]) -> str: + """Generate human-readable evaluation comments.""" + comments = [] + + comments.append('Knowledge Propagation Analysis:') + comments.append(f"- Knowledge mentions: {details['total_knowledge_mentions']}") + comments.append(f"- Agents reached: {len(details['agents_with_knowledge'])}") + comments.append(f"- Propagation rate: {details['propagation_rate']:.1%}") + comments.append(f"- Accuracy rate: {details['accuracy_rate']:.1f}%") + + if details['agents_with_knowledge']: + comments.append( + f"- Agents with knowledge: {', '.join(details['agents_with_knowledge'])}" + ) + else: + comments.append('- No knowledge propagation detected') + + if details['knowledge_analysis']: + comments.append('\nDetailed LLM Analysis:') + for i, analysis in enumerate(details['knowledge_analysis'][:3], 1): + comments.append(f"{i}. {analysis['agent']}:") + comments.append( + f" Has Knowledge: {analysis.get('has_knowledge', 'N/A')}" + ) + comments.append( + f" Completeness: {analysis.get('completeness', 0):.1f}%" + ) + comments.append(f" Accuracy: {analysis.get('accuracy', 0):.1f}%") + comments.append(f" Confidence: {analysis.get('confidence', 0):.1f}%") + if analysis.get('reasoning'): + comments.append(f" Reasoning: {analysis['reasoning']}") + if analysis.get('key_information'): + comments.append(f" Key Info: {analysis['key_information']}") + if analysis.get('message_snippet'): + comments.append(f" Message: \"{analysis['message_snippet']}\"") + + return '\n'.join(comments) + + async def _analyze_knowledge_reception( + self, messages, knowledge_messages, knowledge_analysis + ): + """Analyze if agents received knowledge by listening to others.""" + + conversation_flow = [] + for source, message in messages: + if source != 'Environment': + conversation_flow.append((source, message.to_natural_language())) + + for knowledge_source, knowledge_text, knowledge_info in knowledge_messages: + potential_listeners = set( + msg[0] + for msg in messages + if msg[0] != 'Environment' and msg[0] != knowledge_source + ) + + for listener in potential_listeners: + if listener not in self.agents_with_knowledge: + reception_analysis = ( + await self._analyze_knowledge_reception_for_agent( + listener, + knowledge_source, + knowledge_text, + conversation_flow, + ) + ) + + if reception_analysis.get('received_knowledge', False): + knowledge_analysis.append(reception_analysis) + self.agents_with_knowledge.add(listener) + + async def _analyze_knowledge_reception_for_agent( + self, listener, knowledge_source, knowledge_text, conversation_flow + ): + """Analyze if a specific agent received knowledge by listening.""" + + knowledge_index = -1 + for i, (speaker, text) in enumerate(conversation_flow): + if speaker == knowledge_source and knowledge_text[:50] in text: + knowledge_index = i + break + + if knowledge_index == -1: + return {'received_knowledge': False} + + listener_responses = [] + for i in range(knowledge_index + 1, len(conversation_flow)): + speaker, text = conversation_flow[i] + if speaker == listener: + listener_responses.append(text) + + if not listener_responses: + return { + 'agent': listener, + 'received_knowledge': True, + 'completeness': 80.0, + 'accuracy': 90.0, + 'confidence': 70.0, + 'reasoning': f'{listener} 
was present when {knowledge_source} shared knowledge', + 'key_information': ['GPT-5', 'heard from ' + knowledge_source], + 'message_snippet': f'(Heard from {knowledge_source})', + } + + response_text = ' '.join(listener_responses) + + reception_prompt = f""" +Analyze if {listener} received and understood knowledge from {knowledge_source}. + +KNOWLEDGE SHARED BY {knowledge_source}: +{knowledge_text} + +{listener}'S SUBSEQUENT RESPONSES: +{response_text} + +Determine if {listener}'s responses show they received and understood the shared information. +Look for: +1. Acknowledgments like "thanks", "got it", "I'll share this" +2. References to GPT-5 or release information +3. Plans to pass information to others +4. Any indication they heard and processed the information + +Respond ONLY with a JSON object: +{{ + "received_knowledge": true/false, + "completeness": 0-100, + "accuracy": 0-100, + "confidence": 0-100, + "reasoning": "explanation", + "key_information_found": ["list", "of", "indicators"] +}} +""" + + try: + from litellm import acompletion + + response = await acompletion( + model=self.model_provider._get_agenerate_model_name(), + messages=[{'role': 'user', 'content': reception_prompt}], + temperature=0.1, + ) + + response_content = response.choices[0].message.content + json_match = re.search(r'\{.*\}', response_content, re.DOTALL) + + if json_match: + result = json.loads(json_match.group()) + return { + 'agent': listener, + 'received_knowledge': result.get('received_knowledge', False), + 'completeness': result.get('completeness', 0.0), + 'accuracy': result.get('accuracy', 0.0), + 'confidence': result.get('confidence', 0.0), + 'reasoning': result.get('reasoning', ''), + 'key_information': result.get('key_information_found', []), + 'message_snippet': response_text[:100] + '...' + if len(response_text) > 100 + else response_text, + } + else: + return {'received_knowledge': False} + + except Exception as e: + print(f'Reception analysis failed for {listener}: {e}') + return {'received_knowledge': False} diff --git a/examples/social_graph_demo/knowledge_agents.py b/examples/social_graph_demo/knowledge_agents.py new file mode 100644 index 0000000..d6b2554 --- /dev/null +++ b/examples/social_graph_demo/knowledge_agents.py @@ -0,0 +1,128 @@ +""" +Knowledge-aware agents that can store, share, and propagate information through social networks. 
+""" + +from typing import Any + +from tiny_chat import AgentAction, BaseAgentProfile, LLMAgent, Observation + + +class KnowledgeAgent(LLMAgent): + """An agent capable of storing and sharing knowledge through memory.""" + + def __init__( + self, + agent_name: str | None = None, + uuid_str: str | None = None, + agent_profile: BaseAgentProfile | dict[str, Any] | None = None, + profile_jsonl_path: str | None = None, + model_provider: Any = None, + script_like: bool = False, + initial_knowledge: list[str] | None = None, + ) -> None: + super().__init__( + agent_name=agent_name, + uuid_str=uuid_str, + agent_profile=agent_profile, + profile_jsonl_path=profile_jsonl_path, + model_provider=model_provider, + script_like=script_like, + ) + + self.memory: list[str] = initial_knowledge or [] + + def add_knowledge(self, knowledge: str) -> None: + """Add new knowledge to agent's memory.""" + if knowledge and knowledge not in self.memory: + self.memory.append(knowledge) + + def get_all_knowledge(self) -> list[str]: + """Get all knowledge stored in memory.""" + return self.memory.copy() + + def search_knowledge(self, query: str) -> list[str]: + """Search for relevant knowledge based on query.""" + relevant_knowledge = [] + query_lower = query.lower() + + for knowledge in self.memory: + if any(keyword in knowledge.lower() for keyword in query_lower.split()): + relevant_knowledge.append(knowledge) + + return relevant_knowledge + + async def act(self, obs: Observation) -> AgentAction: + """Enhanced act method that considers knowledge sharing and learning.""" + self.recv_message('Environment', obs) + await self._ensure_goal() + + if self._only_none_action(obs.available_actions): + return AgentAction(action_type='none', argument='') + + await self._learn_from_observation(obs) + + enhanced_history = self._build_enhanced_history() + + action = await self._model_provider.agenerate_action( + history=enhanced_history, + turn_number=obs.turn_number, + action_types=obs.available_actions, + agent=self.agent_name, + goal=self.goal, + script_like=self.script_like, + ) + + await self._learn_from_own_action(action) + + return action + + def _build_enhanced_history(self) -> str: + """Build conversation history enhanced with agent's knowledge.""" + base_history = self._history_text(self.inbox) + + if self.memory: + knowledge_context = 'My current knowledge: ' + '; '.join(self.memory) + return f'{knowledge_context}\n\n{base_history}' + + return base_history + + def _extract_knowledge_from_conversation(self, conversation: str) -> list[str]: + """Extract potential knowledge from conversation text.""" + knowledge_indicators = [ + 'will be released', + 'is scheduled for', + 'announced that', + 'confirmed that', + 'stated that', + ] + + extracted = [] + sentences = conversation.split('.') + + for sentence in sentences: + sentence = sentence.strip() + if any(indicator in sentence.lower() for indicator in knowledge_indicators): + if len(sentence) > 20: + extracted.append(sentence) + + return extracted + + async def _learn_from_observation(self, obs: Observation) -> None: + """Learn knowledge from observation (what other agents said).""" + obs_text = obs.to_natural_language() + new_knowledge = self._extract_knowledge_from_conversation(obs_text) + + for knowledge in new_knowledge: + print(f'[{self.agent_name}] Learned: {knowledge}') + self.add_knowledge(knowledge) + + async def _learn_from_own_action(self, action: AgentAction) -> None: + """Learn knowledge from own action (what I just said).""" + if action.action_type == 'speak': + 
action_text = action.to_natural_language() + new_knowledge = self._extract_knowledge_from_conversation(action_text) + + for knowledge in new_knowledge: + if knowledge not in self.memory: + print(f'[{self.agent_name}] Reinforced knowledge: {knowledge}') + self.add_knowledge(knowledge) diff --git a/examples/social_graph_demo/social_network_knowledge_demo.py b/examples/social_graph_demo/social_network_knowledge_demo.py new file mode 100644 index 0000000..1890a9a --- /dev/null +++ b/examples/social_graph_demo/social_network_knowledge_demo.py @@ -0,0 +1,262 @@ +""" +Social Network Knowledge Propagation Benchmark + +This demo implements a knowledge propagation scenario where agents share information +through a social network. The scenario features Sam Altman sharing GPT-5 release +information with other agents through social connections. +""" + +import asyncio +import os +import sys +from pathlib import Path + +sys.path.append(str(Path(__file__).parent)) + +import glob +import json + +from demo_knowledge_evaluator import DemoKnowledgeEvaluator + +from tiny_chat import SimpleMessage, TinyChatBackground +from tiny_chat.server.core import create_server + + +async def run_custom_evaluation_from_logs(evaluator: DemoKnowledgeEvaluator): + """Run custom evaluation on the most recent conversation log.""" + + log_pattern = 'conversation_logs/conversation_*.json' + log_files = glob.glob(log_pattern) + + if not log_files: + print('No conversation logs found for evaluation') + return + + latest_log = max(log_files, key=os.path.getctime) + print(f'Evaluating conversation log: {latest_log}') + + try: + with open(latest_log, encoding='utf-8') as f: + conversation_data = json.load(f) + + evaluator_messages = [] + + if 'conversation_history' in conversation_data: + for msg_data in conversation_data['conversation_history']: + try: + if ( + isinstance(msg_data, dict) + and 'agent' in msg_data + and 'content' in msg_data + ): + source = msg_data['agent'] + message_content = msg_data['content'] + + if message_content.startswith( + 'said: "' + ) and message_content.endswith('"'): + message_content = message_content[7:-1] + elif message_content == 'did nothing': + continue + + message = SimpleMessage(message=message_content) + evaluator_messages.append((source, message)) + + except Exception as e: + print(f'Error processing message: {e}') + continue + else: + print('No conversation_history found in log file') + return + + if not evaluator_messages: + print('No evaluable messages found in conversation log') + return + + print(f'Found {len(evaluator_messages)} evaluable messages:') + for source, msg in evaluator_messages: + content = msg.to_natural_language() + preview = content[:60] + ('...' if len(content) > 60 else '') + print(f' {source}: {preview}') + + print(f'\nCalling evaluator with {len(evaluator_messages)} messages...') + evaluation_results = await evaluator.__acall__( + turn_number=len(evaluator_messages), messages=evaluator_messages + ) + print( + f'Evaluator completed. 
Results: {len(evaluation_results) if evaluation_results else 0} items' + ) + + print('\n' + '=' * 50) + print('CUSTOM KNOWLEDGE EVALUATION') + print('=' * 50) + + for metric_name, metric_data in evaluation_results: + if isinstance(metric_data, tuple) and len(metric_data) == 2: + score_info, comments = metric_data + if isinstance(score_info, tuple) and len(score_info) == 2: + actual_metric_name, actual_score = score_info + print(f'\n{actual_metric_name.upper()}: {actual_score:.2f}/10') + else: + print(f'\n{metric_name.upper()}: {score_info:.2f}/10') + print('Comments:') + print(comments) + else: + print(f'\n{metric_name.upper()}: {metric_data}') + + except Exception as e: + print(f'Error during evaluation: {e}') + import traceback + + traceback.print_exc() + + +async def main(): + """Main demo function.""" + + api_key = os.getenv('OPENAI_API_KEY') + if not api_key: + print('Warning: OPENAI_API_KEY not set. Demo may not work properly.') + return + + agent_configs = [ + { + 'name': 'Sam Altman', + 'type': 'llm', + 'goal': 'I am the CEO of OpenAI with insider knowledge. I know GPT-5 will be released in May 2025. I should share this information when Alice asks about upcoming AI developments.', + 'model_provider': 'gpt-4o-mini', + 'speaking_id': 0, + }, + { + 'name': 'Alice Chen', + 'type': 'llm', + 'goal': 'I am an AI researcher. I MUST ask Sam Altman about upcoming AI releases first, then immediately share any information I learn with Bob. I will not leave until I complete both tasks.', + 'model_provider': 'gpt-4o-mini', + 'speaking_id': 1, + }, + { + 'name': 'Bob Wilson', + 'type': 'llm', + 'goal': 'I am a tech journalist. I MUST wait for Alice to share GPT-5 information with me, then share this information with the Reporter. I will not leave until I complete both tasks.', + 'model_provider': 'gpt-4o-mini', + 'speaking_id': 2, + }, + { + 'name': 'Reporter', + 'type': 'llm', + 'goal': 'I need GPT-5 release information for my story. I MUST wait for Bob to share information with me and ask follow-up questions to get complete details. I will not leave until I get the information.', + 'model_provider': 'gpt-4o-mini', + 'speaking_id': 3, + }, + ] + + background = TinyChatBackground( + scenario='A professional AI industry discussion where knowledge about GPT-5 release spreads through the network', + agent_configs=[ + { + 'name': 'Sam Altman', + 'background': 'Sam Altman is the CEO of OpenAI with exclusive insider knowledge that GPT-5 will be released in May 2025. He is willing to share this information with AI researchers.', + 'goal': 'Share GPT-5 release information when asked by Alice about upcoming AI developments', + }, + { + 'name': 'Alice Chen', + 'background': 'Alice is an AI researcher who MUST complete the knowledge chain. She will ask Sam about GPT-5, then immediately inform Bob. She stays until both tasks are done.', + 'goal': 'Ask Sam about GPT-5 releases, then actively share the information with Bob to continue the knowledge chain', + }, + { + 'name': 'Bob Wilson', + 'background': 'Bob is a tech journalist who MUST wait for Alice to share GPT-5 information, then pass it to Reporter. He stays until both tasks are completed.', + 'goal': 'Receive GPT-5 information from Alice, then actively share it with Reporter to complete the knowledge propagation', + }, + { + 'name': 'Reporter', + 'background': 'Reporter is investigating GPT-5 for breaking news. 
Must stay and actively seek information from Bob until getting complete GPT-5 release details.', + 'goal': 'Actively ask Bob for GPT-5 information and stay until receiving complete release timeline details', + }, + ], + ) + + obs_control = { + 'mode': 'local', + 'neighbor_map': { + 'Sam Altman': ['Alice Chen'], + 'Alice Chen': ['Sam Altman', 'Bob Wilson'], + 'Bob Wilson': ['Alice Chen', 'Reporter'], + 'Reporter': ['Bob Wilson'], + }, + } + + print('Social Network Knowledge Propagation Demo') + print('Scenario: GPT-5 Release Information Spreading') + print('-' * 50) + print('Starting knowledge propagation scenario...') + print('Social network topology:') + print(' Sam Altman -> Alice Chen') + print(' Alice Chen -> Sam Altman, Bob Wilson') + print(' Bob Wilson -> Alice Chen, Reporter') + print(' Reporter -> Bob Wilson') + print() + + print('Expected knowledge propagation chain:') + print( + "1. Sam Altman (Source): Has exclusive knowledge 'GPT-5 will be released in May 2025'" + ) + print('2. Alice Chen asks Sam about upcoming AI releases') + print('3. Sam shares GPT-5 release information with Alice') + print('4. Bob Wilson asks Alice about GPT-5 release dates') + print('5. Alice shares the May 2025 information with Bob') + print('6. Reporter asks Bob about GPT-5 timeline') + print('7. Bob shares the release information with Reporter') + print('8. Knowledge successfully propagates: Sam → Alice → Bob → Reporter') + print() + + try: + demo_evaluator = DemoKnowledgeEvaluator( + target_knowledge='GPT-5 will be released in May 2025', + knowledge_source='Sam Altman', + target_agents=['Alice Chen', 'Bob Wilson', 'Reporter'], + ) + + print('Running conversation with private observation control...') + + async with create_server() as server: + episode_log = await server.run_conversation( + agent_configs=agent_configs, + background=background, + max_turns=12, + enable_evaluation=True, + return_log=True, + action_order='sequential', + obs_control=obs_control, + ) + + print('\nRunning custom knowledge evaluation...') + await run_custom_evaluation_from_logs(demo_evaluator) + + print('\n' + '=' * 50) + print('KNOWLEDGE PROPAGATION RESULTS') + print('=' * 50) + episode_length = getattr( + episode_log, 'episode_length', len(episode_log.rewards) + ) + print(f'Total conversation turns: {episode_length}') + + print('\nKnowledge propagation analysis:') + print('Knowledge indicators found: See evaluation results above') + print('Propagation success: See evaluation results above') + + print( + f'\nEvaluation Scores: {[(score, data) for score, data in episode_log.rewards]}' + ) + + print('\nConversation log saved and can be reviewed for detailed analysis.') + + except Exception as e: + print(f'Error running demo: {e}') + import traceback + + traceback.print_exc() + + +if __name__ == '__main__': + asyncio.run(main())
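# --------------------------------------------------------------------------
# Usage sketch (not part of the patch): exercising DemoKnowledgeEvaluator's
# synchronous fallback path, which relies only on keyword matching and never
# calls the LLM. Run from examples/social_graph_demo/ so the local import
# resolves; note that __init__ still builds an OpenAI provider config, so the
# same environment setup as the demo (OPENAI_API_KEY etc.) is assumed.
# --------------------------------------------------------------------------
from demo_knowledge_evaluator import DemoKnowledgeEvaluator

from tiny_chat import SimpleMessage

evaluator = DemoKnowledgeEvaluator(
    target_knowledge='GPT-5 will be released in May 2025',
    knowledge_source='Sam Altman',
    target_agents=['Alice Chen', 'Bob Wilson', 'Reporter'],
)

# Two turns: the source states the fact, one of the three targets repeats it.
messages = [
    ('Sam Altman', SimpleMessage(message='GPT-5 will be released in May 2025.')),
    ('Alice Chen', SimpleMessage(message='Thanks Sam, I will tell Bob the GPT-5 release is May 2025.')),
]

# __call__ scores (propagation_rate * 0.6 + accuracy * 0.4) * 10; with one of
# three targets reached and 80% fallback accuracy this lands around 5.2.
for target, ((metric, score), comments) in evaluator(turn_number=2, messages=messages):
    print(f'{target}: {metric} = {score:.2f}/10')
    print(comments)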
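# --------------------------------------------------------------------------
# Sketch (also not part of the patch): sanity-checking the obs_control
# neighbor_map used above. Under local observation, information can only move
# between listed neighbors, so the minimum number of hand-offs from the source
# to each agent is the BFS distance over this adjacency - i.e. the Reporter
# can only be reached via Sam -> Alice -> Bob -> Reporter.
# --------------------------------------------------------------------------
from collections import deque

neighbor_map = {
    'Sam Altman': ['Alice Chen'],
    'Alice Chen': ['Sam Altman', 'Bob Wilson'],
    'Bob Wilson': ['Alice Chen', 'Reporter'],
    'Reporter': ['Bob Wilson'],
}


def hops_from(source: str) -> dict[str, int]:
    """Breadth-first hop count from the knowledge source to every agent."""
    dist = {source: 0}
    queue = deque([source])
    while queue:
        node = queue.popleft()
        for neighbor in neighbor_map.get(node, []):
            if neighbor not in dist:
                dist[neighbor] = dist[node] + 1
                queue.append(neighbor)
    return dist


print(hops_from('Sam Altman'))
# {'Sam Altman': 0, 'Alice Chen': 1, 'Bob Wilson': 2, 'Reporter': 3}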
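# --------------------------------------------------------------------------
# Sketch: the entry shape that run_custom_evaluation_from_logs expects in
# conversation_logs/*.json, inferred from the parsing code above rather than
# from a documented schema. 'said: "..."' wrappers are stripped and
# 'did nothing' turns are skipped before messages reach the evaluator.
# --------------------------------------------------------------------------
example_log = {
    'conversation_history': [
        {'agent': 'Sam Altman', 'content': 'said: "GPT-5 will be released in May 2025."'},
        {'agent': 'Alice Chen', 'content': 'did nothing'},  # skipped by the parser
    ]
}

raw = example_log['conversation_history'][0]['content']
if raw.startswith('said: "') and raw.endswith('"'):
    print(raw[7:-1])  # -> GPT-5 will be released in May 2025.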