Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions lib/conflicts/llm_baselines/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Convenience targets for running the LLM baseline experiments via Groq.
# Every target here is a command, not a file, so all of them are declared
# .PHONY (the original list omitted run-qwen and setup-env, which would
# silently break if files with those names ever appeared).
.PHONY: help install clean run-zero run-one run-few run-qwen run-all setup-env

help: ## Show this help message
	@echo "Available commands:"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'

install: ## Install the package and dependencies
	pip install -e .

run-zero: ## Run zero-shot baseline
	python run_baseline.py --config-name=zero_shot

run-one: ## Run one-shot baseline
	python run_baseline.py --config-name=one_shot

run-few: ## Run few-shot baseline
	python run_baseline.py --config-name=few_shot

run-qwen: ## Run with Qwen model
	python run_baseline.py --config-name=qwen_models

# NOTE(review): run-all intentionally excludes run-qwen (model-specific run);
# confirm this is the desired behavior.
run-all: run-zero run-one run-few ## Run all baselines

clean: ## Clean output files (destructive: removes experiment outputs and wandb logs)
	rm -rf outputs/
	rm -rf wandb/
	rm -rf __pycache__/
	rm -rf .pytest_cache/
	find . -name "*.pyc" -delete
	find . -name "__pycache__" -type d -exec rm -rf {} +

setup-env: ## Setup environment variables (requires manual API key)
	@echo "Please set your Groq API key:"
	@echo "export GROQ_API_KEY='your_api_key_here'"
	@echo "Then run: make install"
94 changes: 94 additions & 0 deletions lib/conflicts/llm_baselines/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# LLM Baselines for Clinical Text Conflict Classification

This package provides zero-shot, one-shot, and few-shot prompting baselines for clinical text
conflict classification using various LLM models via Groq API.

## Installation

1. Install dependencies:

```bash
cd lib/conflicts/llm_baselines
pip install -e .
```

2. Set up your Groq API key:

```bash
export GROQ_API_KEY="your_groq_api_key_here"
```

## Usage

### Basic Usage

Run a zero-shot baseline:

```bash
python run_baseline.py --config-name=zero_shot
```

Run a one-shot baseline:

```bash
python run_baseline.py --config-name=one_shot
```

Run a few-shot baseline:

```bash
python run_baseline.py --config-name=few_shot
```

### Advanced Usage

Run with a specific model:

```bash
python run_baseline.py --config-name=qwen_models model.name=qwen/qwen2.5-72b-instruct
```

Run with custom parameters:

```bash
python run_baseline.py \
--config-name=few_shot \
experiment.num_examples=10 \
experiment.batch_size=5 \
model.name=qwen/qwen2.5-14b-instruct
```

### Configuration

The package uses Hydra for configuration management. Key configuration options:

- `experiment.shot_type`: "zero", "one", or "few"
- `experiment.num_examples`: Number of examples for few-shot prompting
- `experiment.batch_size`: Batch size for processing
- `model.name`: Model name from Groq API
- `data.data_path`: Path to the conflict dataset

## Project Structure

This package uses a simplified single-file design:

```
llm_baselines/
├── llm_baseline.py # All functionality in one consolidated module
├── __init__.py # Package exports
└── configs/ # Configuration files
```

All classes and functions are available from the main module:

- `LLMBaselineRunner` - Main experiment runner
- `GroqLLMClient` - Groq API client
- `LLMEvaluator` - Evaluation metrics
- `ZeroShotPrompt`, `OneShotPrompt`, `FewShotPrompt` - Prompt templates
- `get_prompt_template()` - Factory function for prompts

## Requirements

- Python 3.11+
- Groq API key
- See `pyproject.toml` for full dependency list
1 change: 1 addition & 0 deletions lib/conflicts/llm_baselines/configs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Configs module for llm_baselines
35 changes: 35 additions & 0 deletions lib/conflicts/llm_baselines/configs/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Base Hydra configuration shared by every shot-type config
# (zero_shot.yaml, one_shot.yaml, few_shot.yaml compose on top of this
# via `defaults: - default`).
defaults:
  - _self_
  - override /hydra/sweeper: basic
  - override /hydra/launcher: basic

# Data configuration
data:
  test_ratio: 0.2   # Fraction of the dataset held out for testing
  val_ratio: 0.1    # Fraction of the dataset held out for validation
  max_length: 512   # Maximum input length passed to the model
  # NOTE(review): leading-underscore filename looks intentional but odd — verify path.
  data_path: "processed/_12092025.json"

# Model configuration (model identifier as exposed by the Groq API)
model:
  name: "qwen/qwen3-32b"

# API configuration
api:
  api_key: null  # Will use GROQ_API_KEY environment variable

# Experiment configuration
experiment:
  shot_type: "zero"  # Options: "zero", "one", "few"
  # Number of in-context examples included in the prompt
  # (0 = zero-shot, 1 = one-shot; few-shot configs override this, e.g. 6).
  num_examples: 0
  batch_size: 10     # Batch size for processing

# Logging configuration (Weights & Biases run metadata)
logging:
  output_dir: "outputs"
  entity: "clinical-dream-team"
  project: "llm-baselines"
  tags: ["llm", "conflict-classification", "clinical-text"]

# Global settings
global_seed: 42  # Seed applied across the experiment for reproducibility
10 changes: 10 additions & 0 deletions lib/conflicts/llm_baselines/configs/few_shot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Few-shot prompting baseline: composes on top of default.yaml and
# overrides only the experiment shot settings and the wandb tags.
defaults:
  - default

experiment:
  shot_type: "few"
  num_examples: 6   # In-context examples included in each prompt
  batch_size: 10

logging:
  tags: ["llm", "conflict-classification", "clinical-text", "few-shot"]
10 changes: 10 additions & 0 deletions lib/conflicts/llm_baselines/configs/one_shot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# One-shot prompting baseline: composes on top of default.yaml and
# overrides only the experiment shot settings and the wandb tags.
defaults:
  - default

experiment:
  shot_type: "one"
  num_examples: 1   # Exactly one in-context example per prompt
  batch_size: 15

logging:
  tags: ["llm", "conflict-classification", "clinical-text", "one-shot"]
10 changes: 10 additions & 0 deletions lib/conflicts/llm_baselines/configs/zero_shot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Zero-shot prompting baseline: composes on top of default.yaml and
# overrides only the experiment shot settings and the wandb tags.
defaults:
  - default

experiment:
  shot_type: "zero"
  num_examples: 0   # No in-context examples — instructions only
  batch_size: 20

logging:
  tags: ["llm", "conflict-classification", "clinical-text", "zero-shot"]
28 changes: 28 additions & 0 deletions lib/conflicts/llm_baselines/llm_baselines/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Public API for the ``llm_baselines`` package.

Re-exports the experiment runner, Groq API client, evaluator, prompt
templates, and shared constants from the single consolidated module
``llm_baseline`` so callers can write ``from llm_baselines import ...``.
"""

from .llm_baseline import (
    AVAILABLE_MODELS,
    CONFLICT_LABELS,
    DEFAULT_MODEL,
    FewShotPrompt,
    GroqLLMClient,
    LLMBaselineRunner,
    LLMEvaluator,
    OneShotPrompt,
    ZeroShotPrompt,
    get_prompt_template,
    main,
)

# Package version — keep in sync with pyproject.toml.
__version__ = "0.1.0"

# Names exported by `from llm_baselines import *`; mirrors the import above.
__all__ = [
    "LLMBaselineRunner",
    "GroqLLMClient",
    "LLMEvaluator",
    "ZeroShotPrompt",
    "OneShotPrompt",
    "FewShotPrompt",
    "get_prompt_template",
    "main",
    "CONFLICT_LABELS",
    "AVAILABLE_MODELS",
    "DEFAULT_MODEL",
]
Loading