Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions lib/conflicts/llm_baselines/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Convenience targets for running the LLM baseline experiments via Groq.
# Every target here is a command, not a file, so all of them are declared
# .PHONY (the original list omitted run-qwen and setup-env, which would
# silently break if files with those names ever appeared).
.PHONY: help install clean run-zero run-one run-few run-qwen run-all setup-env

help: ## Show this help message
	@echo "Available commands:"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'

install: ## Install the package and dependencies
	pip install -e .

run-zero: ## Run zero-shot baseline
	python run_baseline.py --config-name=zero_shot

run-one: ## Run one-shot baseline
	python run_baseline.py --config-name=one_shot

run-few: ## Run few-shot baseline
	python run_baseline.py --config-name=few_shot

run-qwen: ## Run with Qwen model
	python run_baseline.py --config-name=qwen_models

# NOTE(review): run-all intentionally excludes run-qwen (model-specific run);
# confirm this is the desired behavior.
run-all: run-zero run-one run-few ## Run all baselines

clean: ## Clean output files (destructive: removes experiment outputs and wandb logs)
	rm -rf outputs/
	rm -rf wandb/
	rm -rf __pycache__/
	rm -rf .pytest_cache/
	find . -name "*.pyc" -delete
	find . -name "__pycache__" -type d -exec rm -rf {} +

setup-env: ## Setup environment variables (requires manual API key)
	@echo "Please set your Groq API key:"
	@echo "export GROQ_API_KEY='your_api_key_here'"
	@echo "Then run: make install"
94 changes: 94 additions & 0 deletions lib/conflicts/llm_baselines/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# LLM Baselines for Clinical Text Conflict Classification

This package provides zero-shot, one-shot, and few-shot prompting baselines for clinical text
conflict classification using various LLM models via Groq API.

## Installation

1. Install dependencies:

```bash
cd lib/conflicts/llm_baselines
pip install -e .
```

2. Set up your Groq API key:

```bash
export GROQ_API_KEY="your_groq_api_key_here"
```

## Usage

### Basic Usage

Run a zero-shot baseline:

```bash
python run_baseline.py --config-name=zero_shot
```

Run a one-shot baseline:

```bash
python run_baseline.py --config-name=one_shot
```

Run a few-shot baseline:

```bash
python run_baseline.py --config-name=few_shot
```

### Advanced Usage

Run with a specific model:

```bash
python run_baseline.py --config-name=qwen_models model.name=qwen/qwen2.5-72b-instruct
```

Run with custom parameters:

```bash
python run_baseline.py \
--config-name=few_shot \
experiment.num_examples=10 \
experiment.batch_size=5 \
model.name=qwen/qwen2.5-14b-instruct
```

### Configuration

The package uses Hydra for configuration management. Key configuration options:

- `experiment.shot_type`: "zero", "one", or "few"
- `experiment.num_examples`: Number of examples for few-shot prompting
- `experiment.batch_size`: Batch size for processing
- `model.name`: Model name from Groq API
- `data.data_path`: Path to the conflict dataset

## Project Structure

This package uses a simplified single-file design:

```
llm_baselines/
├── llm_baseline.py # All functionality in one consolidated module
├── __init__.py # Package exports
└── configs/ # Configuration files
```

All classes and functions are available from the main module:

- `LLMBaselineRunner` - Main experiment runner
- `GroqLLMClient` - Groq API client
- `LLMEvaluator` - Evaluation metrics
- `ZeroShotPrompt`, `OneShotPrompt`, `FewShotPrompt` - Prompt templates
- `get_prompt_template()` - Factory function for prompts

## Requirements

- Python 3.11+
- Groq API key
- See `pyproject.toml` for full dependency list
1 change: 1 addition & 0 deletions lib/conflicts/llm_baselines/configs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Configs module for llm_baselines
35 changes: 35 additions & 0 deletions lib/conflicts/llm_baselines/configs/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Base Hydra configuration shared by every shot-type config
# (zero_shot.yaml, one_shot.yaml, few_shot.yaml compose on top of this
# via `defaults: - default`).
defaults:
  - _self_
  - override /hydra/sweeper: basic
  - override /hydra/launcher: basic

# Data configuration
data:
  test_ratio: 0.2   # Fraction of the dataset held out for testing
  val_ratio: 0.1    # Fraction of the dataset held out for validation
  max_length: 512   # Maximum input length passed to the model
  # NOTE(review): leading-underscore filename looks intentional but odd — verify path.
  data_path: "processed/_12092025.json"

# Model configuration (model identifier as exposed by the Groq API)
model:
  name: "qwen/qwen3-32b"

# API configuration
api:
  api_key: null  # Will use GROQ_API_KEY environment variable

# Experiment configuration
experiment:
  shot_type: "zero"  # Options: "zero", "one", "few"
  # Number of in-context examples included in the prompt
  # (0 = zero-shot, 1 = one-shot; few-shot configs override this, e.g. 6).
  num_examples: 0
  batch_size: 10     # Batch size for processing

# Logging configuration (Weights & Biases run metadata)
logging:
  output_dir: "outputs"
  entity: "clinical-dream-team"
  project: "llm-baselines"
  tags: ["llm", "conflict-classification", "clinical-text"]

# Global settings
global_seed: 42  # Seed applied across the experiment for reproducibility
10 changes: 10 additions & 0 deletions lib/conflicts/llm_baselines/configs/few_shot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Few-shot prompting baseline: composes on top of default.yaml and
# overrides only the experiment shot settings and the wandb tags.
defaults:
  - default

experiment:
  shot_type: "few"
  num_examples: 6   # In-context examples included in each prompt
  batch_size: 10

logging:
  tags: ["llm", "conflict-classification", "clinical-text", "few-shot"]
10 changes: 10 additions & 0 deletions lib/conflicts/llm_baselines/configs/one_shot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# One-shot prompting baseline: composes on top of default.yaml and
# overrides only the experiment shot settings and the wandb tags.
defaults:
  - default

experiment:
  shot_type: "one"
  num_examples: 1   # Exactly one in-context example per prompt
  batch_size: 15

logging:
  tags: ["llm", "conflict-classification", "clinical-text", "one-shot"]
10 changes: 10 additions & 0 deletions lib/conflicts/llm_baselines/configs/zero_shot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Zero-shot prompting baseline: composes on top of default.yaml and
# overrides only the experiment shot settings and the wandb tags.
defaults:
  - default

experiment:
  shot_type: "zero"
  num_examples: 0   # No in-context examples — instructions only
  batch_size: 20

logging:
  tags: ["llm", "conflict-classification", "clinical-text", "zero-shot"]
28 changes: 28 additions & 0 deletions lib/conflicts/llm_baselines/llm_baselines/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Public API for the ``llm_baselines`` package.

Re-exports the experiment runner, Groq API client, evaluator, prompt
templates, and shared constants from the single consolidated module
``llm_baseline`` so callers can write ``from llm_baselines import ...``.
"""

from .llm_baseline import (
    AVAILABLE_MODELS,
    CONFLICT_LABELS,
    DEFAULT_MODEL,
    FewShotPrompt,
    GroqLLMClient,
    LLMBaselineRunner,
    LLMEvaluator,
    OneShotPrompt,
    ZeroShotPrompt,
    get_prompt_template,
    main,
)

# Package version — keep in sync with pyproject.toml.
__version__ = "0.1.0"

# Names exported by `from llm_baselines import *`; mirrors the import above.
__all__ = [
    "LLMBaselineRunner",
    "GroqLLMClient",
    "LLMEvaluator",
    "ZeroShotPrompt",
    "OneShotPrompt",
    "FewShotPrompt",
    "get_prompt_template",
    "main",
    "CONFLICT_LABELS",
    "AVAILABLE_MODELS",
    "DEFAULT_MODEL",
]
Loading