diff --git a/README.md b/README.md index 5464c6c8..1ef8d521 100644 --- a/README.md +++ b/README.md @@ -265,7 +265,8 @@ pre-commit install ``` You can then run all tests by running `pytest`, or only the CI/CD tests by -running `CICD=1 pytest`. +running `CICD=1 pytest`. See [test/MARKERS_GUIDE.md](test/MARKERS_GUIDE.md) for +details on running specific test categories (e.g., by backend, resource requirements). Tip: you can bypass the hooks by passing the `-n` flag to `git commit`. This is sometimes helpful for intermediate commits that you intend to later diff --git a/docs/tutorial.md b/docs/tutorial.md index 5270ab2c..4bca5c72 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -2,27 +2,54 @@ ## Table of Contents -- [Chapter 1: What Is Generative Programming?](#chapter-1-what-is-generative-programming) -- [Chapter 2: Getting Started with Generative Programming in Mellea](#chapter-2-getting-started-with-generative-programming-in-mellea) - - [Requirements](#requirements) - - [Validating Requirements](#validating-requirements) - - [Instruct - Validate - Repair](#instruct---validate---repair) - - [Model Options](#modeloptions) -- [Chapter 3: Overview of the Standard Library](#chapter-3-overview-of-the-standard-library) -- [Chapter 4: Generative Slots](#chapter-4-generative-slots) -- [Chapter 5: MObjects](#chapter-5-mobjects) - - [Case Study: Working with Documents](#case-study-working-with-documents) -- [Chapter 6: Tuning for Requirements and Components](#chapter-6-tuning-requirements-and-components) -- [Chapter 7: Context Management](#chapter-7-on-context-management) -- [Chapter 8: Implementing Agents](#chapter-8-implementing-agents) - - [Case Study: ReACT](#case-study-implementing-react-in-mellea) - - [The Guarded Nondeterminism Pattern](#guarded-nondeterminism) -- [Chapter 9: Interoperability with Other Frameworks](#chapter-9-interoperability-with-other-frameworks) -- [Chapter 10: Prompt Engineering for Mellea](#chapter-10-prompt-engineering-for-m) - - [Custom Templates](#custom-templates) -- [Chapter 11: Tool Calling](#chapter-11-tool-calling) -- [Chapter 12: Asynchronicity](#chapter-12-asynchronicity) -- [Appendix: Contributing to Mellea](#appendix-contributing-to-mellea) +- [Principles of Generative Programming: The Mellea Approach](#principles-of-generative-programming-the-mellea-approach) + - [Table of Contents](#table-of-contents) + - [Chapter 1: What Is Generative Programming](#chapter-1-what-is-generative-programming) + - [Chapter 2: Getting Started with Generative Programming in Mellea](#chapter-2-getting-started-with-generative-programming-in-mellea) + - [Requirements](#requirements) + - [Validating Requirements](#validating-requirements) + - [Instruct - Validate - Repair](#instruct---validate---repair) + - [ModelOptions](#modeloptions) + - [System Messages](#system-messages) + - [Conclusion](#conclusion) + - [Chapter 3: Overview of the Standard Library](#chapter-3-overview-of-the-standard-library) + - [Chapter 4: Generative Slots](#chapter-4-generative-slots) + - [Example: Sentiment Classifier](#example-sentiment-classifier) + - [Using Generative slots to Provide Compositionality Across Module Boundaries](#using-generative-slots-to-provide-compositionality-across-module-boundaries) + - [Chapter 5: MObjects](#chapter-5-mobjects) + - [Example: A Table as an MObject](#example-a-table-as-an-mobject) + - [Case Study: Working with Documents](#case-study-working-with-documents) + - [MObject methods are tools](#mobject-methods-are-tools) + - [Chapter 6: Tuning Requirements and 
Components](#chapter-6-tuning-requirements-and-components) + - [Problem Statement](#problem-statement) + - [Training the aLoRA Adapter](#training-the-alora-adapter) + - [Parameters](#parameters) + - [Upload to Hugging Face (Optional)](#upload-to-hugging-face-optional) + - [Integrating the Tuned Model into Mellea](#integrating-the-tuned-model-into-mellea) + - [Chapter 7: On Context Management](#chapter-7-on-context-management) + - [Chapter 8: Implementing Agents](#chapter-8-implementing-agents) + - [Case Study: Implementing ReACT in Mellea](#case-study-implementing-react-in-mellea) + - [Guarded Nondeterminism](#guarded-nondeterminism) + - [Chapter 9: Interoperability with Other Frameworks](#chapter-9-interoperability-with-other-frameworks) + - [Simple mcp server running Mellea](#simple-mcp-server-running-mellea) + - [Running Mellea programs as an openai compatible server (Experimental)](#running-mellea-programs-as-an-openai-compatible-server-experimental) + - [Example `m serve` application](#example-m-serve-application) + - [Chapter 10: Prompt Engineering for M](#chapter-10-prompt-engineering-for-m) + - [Templates](#templates) + - [Template Representations](#template-representations) + - [Customization](#customization) + - [Choosing a Template](#choosing-a-template) + - [Editing an Existing Class](#editing-an-existing-class) + - [Chapter 11: Tool Calling](#chapter-11-tool-calling) + - [Chapter 12: Asynchronicity](#chapter-12-asynchronicity) + - [Asynchronous Functions:](#asynchronous-functions) + - [Asynchronicity in Synchronous Functions](#asynchronicity-in-synchronous-functions) + - [Appendix: Contributing to Mellea](#appendix-contributing-to-mellea) + - [Contributor Guide: Getting Started](#contributor-guide-getting-started) + - [Contributor Guide: Requirements and Verifiers](#contributor-guide-requirements-and-verifiers) + - [Contributor Guide: Components](#contributor-guide-components) + - [Contributor Guide: Specialized Mify](#contributor-guide-specialized-mify) + - [Contributor Guide: Sessions](#contributor-guide-sessions) ## Chapter 1: What Is Generative Programming @@ -1408,7 +1435,8 @@ pre-commit install ``` You can then run all tests by running `pytest`, or only the CI/CD tests by -running `CICD=1 pytest`. +running `CICD=1 pytest`. See [test/MARKERS_GUIDE.md](../test/MARKERS_GUIDE.md) for +details on running specific test categories (e.g., by backend, resource requirements). Tip: you can bypass the hooks by passing the `-n` flag to `git commit`. This is sometimes helpful for intermediate commits that you intend to later diff --git a/pyproject.toml b/pyproject.toml index 2431f6b0..061edec6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ hf = [ "peft>=0.18.0", # aLoRA support was added in Peft 0.18.0 "transformers>=4.53.2", "trl==0.19.1", + "xgrammar", # Required for constrained decoding in granite_common ] vllm = [ @@ -112,6 +113,7 @@ dev = [ "pdm>=2.24.0", "pytest", "pytest-asyncio", + "psutil", # For test infrastructure: RAM detection in conftest.py "mypy>=1.17.0", "python-semantic-release~=7.32", "nbmake>=1.5.5", @@ -198,7 +200,22 @@ python_version = "3.10" [tool.pytest.ini_options] markers = [ - "qualitative: Marks the test as needing an exact output from an LLM; set by an ENV variable for CICD. 
All tests marked with this will xfail in CI/CD" + # Backend markers + "ollama: Tests requiring Ollama backend (local, light)", + "openai: Tests requiring OpenAI API (requires API key)", + "watsonx: Tests requiring Watsonx API (requires API key)", + "huggingface: Tests requiring HuggingFace backend (local, heavy)", + "vllm: Tests requiring vLLM backend (local, GPU required)", + "litellm: Tests requiring LiteLLM backend", + + # Capability markers + "requires_api_key: Tests requiring external API keys", + "requires_gpu: Tests requiring GPU", + "requires_heavy_ram: Tests requiring 48GB+ RAM", + "qualitative: Non-deterministic quality tests", + + # Composite markers + "llm: Tests that make LLM calls (needs at least Ollama)", ] asyncio_mode = "auto" # Don't require explicitly marking async tests. diff --git a/test/MARKERS_GUIDE.md b/test/MARKERS_GUIDE.md new file mode 100644 index 00000000..56029b6f --- /dev/null +++ b/test/MARKERS_GUIDE.md @@ -0,0 +1,433 @@ +# Pytest Markers Guide for Mellea Tests + +## Overview + +This guide explains the pytest marker system for categorizing and running mellea tests based on backend requirements, resource availability, and test characteristics. + +## 🎯 What's Automatic vs Manual + +### ✅ Automatic (No Configuration Needed) +When you run `pytest`, the system **automatically detects** and skips tests based on: +- **Ollama availability** - Checks if port 11434 is listening +- **API keys** - Checks environment variables (`OPENAI_API_KEY`, `WATSONX_APIKEY`, etc.) +- **GPU availability** - Checks for CUDA (NVIDIA) or MPS (Apple Silicon) via torch +- **System RAM** - Checks via `psutil.virtual_memory()` (if psutil installed) + +**You don't need to configure anything!** Just run `pytest` and tests will automatically skip with helpful messages if requirements aren't met. + +**Note:** +- GPU detection requires `torch` (included in `mellea[hf]` and `mellea[vllm]`) +- RAM detection requires `psutil` (included in dev dependencies) +- If you're not using dev dependencies, install with: `pip install psutil` + +### ⚠️ Manual (Developer Adds to Test Files) +Developers must **add markers** to test files to indicate what each test needs: +```python +# Developer adds these markers once per test file +pytestmark = [pytest.mark.ollama, pytest.mark.llm] +``` + +**Summary:** Markers are manual (one-time setup per test file), detection is automatic (every test run). + +### 🔧 Override Auto-Detection (Advanced) +Want to try running tests even when requirements aren't met? Use these pytest options: + +```bash +# Try GPU tests without GPU (will use CPU, may be slow/fail) +pytest --ignore-gpu-check test/backends/test_vllm.py + +# Try with less RAM than recommended +pytest --ignore-ram-check test/backends/test_huggingface.py + +# Try without Ollama running +pytest --ignore-ollama-check test/backends/test_ollama.py + +# Try without API keys (will fail at API call) +pytest --ignore-api-key-check test/backends/test_openai.py + +# Combine multiple overrides +pytest --ignore-gpu-check --ignore-ram-check -m "huggingface" +``` + +**Use Cases:** +- Testing with CPU when GPU tests might work (slower but functional) +- Trying with less RAM (might work for smaller models) +- Debugging test infrastructure + +**Warning:** Tests will likely fail if requirements aren't actually met! 
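+
+Under the hood, these flags are ordinary pytest options registered in `test/conftest.py`; the skip hook checks them before enforcing each auto-detected requirement. A simplified sketch of that logic (abridged from `pytest_runtest_setup` in `test/conftest.py`):
+
+```python
+# Abridged sketch of the override logic in test/conftest.py (GPU check only;
+# the real hook applies the same pattern to RAM, API keys, and Ollama).
+import pytest
+
+def pytest_runtest_setup(item):
+    capabilities = get_system_capabilities()  # defined earlier in conftest.py
+    ignore_gpu = item.config.getoption("--ignore-gpu-check", default=False)
+
+    # Skip only when the test declares the requirement, the system lacks it,
+    # and the user has not asked to ignore the check.
+    if item.get_closest_marker("requires_gpu") and not ignore_gpu:
+        if not capabilities["has_gpu"]:
+            pytest.skip("Skipping test: GPU not available")
+```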
+ +## Quick Start + +```bash +# Run all tests (auto-skips based on your system) +pytest + +# Run only fast unit tests (no LLM calls) +pytest -m "not llm" + +# Run Ollama tests only (local, light resources) +pytest -m "ollama" + +# Run tests that don't require API keys +pytest -m "not requires_api_key" + +# Run infrastructure tests only (skip quality tests) +pytest -m "not qualitative" + +# Run quality tests for Ollama +pytest -m "ollama and qualitative" +``` + +## Marker Categories + +### Backend Markers + +Specify which backend the test uses: + +- **`@pytest.mark.ollama`**: Tests requiring Ollama backend + - Local execution + - Light resources (CPU, ~2-4GB RAM) + - No API key required + - Example: `test/backends/test_ollama.py` + +- **`@pytest.mark.openai`**: Tests requiring OpenAI API + - Requires `OPENAI_API_KEY` environment variable + - Light resources (API calls only) + - Incurs API costs + - Example: `test/backends/test_vision_openai.py` + +- **`@pytest.mark.watsonx`**: Tests requiring Watsonx API + - Requires `WATSONX_APIKEY` or `IBM_CLOUD_API_KEY` environment variable + - Light resources (API calls only) + - Incurs API costs + - Example: `test/backends/test_watsonx.py` + +- **`@pytest.mark.huggingface`**: Tests requiring HuggingFace backend + - Local execution + - Heavy resources (GPU recommended, 16-32GB RAM, ~8GB VRAM) + - Downloads models (~3-8GB) + - No API key required (unless using gated models) + - Example: `test/backends/test_huggingface.py` + +- **`@pytest.mark.vllm`**: Tests requiring vLLM backend + - Local execution + - Heavy resources (GPU required, 16-32GB RAM, 8GB+ VRAM) + - Requires `VLLM_USE_V1=0` environment variable + - Example: `test/backends/test_vllm.py` + +- **`@pytest.mark.litellm`**: Tests requiring LiteLLM backend + - Requirements depend on underlying backend + - Example: `test/backends/test_litellm_ollama.py` + +### Capability Markers + +Specify resource or authentication requirements: + +- **`@pytest.mark.requires_api_key`**: Tests requiring external API keys + - Auto-skipped if required API key not found + - Use with backend markers (openai, watsonx) + +- **`@pytest.mark.requires_gpu`**: Tests requiring GPU + - Auto-skipped if no GPU detected + - Typically used with huggingface, vllm + +- **`@pytest.mark.requires_heavy_ram`**: Tests requiring 48GB+ RAM + - Auto-skipped if insufficient RAM detected + - Typically used with huggingface, vllm + +- **`@pytest.mark.qualitative`**: Non-deterministic quality tests + - Tests LLM output quality rather than infrastructure + - Skipped in CI (when `CICD=1`) + - May be flaky due to model variability + +### Composite Markers + +- **`@pytest.mark.llm`**: Tests that make LLM calls + - Requires at least Ollama to be available + - Use to distinguish from pure unit tests + +## Auto-Detection and Skipping + +The test suite automatically detects your system capabilities and skips tests that cannot run: + +### API Key Detection +```python +# Automatically checks for: +OPENAI_API_KEY # For OpenAI tests +WATSONX_APIKEY # For Watsonx tests +IBM_CLOUD_API_KEY # Alternative for Watsonx +HF_TOKEN # For gated HuggingFace models +``` + +### Backend Availability Detection +```python +# Automatically detects: +- Ollama availability (checks if port 11434 is listening) +``` + +### Resource Detection +```python +# Automatically detects: +- GPU availability (via torch.cuda.is_available()) +- GPU memory (via torch.cuda.get_device_properties()) +- System RAM (via psutil.virtual_memory()) +``` + +### Skip Messages +When a test is skipped, 
you'll see helpful messages: +``` +SKIPPED [1] test/conftest.py:120: Skipping test: OPENAI_API_KEY not found in environment +SKIPPED [1] test/conftest.py:125: Skipping test: GPU not available +SKIPPED [1] test/conftest.py:130: Skipping test: Insufficient RAM (16.0GB < 32GB) +SKIPPED [1] test/conftest.py:165: Skipping test: Ollama not available (port 11434 not listening) +``` + +## Usage Examples + +### Module-Level Markers + +Apply markers to all tests in a module using `pytestmark`: + +```python +# test/backends/test_ollama.py +import pytest + +# All tests in this module require Ollama and make LLM calls +pytestmark = [pytest.mark.ollama, pytest.mark.llm] + +def test_simple_instruct(session): + # This test inherits ollama and llm markers + ... +``` + +### Multiple Markers + +Combine markers for complex requirements: + +```python +# test/backends/test_huggingface.py +pytestmark = [ + pytest.mark.huggingface, + pytest.mark.llm, + pytest.mark.requires_gpu, + pytest.mark.requires_heavy_ram, +] +``` + +### Individual Test Markers + +Add markers to specific tests: + +```python +@pytest.mark.qualitative +def test_output_quality(session): + # This test checks LLM output quality + result = session.instruct("Write a poem") + assert "poem" in result.value.lower() +``` + +## Running Tests by Category + +### By Backend +```bash +# Ollama only +pytest -m "ollama" + +# HuggingFace only +pytest -m "huggingface" + +# All API-based backends +pytest -m "openai or watsonx" +``` + +### By Resource Requirements +```bash +# Light tests only (no GPU, no heavy RAM) +pytest -m "not (requires_gpu or requires_heavy_ram)" + +# Tests that work without API keys +pytest -m "not requires_api_key" + +# GPU tests only +pytest -m "requires_gpu" +``` + +### By Test Type +```bash +# Infrastructure tests only (deterministic) +pytest -m "not qualitative" + +# Quality tests only (non-deterministic) +pytest -m "qualitative" + +# Fast unit tests (no LLM calls) +pytest -m "not llm" +``` + +### Complex Combinations +```bash +# Ollama infrastructure tests +pytest -m "ollama and not qualitative" + +# All tests that work with just Ollama (no API keys, no GPU) +pytest -m "not (requires_api_key or requires_gpu or requires_heavy_ram)" + +# Quality tests for local backends only +pytest -m "qualitative and (ollama or huggingface or vllm)" +``` + +## CI/CD Integration + +### Current Behavior +- `CICD=1` environment variable skips all qualitative tests +- Module-level skips for heavy backends (huggingface, vllm, watsonx) + +### Recommended CI Matrix +```yaml +# .github/workflows/test.yml +jobs: + unit-tests: + # Fast unit tests, no LLM + run: pytest -m "not llm" + + ollama-tests: + # Ollama infrastructure tests + run: pytest -m "ollama and not qualitative" + + quality-tests: + # Optional: Run quality tests on schedule + if: github.event_name == 'schedule' + run: pytest -m "qualitative and ollama" +``` + +## Adding Markers to New Tests + +### Step 1: Identify Requirements +Ask yourself: +1. Which backend does this test use? +2. Does it require an API key? +3. Does it need a GPU? +4. Does it need heavy RAM (48GB+)? +5. Is it testing output quality (qualitative) or infrastructure? + +### Step 2: Add Appropriate Markers + +For a new Ollama test: +```python +# Use module-level marker if all tests use same backend +pytestmark = [pytest.mark.ollama, pytest.mark.llm] + +@pytest.mark.qualitative # Add if testing output quality +def test_my_new_feature(session): + ... 
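+
+# Purely infrastructural tests in the same module need no extra marker; they
+# simply inherit the module-level ollama/llm markers. (Illustrative sketch:
+# the test name and body below are hypothetical.)
+def test_backend_responds(session):
+    result = session.instruct("Say hello")
+    assert result is not None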
+```
+
+For a new HuggingFace test:
+```python
+pytestmark = [
+    pytest.mark.huggingface,
+    pytest.mark.llm,
+    pytest.mark.requires_gpu,
+    pytest.mark.requires_heavy_ram,
+]
+
+@pytest.mark.qualitative
+def test_my_new_feature(session):
+    ...
+```
+
+### Step 3: Test Your Markers
+```bash
+# Verify your test is properly marked
+pytest --collect-only -m "your_marker"
+
+# Run just your test
+pytest -k "test_my_new_feature"
+```
+
+## Troubleshooting
+
+### Test Not Running
+```bash
+# Check which markers are applied
+pytest --collect-only test/path/to/test.py
+
+# Check why test is being skipped
+pytest -v test/path/to/test.py
+
+# Force run despite auto-skip using the --ignore-*-check flags (will likely fail if requirements not met)
+pytest test/path/to/test.py --ignore-gpu-check --ignore-ram-check --ignore-ollama-check --ignore-api-key-check
+```
+
+### Marker Not Recognized
+```bash
+# List all registered markers
+pytest --markers
+
+# Check the marker configuration in pyproject.toml ([tool.pytest.ini_options])
+grep -A 25 "tool.pytest.ini_options" pyproject.toml
+```
+
+### Auto-Skip Not Working
+```bash
+# Debug system capabilities
+pytest --setup-show test/path/to/test.py
+
+# Check conftest.py detection logic
+# See test/conftest.py:get_system_capabilities()
+
+# Run with verbose output to see skip reasons
+pytest -v -s test/path/to/test.py
+```
+
+### Force Run Tests (Override Auto-Skip)
+```bash
+# Run a specific test ignoring auto-skip (useful for debugging)
+pytest test/backends/test_ollama.py --ignore-ollama-check
+
+# Run a specific marker with its checks overridden; tests fail if requirements aren't actually met
+pytest -m "ollama" -v --ignore-ollama-check
+
+# Note: Tests will fail if actual requirements (Ollama, GPU, etc.) aren't met
+# This is useful for testing the test infrastructure itself
+```
+
+## Best Practices
+
+1. **Use module-level markers** for consistent backend requirements
+2. **Combine markers** to accurately describe test requirements
+3. **Keep qualitative marker** for non-deterministic tests
+4. **Test locally** before pushing to ensure markers work correctly
+5. **Document special requirements** in test docstrings
+
+## Migration from Old System
+
+### Before (Old System)
+```python
+# Only qualitative marker
+@pytest.mark.qualitative
+def test_ollama_instruct(session):
+    ...
+```
+
+### After (New System)
+```python
+# Module-level backend markers
+pytestmark = [pytest.mark.ollama, pytest.mark.llm]
+
+# Keep qualitative for quality tests
+@pytest.mark.qualitative
+def test_ollama_instruct(session):
+    ...
+```
+
+## Related Files
+
+- `test/conftest.py`: Auto-detection and skip logic
+- `pyproject.toml`: Marker definitions and pytest configuration
+
+## Questions?
+
+For questions or issues with the marker system:
+1. Check this guide first
+2. 
Open an issue on GitHub with the `testing` label \ No newline at end of file diff --git a/test/backends/test_huggingface.py b/test/backends/test_huggingface.py index 925a07e3..6ae8da8a 100644 --- a/test/backends/test_huggingface.py +++ b/test/backends/test_huggingface.py @@ -1,29 +1,35 @@ import asyncio -from copy import copy import faulthandler import os import random import time -from typing import Any, Coroutine +from collections.abc import Coroutine +from copy import copy +from typing import Annotated, Any from unittest.mock import Mock import pydantic import pytest import torch -from typing_extensions import Annotated -# Skip entire module in CI since 17/18 tests are qualitative -pytestmark = pytest.mark.skipif( - int(os.environ.get("CICD", 0)) == 1, - reason="Skipping HuggingFace tests in CI - mostly qualitative tests", -) +# Mark all tests in this module with backend and resource requirements +pytestmark = [ + pytest.mark.huggingface, + pytest.mark.llm, + pytest.mark.requires_gpu, + pytest.mark.requires_heavy_ram, + # Skip entire module in CI since 17/18 tests are qualitative + pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping HuggingFace tests in CI - mostly qualitative tests", + ), +] from mellea import MelleaSession +from mellea.backends import ModelOption from mellea.backends.adapters import GraniteCommonAdapter from mellea.backends.cache import SimpleLRUCache -from mellea.formatters import TemplateFormatter from mellea.backends.huggingface import LocalHFBackend, _assert_correct_adapters -from mellea.backends import ModelOption from mellea.core import ( CBlock, Context, @@ -31,10 +37,9 @@ ValidationResult, default_output_to_bool, ) +from mellea.formatters import TemplateFormatter +from mellea.stdlib.components import Intrinsic, Message from mellea.stdlib.context import ChatContext, SimpleContext - -from mellea.stdlib.components import Message -from mellea.stdlib.components import Intrinsic from mellea.stdlib.requirements import ALoraRequirement, LLMaJRequirement diff --git a/test/backends/test_huggingface_tools.py b/test/backends/test_huggingface_tools.py index a908803f..9bbd097b 100644 --- a/test/backends/test_huggingface_tools.py +++ b/test/backends/test_huggingface_tools.py @@ -3,16 +3,22 @@ import pytest # Skip entire module in CI since the single test is qualitative -pytestmark = pytest.mark.skipif( - int(os.environ.get("CICD", 0)) == 1, - reason="Skipping HuggingFace tools tests in CI - qualitative test", -) +pytestmark = [ + pytest.mark.huggingface, + pytest.mark.llm, + pytest.mark.requires_gpu, + pytest.mark.requires_heavy_ram, + pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping HuggingFace tools tests in CI - qualitative test", + ), +] import mellea.backends.model_ids as model_ids from mellea import MelleaSession +from mellea.backends import ModelOption from mellea.backends.cache import SimpleLRUCache from mellea.backends.huggingface import LocalHFBackend -from mellea.backends import ModelOption from mellea.stdlib.context import ChatContext diff --git a/test/backends/test_litellm_ollama.py b/test/backends/test_litellm_ollama.py index bb2d3316..2fb70e19 100644 --- a/test/backends/test_litellm_ollama.py +++ b/test/backends/test_litellm_ollama.py @@ -1,16 +1,18 @@ import asyncio import os + import pytest +# Mark all tests in this module as requiring Ollama via LiteLLM +pytestmark = [pytest.mark.litellm, pytest.mark.ollama, pytest.mark.llm] + from mellea import MelleaSession, generative -from mellea.backends import 
ModelOption +from mellea.backends import ModelOption, model_ids from mellea.backends.litellm import LiteLLMBackend from mellea.core import CBlock -from mellea.stdlib.context import SimpleContext from mellea.stdlib.components import Message +from mellea.stdlib.context import SimpleContext from mellea.stdlib.sampling import RejectionSamplingStrategy -from mellea.backends import model_ids - _MODEL_ID = f"ollama_chat/{model_ids.IBM_GRANITE_4_MICRO_3B.ollama_name}" diff --git a/test/backends/test_litellm_watsonx.py b/test/backends/test_litellm_watsonx.py index 6116c428..014eeb2b 100644 --- a/test/backends/test_litellm_watsonx.py +++ b/test/backends/test_litellm_watsonx.py @@ -1,6 +1,15 @@ import asyncio + import pytest +# Mark all tests in this module as requiring Watsonx via LiteLLM +pytestmark = [ + pytest.mark.litellm, + pytest.mark.watsonx, + pytest.mark.llm, + pytest.mark.requires_api_key, +] + from mellea import MelleaSession from mellea.backends.litellm import LiteLLMBackend from mellea.core import CBlock @@ -16,7 +25,6 @@ def session(): def test_has_potential_event_loop_errors(session): """This test is specific to litellm backends that use watsonx/. It can be removed once that bug is fixed.""" - backend: LiteLLMBackend = session.backend potential_err = backend._has_potential_event_loop_errors() assert not potential_err, "first invocation in an event loop shouldn't flag errors" diff --git a/test/backends/test_ollama.py b/test/backends/test_ollama.py index 20636e8b..c679d95c 100644 --- a/test/backends/test_ollama.py +++ b/test/backends/test_ollama.py @@ -1,17 +1,20 @@ import asyncio import json +from typing import Annotated import pydantic import pytest -from typing_extensions import Annotated from mellea import start_session -from mellea.backends.ollama import OllamaModelBackend from mellea.backends import ModelOption +from mellea.backends.ollama import OllamaModelBackend from mellea.core import CBlock, Requirement from mellea.stdlib.context import SimpleContext from mellea.stdlib.requirements import simple_validate +# Mark all tests in this module as requiring Ollama +pytestmark = [pytest.mark.ollama, pytest.mark.llm] + @pytest.fixture(scope="function") def session(): @@ -40,7 +43,7 @@ def test_instruct_with_requirement(session): ) email_word_count_req = Requirement( - f"The email should be at most 100", + "The email should be at most 100", validation_fn=simple_validate(lambda x: len(" ".split(x)) <= 100), ) @@ -96,7 +99,6 @@ class Email(pydantic.BaseModel): # this is not guaranteed, due to the lack of regexp pattern # assert "@" in email.to.email_address # assert email.to.email_address.endswith("example.com") - pass @pytest.mark.qualitative diff --git a/test/backends/test_openai_ollama.py b/test/backends/test_openai_ollama.py index 71ee0635..86f35174 100644 --- a/test/backends/test_openai_ollama.py +++ b/test/backends/test_openai_ollama.py @@ -7,6 +7,9 @@ import pydantic import pytest +# Mark all tests in this module as requiring Ollama via OpenAI-compatible API +pytestmark = [pytest.mark.openai, pytest.mark.ollama, pytest.mark.llm] + from mellea import MelleaSession from mellea.backends import ModelOption from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B @@ -103,7 +106,6 @@ class Email(pydantic.BaseModel): # this is not guaranteed, due to the lack of regexp pattern # assert "@" in email.to.email_address # assert email.to.email_address.endswith("example.com") - pass @pytest.mark.qualitative diff --git a/test/backends/test_vision_ollama.py 
b/test/backends/test_vision_ollama.py index 740043d9..90549120 100644 --- a/test/backends/test_vision_ollama.py +++ b/test/backends/test_vision_ollama.py @@ -5,11 +5,13 @@ import pytest from PIL import Image +# Mark all tests in this module as requiring Ollama with vision support +pytestmark = [pytest.mark.ollama, pytest.mark.llm] + from mellea import MelleaSession, start_session from mellea.backends import ModelOption from mellea.core import ImageBlock, ModelOutputThunk -from mellea.stdlib.components import Message -from mellea.stdlib.components import Instruction +from mellea.stdlib.components import Instruction, Message @pytest.fixture(scope="module") diff --git a/test/backends/test_vision_openai.py b/test/backends/test_vision_openai.py index 9c958efe..0b2b92da 100644 --- a/test/backends/test_vision_openai.py +++ b/test/backends/test_vision_openai.py @@ -6,6 +6,9 @@ import pytest from PIL import Image +# Mark all tests in this module as requiring OpenAI API with vision support +pytestmark = [pytest.mark.openai, pytest.mark.llm, pytest.mark.requires_api_key] + from mellea import MelleaSession, start_session from mellea.backends import ModelOption from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B diff --git a/test/backends/test_vllm.py b/test/backends/test_vllm.py index 05bfc0f8..ed4a0354 100644 --- a/test/backends/test_vllm.py +++ b/test/backends/test_vllm.py @@ -1,19 +1,27 @@ import asyncio import os +from typing import Annotated + import pydantic import pytest -from typing_extensions import Annotated -# Skip entire module in CI since all 8 tests are qualitative -pytestmark = pytest.mark.skipif( - int(os.environ.get("CICD", 0)) == 1, - reason="Skipping vLLM tests in CI - all qualitative tests", -) +# Mark all tests in this module with backend and resource requirements +pytestmark = [ + pytest.mark.vllm, + pytest.mark.llm, + pytest.mark.requires_gpu, + pytest.mark.requires_heavy_ram, + # Skip entire module in CI since all 8 tests are qualitative + pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping vLLM tests in CI - all qualitative tests", + ), +] +import mellea.backends.model_ids as model_ids from mellea import MelleaSession -from mellea.backends.vllm import LocalVLLMBackend from mellea.backends import ModelOption -import mellea.backends.model_ids as model_ids +from mellea.backends.vllm import LocalVLLMBackend from mellea.core import CBlock from mellea.stdlib.context import ChatContext, SimpleContext diff --git a/test/backends/test_vllm_tools.py b/test/backends/test_vllm_tools.py index 0f6b21de..2d085deb 100644 --- a/test/backends/test_vllm_tools.py +++ b/test/backends/test_vllm_tools.py @@ -1,16 +1,23 @@ import os + import pytest # Skip entire module in CI since the single test is qualitative -pytestmark = pytest.mark.skipif( - int(os.environ.get("CICD", 0)) == 1, - reason="Skipping vLLM tools tests in CI - qualitative test", -) +pytestmark = [ + pytest.mark.vllm, + pytest.mark.llm, + pytest.mark.requires_gpu, + pytest.mark.requires_heavy_ram, + pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping vLLM tools tests in CI - qualitative test", + ), +] +import mellea.backends.model_ids as model_ids from mellea import MelleaSession -from mellea.backends.vllm import LocalVLLMBackend from mellea.backends import ModelOption -import mellea.backends.model_ids as model_ids +from mellea.backends.vllm import LocalVLLMBackend from mellea.stdlib.context import ChatContext diff --git a/test/backends/test_watsonx.py 
b/test/backends/test_watsonx.py index 1631f488..6902e675 100644 --- a/test/backends/test_watsonx.py +++ b/test/backends/test_watsonx.py @@ -5,17 +5,23 @@ import pydantic import pytest -# Skip entire module in CI since 8/9 tests are qualitative -pytestmark = pytest.mark.skipif( - int(os.environ.get("CICD", 0)) == 1, - reason="Skipping Watsonx tests in CI - mostly qualitative tests", -) +# Mark all tests in this module with backend and auth requirements +pytestmark = [ + pytest.mark.watsonx, + pytest.mark.llm, + pytest.mark.requires_api_key, + # Skip entire module in CI since 8/9 tests are qualitative + pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping Watsonx tests in CI - mostly qualitative tests", + ), +] from mellea import MelleaSession -from mellea.formatters import TemplateFormatter from mellea.backends import ModelOption from mellea.backends.watsonx import WatsonxAIBackend from mellea.core import CBlock, ModelOutputThunk +from mellea.formatters import TemplateFormatter from mellea.stdlib.context import ChatContext, SimpleContext @@ -45,7 +51,6 @@ def session(backend: WatsonxAIBackend): @pytest.mark.qualitative def test_filter_chat_completions_kwargs(backend: WatsonxAIBackend): """Detect changes to the WatsonxAI TextChatParameters.""" - known_keys = [ "frequency_penalty", "logprobs", @@ -66,7 +71,7 @@ def test_filter_chat_completions_kwargs(backend: WatsonxAIBackend): "guided_grammar", "guided_json", ] - test_dict = {key: 1 for key in known_keys} + test_dict = dict.fromkeys(known_keys, 1) # Make sure keys that we think should be in the TextChatParameters are there. filtered_dict = backend.filter_chat_completions_kwargs(test_dict) @@ -133,7 +138,6 @@ class Email(pydantic.BaseModel): # this is not guaranteed, due to the lack of regexp pattern # assert "@" in email.to.email_address # assert email.to.email_address.endswith("example.com") - pass @pytest.mark.qualitative diff --git a/test/conftest.py b/test/conftest.py index 10c96e74..8c99a7b1 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,27 +1,257 @@ import gc import os +import subprocess +import sys import pytest +# Try to import optional dependencies for system detection +try: + import psutil + + HAS_PSUTIL = True +except ImportError: + HAS_PSUTIL = False + +try: + import torch + + HAS_TORCH = True +except ImportError: + HAS_TORCH = False + + +# ============================================================================ +# System Capability Detection +# ============================================================================ + + +def _check_ollama_available(): + """Check if Ollama is available by checking if port 11434 is listening. + + Note: This only checks if Ollama is running, not which models are loaded. + Tests may still fail if required models (e.g., granite4:micro) are not pulled. 
+ """ + import socket + + try: + # Try to connect to Ollama's default port + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(1) + result = sock.connect_ex(("localhost", 11434)) + sock.close() + return result == 0 + except Exception: + return False + + +def get_system_capabilities(): + """Detect system capabilities for test requirements.""" + capabilities = { + "has_gpu": False, + "gpu_memory_gb": 0, + "ram_gb": 0, + "has_api_keys": {}, + "has_ollama": False, + } + + # Detect GPU (CUDA for NVIDIA, MPS for Apple Silicon) + if HAS_TORCH: + has_cuda = torch.cuda.is_available() + has_mps = hasattr(torch.backends, "mps") and torch.backends.mps.is_available() + capabilities["has_gpu"] = has_cuda or has_mps + + if has_cuda: + try: + capabilities["gpu_memory_gb"] = torch.cuda.get_device_properties( + 0 + ).total_memory / (1024**3) + except Exception: + pass + # Note: MPS doesn't provide easy memory query, leave at 0 + + # Detect RAM + if HAS_PSUTIL: + capabilities["ram_gb"] = psutil.virtual_memory().total / (1024**3) + + # Detect API keys + api_key_vars = { + "openai": "OPENAI_API_KEY", + "watsonx": ["WATSONX_APIKEY", "IBM_CLOUD_API_KEY"], + "huggingface": "HF_TOKEN", + } + + for backend, env_vars in api_key_vars.items(): + if isinstance(env_vars, str): + env_vars = [env_vars] + capabilities["has_api_keys"][backend] = any( + os.environ.get(var) for var in env_vars + ) + + # Detect Ollama availability + capabilities["has_ollama"] = _check_ollama_available() + + return capabilities + + +@pytest.fixture(scope="session") +def system_capabilities(): + """Fixture providing system capabilities.""" + return get_system_capabilities() + @pytest.fixture(scope="session") def gh_run() -> int: return int(os.environ.get("CICD", 0)) # type: ignore +# ============================================================================ +# Pytest Marker Registration and CLI Options +# ============================================================================ + + +def pytest_addoption(parser): + """Add custom command-line options.""" + parser.addoption( + "--ignore-gpu-check", + action="store_true", + default=False, + help="Ignore GPU requirement checks (tests may fail without GPU)", + ) + parser.addoption( + "--ignore-ram-check", + action="store_true", + default=False, + help="Ignore RAM requirement checks (tests may fail with insufficient RAM)", + ) + parser.addoption( + "--ignore-ollama-check", + action="store_true", + default=False, + help="Ignore Ollama availability checks (tests will fail if Ollama not running)", + ) + parser.addoption( + "--ignore-api-key-check", + action="store_true", + default=False, + help="Ignore API key checks (tests will fail without valid API keys)", + ) + + +def pytest_configure(config): + """Register custom markers.""" + # Backend markers + config.addinivalue_line( + "markers", "ollama: Tests requiring Ollama backend (local, light)" + ) + config.addinivalue_line( + "markers", "openai: Tests requiring OpenAI API (requires API key)" + ) + config.addinivalue_line( + "markers", "watsonx: Tests requiring Watsonx API (requires API key)" + ) + config.addinivalue_line( + "markers", "huggingface: Tests requiring HuggingFace backend (local, heavy)" + ) + config.addinivalue_line( + "markers", "vllm: Tests requiring vLLM backend (local, GPU required)" + ) + config.addinivalue_line("markers", "litellm: Tests requiring LiteLLM backend") + + # Capability markers + config.addinivalue_line( + "markers", "requires_api_key: Tests requiring external API keys" + ) + 
config.addinivalue_line("markers", "requires_gpu: Tests requiring GPU")
+    config.addinivalue_line("markers", "requires_heavy_ram: Tests requiring 48GB+ RAM")
+    config.addinivalue_line("markers", "qualitative: Non-deterministic quality tests")
+
+    # Composite markers
+    config.addinivalue_line(
+        "markers", "llm: Tests that make LLM calls (needs at least Ollama)"
+    )
+
+
+# ============================================================================
+# Test Skipping Logic
+# ============================================================================
+
+
 def pytest_runtest_setup(item):
-    """Skip qualitative tests when running in CI environment."""
-    # Runs tests *not* marked with `@pytest.mark.qualitative` to run normally.
-    if not item.get_closest_marker("qualitative"):
-        return
+    """Skip tests based on markers and system capabilities.
 
+    Can be overridden with command-line options:
+    - pytest --ignore-gpu-check
+    - pytest --ignore-ram-check
+    - pytest --ignore-ollama-check
+    - pytest --ignore-api-key-check
+    """
+    capabilities = get_system_capabilities()
     gh_run = int(os.environ.get("CICD", 0))
+    config = item.config
 
-    if gh_run == 1:
+    # Check for override flags from CLI
+    ignore_gpu = config.getoption("--ignore-gpu-check", default=False)
+    ignore_ram = config.getoption("--ignore-ram-check", default=False)
+    ignore_ollama = config.getoption("--ignore-ollama-check", default=False)
+    ignore_api_key = config.getoption("--ignore-api-key-check", default=False)
+
+    # Skip qualitative tests in CI (existing behavior)
+    if item.get_closest_marker("qualitative") and gh_run == 1:
         pytest.skip(
             reason="Skipping qualitative test: got env variable CICD == 1. Used only in gh workflows."
         )
 
+    # Skip tests requiring API keys if not available (unless override)
+    if item.get_closest_marker("requires_api_key") and not ignore_api_key:
+        # Check specific backend markers
+        for backend in ["openai", "watsonx"]:
+            if item.get_closest_marker(backend):
+                if not capabilities["has_api_keys"].get(backend):
+                    pytest.skip(
+                        f"Skipping test: {backend} API key not found in environment"
+                    )
+
+    # Skip tests requiring GPU if not available (unless override)
+    if item.get_closest_marker("requires_gpu") and not ignore_gpu:
+        if not capabilities["has_gpu"]:
+            pytest.skip("Skipping test: GPU not available")
+
+    # Skip tests requiring heavy RAM if insufficient (unless override)
+    # NOTE: The 48GB threshold is based on empirical testing:
+    # - HuggingFace tests with granite-3.3-8b-instruct failed on 32GB M1 MacBook
+    # - Also failed on 36GB system
+    # - Set to 48GB as safe threshold for 8B model + overhead
+    # TODO: Consider per-model thresholds or make configurable
+    # Can be overridden with: pytest --ignore-ram-check
+    if item.get_closest_marker("requires_heavy_ram") and not ignore_ram:
+        RAM_THRESHOLD_GB = 48  # Based on real-world testing
+        if capabilities["ram_gb"] > 0 and capabilities["ram_gb"] < RAM_THRESHOLD_GB:
+            pytest.skip(
+                f"Skipping test: Insufficient RAM ({capabilities['ram_gb']:.1f}GB < {RAM_THRESHOLD_GB}GB)"
+            )
+
+    # Backend-specific skipping
+    if item.get_closest_marker("openai") and not ignore_api_key:
+        if not capabilities["has_api_keys"].get("openai"):
+            pytest.skip("Skipping test: OPENAI_API_KEY not found in environment")
+
+    if item.get_closest_marker("watsonx") and not ignore_api_key:
+        if not capabilities["has_api_keys"].get("watsonx"):
+            pytest.skip(
+                "Skipping test: Watsonx API credentials not found in environment"
+            )
+
+    if item.get_closest_marker("vllm") and not ignore_gpu:
+        if not 
capabilities["has_gpu"]: + pytest.skip("Skipping test: vLLM requires GPU") + + if item.get_closest_marker("ollama") and not ignore_ollama: + if not capabilities["has_ollama"]: + pytest.skip( + "Skipping test: Ollama not available (port 11434 not listening)" + ) + def memory_cleaner(): """Aggressive memory cleanup function.""" @@ -46,6 +276,34 @@ def memory_cleaner(): pass +@pytest.fixture(autouse=True, scope="session") +def normalize_ollama_host(): + """Normalize OLLAMA_HOST to work with client libraries. + + If OLLAMA_HOST is set to 0.0.0.0 (server bind address), change it to + 127.0.0.1:11434 for client connections. This prevents connection errors + when tests try to connect to Ollama. + """ + original_host = os.environ.get("OLLAMA_HOST") + + # If OLLAMA_HOST starts with 0.0.0.0, replace with 127.0.0.1 + if original_host and original_host.startswith("0.0.0.0"): + # Extract port if present, default to 11434 + if ":" in original_host: + port = original_host.split(":", 1)[1] + else: + port = "11434" + os.environ["OLLAMA_HOST"] = f"127.0.0.1:{port}" + + yield + + # Restore original value + if original_host is not None: + os.environ["OLLAMA_HOST"] = original_host + elif "OLLAMA_HOST" in os.environ: + del os.environ["OLLAMA_HOST"] + + @pytest.fixture(autouse=True, scope="function") def aggressive_cleanup(): """Aggressive memory cleanup after each test to prevent OOM on CI runners.""" diff --git a/test/core/test_component_typing.py b/test/core/test_component_typing.py index d6f10f7c..4c83045c 100644 --- a/test/core/test_component_typing.py +++ b/test/core/test_component_typing.py @@ -1,8 +1,11 @@ """Tests for checking the functionality of typed components, model output thunks and sampling results.""" -import pytest from typing import get_args -from mellea import start_session + +import pytest + +import mellea.stdlib.functional as mfuncs +from mellea import MelleaSession, start_session from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B from mellea.backends.ollama import OllamaModelBackend from mellea.core import ( @@ -11,15 +14,20 @@ ComponentParseError, Context, ModelOutputThunk, + Requirement, + ValidationResult, ) +from mellea.stdlib.components import Instruction, Message from mellea.stdlib.context import ChatContext, SimpleContext -from mellea.stdlib.components import Message -from mellea.stdlib.components import Instruction -from mellea.core import Requirement, ValidationResult from mellea.stdlib.sampling import BaseSamplingStrategy -from mellea import MelleaSession -import mellea.stdlib.functional as mfuncs +# Module-level markers: Uses granite3.3:8b (8B, heavy) in local mode +pytestmark = [ + pytest.mark.ollama, + pytest.mark.requires_gpu, + pytest.mark.requires_heavy_ram, + pytest.mark.llm, +] class FloatComp(Component[float]): @@ -78,7 +86,7 @@ def session(backend) -> MelleaSession: def test_mot_init_typing(): mot = ModelOutputThunk[float](value="1") assert hasattr(mot, "__orig_class__"), ( - f"mots are generics and should have this field" + "mots are generics and should have this field" ) assert get_args(mot.__orig_class__)[0] == float, ( # type: ignore f"expected float, got {get_args(mot.__orig_class__)[0]} as mot type" # type: ignore @@ -86,7 +94,7 @@ def test_mot_init_typing(): unknown_mot = ModelOutputThunk(value="2") assert not hasattr(unknown_mot, "__orig_class__"), ( - f"unknown mots / mots with no type defined at instantiate don't have this attribute" + "unknown mots / mots with no type defined at instantiate don't have this attribute" ) diff --git 
a/test/stdlib/components/intrinsic/test_rag.py b/test/stdlib/components/intrinsic/test_rag.py index 27646ba7..6e386a59 100644 --- a/test/stdlib/components/intrinsic/test_rag.py +++ b/test/stdlib/components/intrinsic/test_rag.py @@ -9,16 +9,20 @@ import torch from mellea.backends.huggingface import LocalHFBackend -from mellea.stdlib.components import Document -from mellea.stdlib.context import ChatContext -from mellea.stdlib.components import Message +from mellea.stdlib.components import Document, Message from mellea.stdlib.components.intrinsic import rag +from mellea.stdlib.context import ChatContext # Skip entire module in CI since all 7 tests are qualitative -pytestmark = pytest.mark.skipif( - int(os.environ.get("CICD", 0)) == 1, - reason="Skipping RAG tests in CI - all qualitative tests", -) +pytestmark = [ + pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping RAG tests in CI - all qualitative tests", + ), + pytest.mark.huggingface, + pytest.mark.requires_gpu, + pytest.mark.llm, +] DATA_ROOT = pathlib.Path(os.path.dirname(__file__)) / "testdata" """Location of data files for the tests in this file.""" @@ -30,7 +34,6 @@ @pytest.fixture(name="backend") def _backend(): """Backend used by the tests in this file.""" - # Prevent thrashing if the default device is CPU torch.set_num_threads(4) @@ -48,7 +51,8 @@ def _backend(): def _read_input_json(file_name: str): """Shared code for reading data stored in JSON files and converting to Mellea - types.""" + types. + """ with open(DATA_ROOT / "input_json" / file_name, encoding="utf-8") as f: json_data = json.load(f) @@ -70,7 +74,8 @@ def _read_input_json(file_name: str): def _read_output_json(file_name: str): """Shared code for reading canned outputs stored in JSON files and converting - to Mellea types.""" + to Mellea types. + """ with open(DATA_ROOT / "output_json" / file_name, encoding="utf-8") as f: json_data = json.load(f) diff --git a/test/stdlib/components/test_genslot.py b/test/stdlib/components/test_genslot.py index 3c59568f..ba956507 100644 --- a/test/stdlib/components/test_genslot.py +++ b/test/stdlib/components/test_genslot.py @@ -3,20 +3,27 @@ import pytest -from mellea import generative, start_session +from mellea import MelleaSession, generative, start_session from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B from mellea.backends.ollama import OllamaModelBackend from mellea.core import Requirement -from mellea.stdlib.context import ChatContext, Context from mellea.stdlib.components.genslot import ( AsyncGenerativeSlot, GenerativeSlot, PreconditionException, SyncGenerativeSlot, ) +from mellea.stdlib.context import ChatContext, Context from mellea.stdlib.requirements import simple_validate from mellea.stdlib.sampling import RejectionSamplingStrategy -from mellea import MelleaSession + +# Module-level markers: Uses granite3.3:8b (8B, heavy) in local mode +pytestmark = [ + pytest.mark.ollama, + pytest.mark.requires_gpu, + pytest.mark.requires_heavy_ram, + pytest.mark.llm, +] @pytest.fixture(scope="module") @@ -161,7 +168,6 @@ def test_arg_extraction(backend, arg_choices, kwarg_choices, errs): Python should catch most of these issues itself. We have to manually raise an exception for the arguments of the original function being positional. """ - # List of all needed values. backend = backend ctx = ChatContext() @@ -242,7 +248,6 @@ def test_with_no_args(session): @generative def generate_text() -> str: """Generate text!""" - ... 
generate_text(m=session) diff --git a/test/stdlib/sampling/test_sofai_graph_coloring.py b/test/stdlib/sampling/test_sofai_graph_coloring.py index 3f0d9c25..37b06ffc 100644 --- a/test/stdlib/sampling/test_sofai_graph_coloring.py +++ b/test/stdlib/sampling/test_sofai_graph_coloring.py @@ -13,8 +13,7 @@ from mellea.core import GenerateLog, Requirement, ValidationResult from mellea.stdlib.components import Instruction, Message, ModelOutputThunk from mellea.stdlib.context import ChatContext -from mellea.stdlib.sampling import SOFAISamplingStrategy, SamplingResult - +from mellea.stdlib.sampling import SamplingResult, SOFAISamplingStrategy # ============================================================================= # Graph Coloring Test Domain @@ -709,10 +708,13 @@ async def s2_generate(*args, **kwargs): @pytest.mark.qualitative +@pytest.mark.ollama +@pytest.mark.llm class TestSOFAIGraphColoringIntegration: """Integration tests with actual LLM backends. These tests are marked qualitative and skipped in CI. + Uses llama3.2:1b (lightweight, no heavy RAM needed). """ def test_graph_coloring_fresh_start(self): diff --git a/test/stdlib/sampling/test_sofai_sampling.py b/test/stdlib/sampling/test_sofai_sampling.py index ae68587c..bae95ff4 100644 --- a/test/stdlib/sampling/test_sofai_sampling.py +++ b/test/stdlib/sampling/test_sofai_sampling.py @@ -267,10 +267,13 @@ def test_fallback_without_tags(self): @pytest.mark.qualitative +@pytest.mark.ollama +@pytest.mark.llm class TestSOFAIIntegration: """Integration tests for SOFAISamplingStrategy. These tests require actual LLM backends and are marked as qualitative. + Uses llama3.2:1b (lightweight, no heavy RAM needed). """ def test_sofai_with_ollama(self, gh_run): diff --git a/test/stdlib/sampling/test_think_budget_forcing.py b/test/stdlib/sampling/test_think_budget_forcing.py index 747849f1..f4025f0d 100644 --- a/test/stdlib/sampling/test_think_budget_forcing.py +++ b/test/stdlib/sampling/test_think_budget_forcing.py @@ -10,6 +10,15 @@ MODEL_ID = OPENAI_GPT_OSS_20B +# Module-level markers: gpt-oss:20b is a 20B model requiring heavy resources +pytestmark = [ + pytest.mark.ollama, + pytest.mark.requires_gpu, + pytest.mark.requires_heavy_ram, + pytest.mark.llm, + pytest.mark.qualitative, +] + @pytest.fixture(scope="module") def m_session(gh_run): diff --git a/test/stdlib/test_spans.py b/test/stdlib/test_spans.py index 0ddb4bc1..71b03ed0 100644 --- a/test/stdlib/test_spans.py +++ b/test/stdlib/test_spans.py @@ -1,11 +1,19 @@ import pytest from mellea.backends import ModelOption +from mellea.backends.huggingface import LocalHFBackend +from mellea.backends.model_ids import IBM_GRANITE_3_3_8B from mellea.core import CBlock from mellea.stdlib.components import SimpleComponent -from mellea.stdlib.session import start_session, MelleaSession -from mellea.backends.model_ids import IBM_GRANITE_3_3_8B -from mellea.backends.huggingface import LocalHFBackend +from mellea.stdlib.session import MelleaSession, start_session + +# Module-level markers for all tests using granite-3.3-8b (8B model) +pytestmark = [ + pytest.mark.huggingface, + pytest.mark.requires_gpu, + pytest.mark.requires_heavy_ram, + pytest.mark.llm, +] # We edit the context type in the async tests below. Don't change the scope here. 
diff --git a/uv.lock b/uv.lock index 8d1f179a..f50bf4bb 100644 --- a/uv.lock +++ b/uv.lock @@ -3252,6 +3252,7 @@ all = [ { name = "transformers" }, { name = "trl" }, { name = "vllm" }, + { name = "xgrammar" }, ] docling = [ { name = "docling" }, @@ -3265,6 +3266,7 @@ hf = [ { name = "peft" }, { name = "transformers" }, { name = "trl" }, + { name = "xgrammar" }, ] litellm = [ { name = "litellm" }, @@ -3287,6 +3289,7 @@ dev = [ { name = "nbmake" }, { name = "pdm" }, { name = "pre-commit" }, + { name = "psutil" }, { name = "pylint" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -3344,6 +3347,7 @@ requires-dist = [ { name = "types-tqdm" }, { name = "uvicorn" }, { name = "vllm", marker = "extra == 'vllm'", specifier = ">=0.9.1" }, + { name = "xgrammar", marker = "extra == 'hf'" }, ] provides-extras = ["hf", "vllm", "litellm", "watsonx", "docling", "all"] @@ -3354,6 +3358,7 @@ dev = [ { name = "nbmake", specifier = ">=1.5.5" }, { name = "pdm", specifier = ">=2.24.0" }, { name = "pre-commit", specifier = ">=4.2.0" }, + { name = "psutil" }, { name = "pylint", specifier = ">=3.3.4" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -3416,6 +3421,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/f0/8282d9641415e9e33df173516226b404d367a0fc55e1a60424a152913abc/mistune-3.1.4-py3-none-any.whl", hash = "sha256:93691da911e5d9d2e23bc54472892aff676df27a75274962ff9edc210364266d", size = 53481, upload-time = "2025-08-29T07:20:42.218Z" }, ] +[[package]] +name = "mlx" +version = "0.30.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mlx-metal", marker = "sys_platform == 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/9c/d6f72f04eeeeaeee8309397efcfa0e923189d0b720f4ac6b3887d0a2f40b/mlx-0.30.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:685051761e428336f8f19ae76a761ce99d29ff67c52738f15ce6409e2ff34e6b", size = 568453, upload-time = "2026-01-14T01:16:40.796Z" }, + { url = "https://files.pythonhosted.org/packages/db/59/505717fd63f62d766f054ab8770d08e98b10217c0995bd2555429863fd31/mlx-0.30.3-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:e405e6575e3b0b00dd6bd02bdb415b638cd5c2e5faedb696df2b2c8fbe871240", size = 568451, upload-time = "2026-01-14T01:16:42.027Z" }, + { url = "https://files.pythonhosted.org/packages/86/9c/a5319ae8ed0baa76fde80def12391ae13acec1b88904d4ead9bbabc9a083/mlx-0.30.3-cp310-cp310-macosx_26_0_arm64.whl", hash = "sha256:46894eb528457483aec44227f61afdff424cb76d146a6e1727d03ea0f52be41b", size = 568309, upload-time = "2026-01-14T05:52:06.915Z" }, + { url = "https://files.pythonhosted.org/packages/78/b6/dfcfffc41d832a86249715fab336dc8638c2237035287eb24af792484c53/mlx-0.30.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:794e79587a4906bdb3c5473ef936f45008eaaa609a3c498cc29a442b2c829621", size = 568664, upload-time = "2026-01-14T01:16:45.573Z" }, + { url = "https://files.pythonhosted.org/packages/22/9f/22d494b83b611380063da31c2b482db8c620f7ad6531cfcd1e11f7c35852/mlx-0.30.3-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:472cdc6eaca8610224621a1561e8c36477eab1a2f0dd3eb49b95484d739c4605", size = 568663, upload-time = "2026-01-14T01:16:46.588Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/b6fb0500aef8e9ed65d4730d8c34b13d7a770ca863b9af363b5713a16040/mlx-0.30.3-cp311-cp311-macosx_26_0_arm64.whl", hash = "sha256:a5d82be69c7e671dc4d5855d2f6aedcb507817e5985478903ab754b642d9ba01", size = 568522, upload-time = "2026-01-14T05:52:08.334Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/b3/e24c3a69dad0cf4404bb174c6fed0d804022da64758cd815a254e1cd0627/mlx-0.30.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:0b275168b80645a155b456e1a457a37fb5ee2c251e8fbd8db9e153351a9e2d2f", size = 569398, upload-time = "2026-01-14T01:16:49.804Z" }, + { url = "https://files.pythonhosted.org/packages/0b/87/d0804443da97a06d3439f6efb0ceffa178f530a121f0f4a6c77b39f8bfd7/mlx-0.30.3-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:6e818de14864982e832344198240a1dafba7d3316c4eb6f1b8e43b4dd25dd2ef", size = 569396, upload-time = "2026-01-14T01:16:51.007Z" }, + { url = "https://files.pythonhosted.org/packages/cf/dc/7cdd95e4561b73fba8c86bf11293797076120400e472fe2a72ef483b6d8d/mlx-0.30.3-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:d23b422209fd4b7ecacef59070321f8c6a122f906a5e9b6683a5fc9e1b8fcd5c", size = 569192, upload-time = "2026-01-14T05:52:09.715Z" }, + { url = "https://files.pythonhosted.org/packages/d0/22/42935d593fe82d3b98eb9d60e4620ed99703886635106f89d407c68f33bc/mlx-0.30.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:743fac1e4f9e8e46c8262943c643a31139c255cdb256c99ad496958215ccac1e", size = 569344, upload-time = "2026-01-14T01:16:54.847Z" }, + { url = "https://files.pythonhosted.org/packages/7d/27/f2e7a5236289d45315d0215e8553b4dd7e2faaba3bcb5025b34b25d5ab66/mlx-0.30.3-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:3b04ae81655aa0e63a6e8f2c749de3bbce64cf5b168ae10f39ed086dfa99e7f8", size = 569345, upload-time = "2026-01-14T01:16:56.564Z" }, + { url = "https://files.pythonhosted.org/packages/01/41/06b042457f51952456e9bb46b2c6e205ab3a28fc52d6751b5787fdb762b2/mlx-0.30.3-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:ba9b5bdb1e929cc130af72efd7f73508c0f4e526d224489af7ec1c6419564659", size = 569213, upload-time = "2026-01-14T05:52:10.86Z" }, + { url = "https://files.pythonhosted.org/packages/82/e2/6e551bd48fb350fbf0ee4cc5cd09485437d260b8f4937f22d8623e14687a/mlx-0.30.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:2c27fd8daaae14ca6cf407fcd236006a6e968f7708c8f61a2709116f2e754852", size = 571920, upload-time = "2026-01-14T01:16:59.683Z" }, + { url = "https://files.pythonhosted.org/packages/82/c0/561d1c9d3d12830b0e7fdcbd807585ef20909e398d4bcdbf25e4367543eb/mlx-0.30.3-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:b755fd4ed4b6a2ae4dee3766b5a2ea52fcbe83ebd1cf018458e18b74139409f3", size = 571921, upload-time = "2026-01-14T01:17:00.868Z" }, + { url = "https://files.pythonhosted.org/packages/42/1a/fb573fc2edc22a777fa254ff5c0c886ffd2c88aeb1f21c45778ef170f990/mlx-0.30.3-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:7e352c0369a2f7e54d4f317b434eab3333918ea9edde1c43c61d36386b6f76bf", size = 571732, upload-time = "2026-01-14T05:52:11.893Z" }, +] + +[[package]] +name = "mlx-lm" +version = "0.29.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "mlx", marker = "sys_platform == 'darwin'" }, + { name = "numpy", marker = "sys_platform == 'darwin'" }, + { name = "protobuf", marker = "sys_platform == 'darwin'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin'" }, + { name = "sentencepiece", marker = "sys_platform == 'darwin'" }, + { name = "transformers", marker = "sys_platform == 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e3/62/f46e1355256a114808517947f8e83ad6be310c7288c551db0fa678f47923/mlx_lm-0.29.1.tar.gz", hash = "sha256:b99180d8f33d33a077b814e550bfb2d8a59ae003d668fd1f4b3fff62a381d34b", size = 232302, 
upload-time = "2025-12-16T16:58:27.959Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/53/913099c91d384e115ea078325efd9a0bc1ea3eb3458c694b4596cbd267f2/mlx_lm-0.29.1-py3-none-any.whl", hash = "sha256:440941b3054c2a2216e97615de584cc90fa1ea874782e20699b9895721fad8dc", size = 324884, upload-time = "2025-12-16T16:58:26.36Z" }, +] + +[[package]] +name = "mlx-metal" +version = "0.30.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/63/4d8f6fefb507c028df4454dabfe8d8e0ad2961bb06510b6aca23d2d5b2be/mlx_metal-0.30.3-py3-none-macosx_14_0_arm64.whl", hash = "sha256:6276312b02353714c7c6515169569fe1c4bebe3229c8ecf1fdb375a13e78c966", size = 37716245, upload-time = "2026-01-14T01:16:34.838Z" }, + { url = "https://files.pythonhosted.org/packages/35/91/1d452e48a4bb4958844fd3bb28ae31b8de110549c009ebec5024ce27ebf3/mlx_metal-0.30.3-py3-none-macosx_15_0_arm64.whl", hash = "sha256:c096c0a3428f3f96a06220f97a36f9528b18bc05173f821eb05bc8458e723fa8", size = 37712125, upload-time = "2026-01-14T01:16:38.619Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/7a3cbca85542b5ca4faf871e35927f43aa0e3fc830ae5b699780fe723677/mlx_metal-0.30.3-py3-none-macosx_26_0_arm64.whl", hash = "sha256:69068533bd1ee8b0379ce5de57ed5fd313577a10ecab58e1332fd1ff7248a75e", size = 46488962, upload-time = "2026-01-14T05:52:04.523Z" }, +] + [[package]] name = "more-itertools" version = "10.8.0" @@ -8126,6 +8184,7 @@ name = "xgrammar" version = "0.1.19" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" }, { name = "ninja" }, { name = "pydantic" }, { name = "sentencepiece" },