From dfc6a17278bf7b3bb7d500417fb6ac5e952c59ae Mon Sep 17 00:00:00 2001
From: Jeremy Manning <jeremy.r.manning@dartmouth.edu>
Date: Tue, 9 Dec 2025 21:43:24 -0500
Subject: [PATCH 1/7] Add Whisper to requirements, update setup.py to read from
 requirements.txt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

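- Add openai-whisper to requirements.txt for speech decoding
- Refactor setup.py to read install_requires from requirements.txt (so
  openai-whisper is now installed as a required dependency)
- Update the setup.py long description to mention OpenAI Whisper instead of
  the Google Cloud Speech to Text API
- Update clustering.py to return NaN (not 0.5) when there are too few recalls
  to compute a fingerprint, and update the warning text to match
- Remove debug print statements from clustering.py
- Remove benchmark_cluster.py (temporary file)
- Add CLAUDE.md to .gitignore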

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .gitignore                   |  1 +
 benchmark_cluster.py         | 37 ------------------------------------
 quail/analysis/clustering.py | 10 +++-------
 requirements.txt             |  1 +
 setup.py                     | 16 +++++++---------
 5 files changed, 12 insertions(+), 53 deletions(-)
 delete mode 100644 benchmark_cluster.py

diff --git a/.gitignore b/.gitignore
index d9dde27..a870fdb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ dist/*
 venv/
 docs/_build
 docs/sg_execution_times.rst
+CLAUDE.md
diff --git a/benchmark_cluster.py b/benchmark_cluster.py
deleted file mode 100644
index 34a214a..0000000
--- a/benchmark_cluster.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import quail
-import time
-import numpy as np
-import pandas as pd
-
-def run_benchmark():
-    print("Generating data...")
-    # 20 subjects, 10 lists each, 16 items
-    n_subj = 20
-    n_lists = 10
-    list_len = 16
-    
-    # Create random items
-    # Pres: ints
-    pres = [[[str(k) for k in range(list_len)] for j in range(n_lists)] for i in range(n_subj)]
-    # Rec: random shuffle of pres
-    rec = [[[str(k) for k in np.random.permutation(range(list_len))] for j in range(n_lists)] for i in range(n_subj)]
-    
-    # Features: Scalar 'val'
-    features = [{'item': str(k), 'val': float(k)} for k in range(list_len)]
-    # Expand features to 3D
-    pres_feat = [[features for j in range(n_lists)] for i in range(n_subj)]
-    
-    egg = quail.Egg(pres=pres, rec=rec, features=pres_feat)
-    
-    print("Starting fingerprint analysis...")
-    start = time.time()
-    # Compute fingerprint with permutations (expensive part)
-    # 10 perms
-    res = egg.analyze('fingerprint', n_perms=10, permute=True, parallel=False)
-    end = time.time()
-    
-    print(f"Analysis complete in {end - start:.2f} seconds.")
-    print(f"Result shape: {res.data.shape}")
-
-if __name__ == '__main__':
-    run_benchmark()
diff --git a/quail/analysis/clustering.py b/quail/analysis/clustering.py
index 3bf1d51..ee028fa 100644
--- a/quail/analysis/clustering.py
+++ b/quail/analysis/clustering.py
@@ -96,9 +96,8 @@ def _get_weight_exact(egg, feature, distdict, permute, n_perms):
     rec = list(egg.get_rec_items().values[0])
 
     if len(rec) <= 2:
-        warnings.warn('Not enough recalls to compute fingerprint, returning default'
-              'fingerprint.. (everything is .5)')
-        return 0.5
+        warnings.warn('Not enough recalls to compute fingerprint, returning NaN')
+        return np.nan
 
     distmat = get_distmat(egg, feature, distdict)
 
@@ -174,14 +173,11 @@ def _get_weight_best(egg, feature, distdict, permute, n_perms, distance):
 
     rec = list(egg.get_rec_items().values[0])
     if len(rec) <= 2:
-        warnings.warn('Not enough recalls to compute fingerprint, returning default'
-              'fingerprint.. (everything is .5)')
+        warnings.warn('Not enough recalls to compute fingerprint, returning NaN')
         return np.nan
 
     distmat = get_distmat(egg, feature, distdict)
     matchmat = get_match(egg, feature, distdict)
-    print(f"DEBUG: matchmat.shape={matchmat.shape}, len(rec)={len(rec)}")
-    print(f"DEBUG: distmat.shape={distmat.shape}")
 
     ranks = []
     for i in range(len(rec)-1):
diff --git a/requirements.txt b/requirements.txt
index ae6eaa4..db8d01a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ matplotlib>=3.5.0
 seaborn>=0.12.0
 pandas>=2.0.0
 joblib>=1.3.0
+openai-whisper
diff --git a/setup.py b/setup.py
index 4731906..3f2a250 100755
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+import os
 from setuptools import setup, find_packages
 
 DESCRIPTION = 'A python toolbox for analyzing and plotting free recall data'
@@ -12,11 +13,15 @@
 - Clustering metrics (e.g. single-number summaries of how often participants transition from recalling a word to another related word, where "related" can be user-defined.)
 - Many nice plotting functions
 - Convenience functions for loading in data
-- Automatically parse speech data (audio files) using wrappers for the Google Cloud Speech to Text API
+- Automatically parse speech data (audio files) using OpenAI Whisper
 
 The intended user of this toolbox is a memory researcher who seeks an easy way to analyze and visualize data from free recall psychology experiments.
 """
 
+# Read requirements from requirements.txt
+here = os.path.abspath(os.path.dirname(__file__))
+with open(os.path.join(here, 'requirements.txt')) as f:
+    requirements = [line.strip() for line in f if line.strip() and not line.startswith('#')]
 
 EXTRAS_REQUIRE={
     'speech-decoding': ["pydub", "openai-whisper"],
@@ -35,13 +40,6 @@
     license='MIT',
     packages=find_packages(exclude=('tests', 'docs', 'paper')),
     include_package_data=True,
-    install_requires=[
-        'numpy>=2.0.0',
-        'scipy>=1.10.0',
-        'matplotlib>=3.5.0',
-        'seaborn>=0.12.0',
-        'pandas>=2.0.0',
-        'joblib>=1.3.0',
-    ],
+    install_requires=requirements,
     extras_require=EXTRAS_REQUIRE,
 )

From 9e452e86df3f375e1a732b68c822962a16be1025 Mon Sep 17 00:00:00 2001
From: Jeremy Manning <jeremy.r.manning@dartmouth.edu>
Date: Tue, 9 Dec 2025 21:44:17 -0500
Subject: [PATCH 2/7] Add GitHub Actions workflow for pytest and linting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Run tests on Python 3.9, 3.10, 3.11, 3.12
- Include coverage reporting
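- Add flake8 linting: fail on syntax errors and undefined names, and report
  all other issues as non-fatal warnings
- Trigger on pushes to master/main/optimize-quail and on PRs targeting
  master/main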

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .github/workflows/tests.yml | 59 +++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 .github/workflows/tests.yml

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..449ef59
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,59 @@
+name: Tests
+
+on:
+  push:
+    branches: [ master, main, optimize-quail ]
+  pull_request:
+    branches: [ master, main ]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.10', '3.11', '3.12']
+      fail-fast: false
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pytest pytest-cov
+        pip install -e .
+
+    - name: Run tests
+      run: |
+        pytest tests/ -v --tb=short
+
+    - name: Run tests with coverage
+      run: |
+        pytest tests/ --cov=quail --cov-report=xml --cov-report=term-missing
+
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.11'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8
+
+    - name: Lint with flake8
+      run: |
+        # Stop the build if there are Python syntax errors or undefined names
+        flake8 quail --count --select=E9,F63,F7,F82 --show-source --statistics
+        # Exit-zero treats all errors as warnings
+        flake8 quail --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

From 876009efa600c8114f5b850b317aba6dcecda3d9 Mon Sep 17 00:00:00 2001
From: Jeremy Manning <jeremy.r.manning@dartmouth.edu>
Date: Tue, 9 Dec 2025 21:47:03 -0500
Subject: [PATCH 3/7] Add ReadTheDocs configuration and update doc requirements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add .readthedocs.yaml for RTD v2 config
- Update docs/doc_requirements.txt: modernize version constraints, drop
  future, dill, and multiprocessing, and add joblib and ipykernel
- Configure Python 3.11 build environment

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .readthedocs.yaml         | 18 ++++++++++++++++++
 docs/doc_requirements.txt | 19 +++++++++----------
 2 files changed, 27 insertions(+), 10 deletions(-)
 create mode 100644 .readthedocs.yaml

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000..0e23b56
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,18 @@
+# Read the Docs configuration file for Sphinx projects
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+version: 2
+
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+
+sphinx:
+  configuration: docs/conf.py
+
+python:
+  install:
+    - requirements: docs/doc_requirements.txt
+    - method: pip
+      path: .
diff --git a/docs/doc_requirements.txt b/docs/doc_requirements.txt
index 7d905ee..7979012 100644
--- a/docs/doc_requirements.txt
+++ b/docs/doc_requirements.txt
@@ -1,18 +1,17 @@
-sphinx==1.5.5
-sphinx_bootstrap_theme==0.4.13
+sphinx>=4.0
+sphinx_bootstrap_theme
 sphinx-gallery
 numpydoc
 nbsphinx
-seaborn>=0.7.1
-matplotlib>=1.5.1
-scipy>=0.17.1
-numpy>=1.10.4
-pandas==0.18.1
-future
+seaborn>=0.12.0
+matplotlib>=3.5.0
+scipy>=1.10.0
+numpy>=2.0.0
+pandas>=2.0.0
+joblib>=1.3.0
 sqlalchemy
-dill
 requests
 pydub
-multiprocessing
 pathos
 jupyter_client
+ipykernel

From 5d2f9cac3138aca6814f26ddf88b5cbc385d18de Mon Sep 17 00:00:00 2001
From: Jeremy Manning <jeremy.r.manning@dartmouth.edu>
Date: Tue, 9 Dec 2025 21:48:32 -0500
Subject: [PATCH 4/7] Fix linter errors: remove dead code and fix typo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove unused compute_stimulus_stick function with undefined variables
- Fix typo: experimeter_filter -> experimenter_filter

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 quail/fingerprint.py | 7 -------
 quail/load.py        | 2 +-
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/quail/fingerprint.py b/quail/fingerprint.py
index 8bff6df..44d1bb7 100644
--- a/quail/fingerprint.py
+++ b/quail/fingerprint.py
@@ -370,13 +370,6 @@ def compute_feature_stick(features, weights, alpha):
         return feature_stick
 
     def reorder_list(egg, feature_stick, dist_dict, tau):
-
-        def compute_stimulus_stick(s, tau):
-            '''create a 'stick' of feature weights'''
-
-            feature_stick = [[weights[feature]]*round(weights[feature]**alpha)*100 for feature in w]
-            return [item for sublist in feature_stick for item in sublist]
-
         # parse egg
         pres, rec, features, dist_funcs = parse_egg(egg)
 
diff --git a/quail/load.py b/quail/load.py
index 614f551..92ad255 100644
--- a/quail/load.py
+++ b/quail/load.py
@@ -387,7 +387,7 @@ def getFeatures(stimDict):
 
     # add custom filters
     if filters:
-        filter_func = [adaptive_filter, experimeter_filter, experiments_filter] + filters
+        filter_func = [adaptive_filter, experimenter_filter, experiments_filter] + filters
     else:
         filter_func = [adaptive_filter, experimenter_filter, experiments_filter]
 

From 6e3453fec90f30b5b52e6ff0ef3a5f0c4522503b Mon Sep 17 00:00:00 2001
From: Jeremy Manning <jeremy.r.manning@dartmouth.edu>
Date: Tue, 9 Dec 2025 21:51:20 -0500
Subject: [PATCH 5/7] Add CI and docs badges to README, update speech decoding
 reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add GitHub Actions test status badge
- Add ReadTheDocs documentation badge
- Update speech decoding mention from Google Cloud to OpenAI Whisper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9689196..b7c0f2a 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1003184.svg)](https://doi.org/10.5281/zenodo.1003184)
 [![JOSS](http://joss.theoj.org/papers/3fb5123eb2538e06f6a25ded0a088b73/status.svg)](http://joss.theoj.org/papers/10.21105/joss.00424)
+[![Tests](https://github.com/ContextLab/quail/actions/workflows/tests.yml/badge.svg)](https://github.com/ContextLab/quail/actions/workflows/tests.yml)
+[![Documentation Status](https://readthedocs.org/projects/cdl-quail/badge/?version=latest)](https://cdl-quail.readthedocs.io/en/latest/?badge=latest)
 
 ![Quail logo](images/Quail_Logo_small.png)
 
@@ -12,7 +14,7 @@ Quail is a Python package that facilitates analyses of behavioral data from memo
 - Clustering metrics (e.g. single-number summaries of how often participants transition from recalling a word to another related word, where "related" can be user-defined.)
 - Many nice plotting functions
 - Convenience functions for loading in data
-- Automatically parse speech data (audio files) using wrappers for the Google Cloud Speech to Text API
+- Automatically parse speech data (audio files) using OpenAI Whisper
 
 The intended user of this toolbox is a memory researcher who seeks an easy way to analyze and visualize data from free recall psychology experiments.
 

From 9dfc8f6a0443cb462a2f6b91709d1b6fcc9220db Mon Sep 17 00:00:00 2001
From: Jeremy Manning <jeremy.r.manning@dartmouth.edu>
Date: Tue, 9 Dec 2025 21:57:29 -0500
Subject: [PATCH 6/7] Add ffmpeg and torch to CI for Whisper speech tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Install ffmpeg via apt-get for audio processing
- Install CPU-only torch for Whisper model inference
- Install openai-whisper explicitly before installing the quail package itself

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .github/workflows/tests.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 449ef59..3033e34 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -22,10 +22,17 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
 
+    - name: Install system dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y ffmpeg
+
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install pytest pytest-cov
+        pip install torch --index-url https://download.pytorch.org/whl/cpu
+        pip install openai-whisper
         pip install -e .
 
     - name: Run tests

From 1a8cf0c65baa490d77c8ecae6bf6baa92cedba54 Mon Sep 17 00:00:00 2001
From: Jeremy Manning <jeremy.r.manning@dartmouth.edu>
Date: Tue, 9 Dec 2025 22:03:40 -0500
Subject: [PATCH 7/7] Fix simulate_list bug: use correct group range 1-16
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The wordpool groups are numbered 1-16, not 0-15. Using range(16)
could select group 0, which doesn't exist, causing sample(16) to fail
on the resulting empty selection; it also could never select group 16.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 quail/simulate.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/quail/simulate.py b/quail/simulate.py
index c1be2a0..502f3f9 100644
--- a/quail/simulate.py
+++ b/quail/simulate.py
@@ -10,9 +10,8 @@ def simulate_list(nwords=16, nrec=10, ncats=4):
     path = os.path.join(os.path.dirname(__file__), 'data/cut_wordpool.csv')
     wp = pd.read_csv(path)
 
-    # get one list
-    # logic seems to pick a group random
-    wp = wp[wp['GROUP']==np.random.choice(list(range(16)), 1)[0]].sample(16)
+    # get one list - pick a random group (groups are 1-16)
+    wp = wp[wp['GROUP']==np.random.choice(list(range(1, 17)), 1)[0]].sample(16)
 
     wp['COLOR'] = [[int(np.random.rand() * 255) for i in range(3)] for i in range(16)]