From dfc6a17278bf7b3bb7d500417fb6ac5e952c59ae Mon Sep 17 00:00:00 2001 From: Jeremy Manning Date: Tue, 9 Dec 2025 21:43:24 -0500 Subject: [PATCH 1/7] Add Whisper to requirements, update setup.py to read from requirements.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add openai-whisper to requirements.txt for speech decoding - Refactor setup.py to read install_requires from requirements.txt - Update clustering.py to return NaN (not 0.5) when insufficient data - Remove debug print statements from clustering.py - Remove benchmark_cluster.py (temporary file) - Add CLAUDE.md to .gitignore 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .gitignore | 1 + benchmark_cluster.py | 37 ------------------------------------ quail/analysis/clustering.py | 10 +++------- requirements.txt | 1 + setup.py | 16 +++++++--------- 5 files changed, 12 insertions(+), 53 deletions(-) delete mode 100644 benchmark_cluster.py diff --git a/.gitignore b/.gitignore index d9dde27..a870fdb 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ dist/* venv/ docs/_build docs/sg_execution_times.rst +CLAUDE.md diff --git a/benchmark_cluster.py b/benchmark_cluster.py deleted file mode 100644 index 34a214a..0000000 --- a/benchmark_cluster.py +++ /dev/null @@ -1,37 +0,0 @@ -import quail -import time -import numpy as np -import pandas as pd - -def run_benchmark(): - print("Generating data...") - # 20 subjects, 10 lists each, 16 items - n_subj = 20 - n_lists = 10 - list_len = 16 - - # Create random items - # Pres: ints - pres = [[[str(k) for k in range(list_len)] for j in range(n_lists)] for i in range(n_subj)] - # Rec: random shuffle of pres - rec = [[[str(k) for k in np.random.permutation(range(list_len))] for j in range(n_lists)] for i in range(n_subj)] - - # Features: Scalar 'val' - features = [{'item': str(k), 'val': float(k)} for k in range(list_len)] - # Expand features to 3D - pres_feat = 
[[features for j in range(n_lists)] for i in range(n_subj)] - - egg = quail.Egg(pres=pres, rec=rec, features=pres_feat) - - print("Starting fingerprint analysis...") - start = time.time() - # Compute fingerprint with permutations (expensive part) - # 10 perms - res = egg.analyze('fingerprint', n_perms=10, permute=True, parallel=False) - end = time.time() - - print(f"Analysis complete in {end - start:.2f} seconds.") - print(f"Result shape: {res.data.shape}") - -if __name__ == '__main__': - run_benchmark() diff --git a/quail/analysis/clustering.py b/quail/analysis/clustering.py index 3bf1d51..ee028fa 100644 --- a/quail/analysis/clustering.py +++ b/quail/analysis/clustering.py @@ -96,9 +96,8 @@ def _get_weight_exact(egg, feature, distdict, permute, n_perms): rec = list(egg.get_rec_items().values[0]) if len(rec) <= 2: - warnings.warn('Not enough recalls to compute fingerprint, returning default' - 'fingerprint.. (everything is .5)') - return 0.5 + warnings.warn('Not enough recalls to compute fingerprint, returning NaN') + return np.nan distmat = get_distmat(egg, feature, distdict) @@ -174,14 +173,11 @@ def _get_weight_best(egg, feature, distdict, permute, n_perms, distance): rec = list(egg.get_rec_items().values[0]) if len(rec) <= 2: - warnings.warn('Not enough recalls to compute fingerprint, returning default' - 'fingerprint.. 
(everything is .5)') + warnings.warn('Not enough recalls to compute fingerprint, returning NaN') return np.nan distmat = get_distmat(egg, feature, distdict) matchmat = get_match(egg, feature, distdict) - print(f"DEBUG: matchmat.shape={matchmat.shape}, len(rec)={len(rec)}") - print(f"DEBUG: distmat.shape={distmat.shape}") ranks = [] for i in range(len(rec)-1): diff --git a/requirements.txt b/requirements.txt index ae6eaa4..db8d01a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ matplotlib>=3.5.0 seaborn>=0.12.0 pandas>=2.0.0 joblib>=1.3.0 +openai-whisper diff --git a/setup.py b/setup.py index 4731906..3f2a250 100755 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import os from setuptools import setup, find_packages DESCRIPTION = 'A python toolbox for analyzing and plotting free recall data' @@ -12,11 +13,15 @@ - Clustering metrics (e.g. single-number summaries of how often participants transition from recalling a word to another related word, where "related" can be user-defined.) - Many nice plotting functions - Convenience functions for loading in data -- Automatically parse speech data (audio files) using wrappers for the Google Cloud Speech to Text API +- Automatically parse speech data (audio files) using OpenAI Whisper The intended user of this toolbox is a memory researcher who seeks an easy way to analyze and visualize data from free recall psychology experiments. 
""" +# Read requirements from requirements.txt +here = os.path.abspath(os.path.dirname(__file__)) +with open(os.path.join(here, 'requirements.txt')) as f: + requirements = [line.strip() for line in f if line.strip() and not line.startswith('#')] EXTRAS_REQUIRE={ 'speech-decoding': ["pydub", "openai-whisper"], @@ -35,13 +40,6 @@ license='MIT', packages=find_packages(exclude=('tests', 'docs', 'paper')), include_package_data=True, - install_requires=[ - 'numpy>=2.0.0', - 'scipy>=1.10.0', - 'matplotlib>=3.5.0', - 'seaborn>=0.12.0', - 'pandas>=2.0.0', - 'joblib>=1.3.0', - ], + install_requires=requirements, extras_require=EXTRAS_REQUIRE, ) From 9e452e86df3f375e1a732b68c822962a16be1025 Mon Sep 17 00:00:00 2001 From: Jeremy Manning Date: Tue, 9 Dec 2025 21:44:17 -0500 Subject: [PATCH 2/7] Add GitHub Actions workflow for pytest and linting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Run tests on Python 3.9, 3.10, 3.11, 3.12 - Include coverage reporting - Add flake8 linting for syntax errors - Trigger on push to master/main/optimize-quail and PRs 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .github/workflows/tests.yml | 59 +++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 .github/workflows/tests.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..449ef59 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,59 @@ +name: Tests + +on: + push: + branches: [ master, main, optimize-quail ] + pull_request: + branches: [ master, main ] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.9', '3.10', '3.11', '3.12'] + fail-fast: false + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies 
+ run: | + python -m pip install --upgrade pip + pip install pytest pytest-cov + pip install -e . + + - name: Run tests + run: | + pytest tests/ -v --tb=short + + - name: Run tests with coverage + run: | + pytest tests/ --cov=quail --cov-report=xml --cov-report=term-missing + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 + + - name: Lint with flake8 + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 quail --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings + flake8 quail --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics From 876009efa600c8114f5b850b317aba6dcecda3d9 Mon Sep 17 00:00:00 2001 From: Jeremy Manning Date: Tue, 9 Dec 2025 21:47:03 -0500 Subject: [PATCH 3/7] Add ReadTheDocs configuration and update doc requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add .readthedocs.yaml for RTD v2 config - Update docs/doc_requirements.txt with modern package versions - Configure Python 3.11 build environment 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .readthedocs.yaml | 18 ++++++++++++++++++ docs/doc_requirements.txt | 19 +++++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..0e23b56 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,18 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + +sphinx: + configuration: docs/conf.py + +python: + install: 
+ - requirements: docs/doc_requirements.txt + - method: pip + path: . diff --git a/docs/doc_requirements.txt b/docs/doc_requirements.txt index 7d905ee..7979012 100644 --- a/docs/doc_requirements.txt +++ b/docs/doc_requirements.txt @@ -1,18 +1,17 @@ -sphinx==1.5.5 -sphinx_bootstrap_theme==0.4.13 +sphinx>=4.0 +sphinx_bootstrap_theme sphinx-gallery numpydoc nbsphinx -seaborn>=0.7.1 -matplotlib>=1.5.1 -scipy>=0.17.1 -numpy>=1.10.4 -pandas==0.18.1 -future +seaborn>=0.12.0 +matplotlib>=3.5.0 +scipy>=1.10.0 +numpy>=2.0.0 +pandas>=2.0.0 +joblib>=1.3.0 sqlalchemy -dill requests pydub -multiprocessing pathos jupyter_client +ipykernel From 5d2f9cac3138aca6814f26ddf88b5cbc385d18de Mon Sep 17 00:00:00 2001 From: Jeremy Manning Date: Tue, 9 Dec 2025 21:48:32 -0500 Subject: [PATCH 4/7] Fix linter errors: remove dead code and fix typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unused compute_stimulus_stick function with undefined variables - Fix typo: experimeter_filter -> experimenter_filter 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- quail/fingerprint.py | 7 ------- quail/load.py | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/quail/fingerprint.py b/quail/fingerprint.py index 8bff6df..44d1bb7 100644 --- a/quail/fingerprint.py +++ b/quail/fingerprint.py @@ -370,13 +370,6 @@ def compute_feature_stick(features, weights, alpha): return feature_stick def reorder_list(egg, feature_stick, dist_dict, tau): - - def compute_stimulus_stick(s, tau): - '''create a 'stick' of feature weights''' - - feature_stick = [[weights[feature]]*round(weights[feature]**alpha)*100 for feature in w] - return [item for sublist in feature_stick for item in sublist] - # parse egg pres, rec, features, dist_funcs = parse_egg(egg) diff --git a/quail/load.py b/quail/load.py index 614f551..92ad255 100644 --- a/quail/load.py +++ b/quail/load.py @@ -387,7 +387,7 @@ def 
getFeatures(stimDict): # add custom filters if filters: - filter_func = [adaptive_filter, experimeter_filter, experiments_filter] + filters + filter_func = [adaptive_filter, experimenter_filter, experiments_filter] + filters else: filter_func = [adaptive_filter, experimenter_filter, experiments_filter] From 6e3453fec90f30b5b52e6ff0ef3a5f0c4522503b Mon Sep 17 00:00:00 2001 From: Jeremy Manning Date: Tue, 9 Dec 2025 21:51:20 -0500 Subject: [PATCH 5/7] Add CI and docs badges to README, update speech decoding reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add GitHub Actions test status badge - Add ReadTheDocs documentation badge - Update speech decoding mention from Google Cloud to OpenAI Whisper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9689196..b7c0f2a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1003184.svg)](https://doi.org/10.5281/zenodo.1003184) [![JOSS](http://joss.theoj.org/papers/3fb5123eb2538e06f6a25ded0a088b73/status.svg)](http://joss.theoj.org/papers/10.21105/joss.00424) +[![Tests](https://github.com/ContextLab/quail/actions/workflows/tests.yml/badge.svg)](https://github.com/ContextLab/quail/actions/workflows/tests.yml) +[![Documentation Status](https://readthedocs.org/projects/cdl-quail/badge/?version=latest)](https://cdl-quail.readthedocs.io/en/latest/?badge=latest) ![Quail logo](images/Quail_Logo_small.png) @@ -12,7 +14,7 @@ Quail is a Python package that facilitates analyses of behavioral data from memo - Clustering metrics (e.g. single-number summaries of how often participants transition from recalling a word to another related word, where "related" can be user-defined.) 
- Many nice plotting functions - Convenience functions for loading in data -- Automatically parse speech data (audio files) using wrappers for the Google Cloud Speech to Text API +- Automatically parse speech data (audio files) using OpenAI Whisper The intended user of this toolbox is a memory researcher who seeks an easy way to analyze and visualize data from free recall psychology experiments. From 9dfc8f6a0443cb462a2f6b91709d1b6fcc9220db Mon Sep 17 00:00:00 2001 From: Jeremy Manning Date: Tue, 9 Dec 2025 21:57:29 -0500 Subject: [PATCH 6/7] Add ffmpeg and torch to CI for Whisper speech tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Install ffmpeg via apt-get for audio processing - Install CPU-only torch for Whisper model inference - Install openai-whisper explicitly before package 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .github/workflows/tests.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 449ef59..3033e34 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -22,10 +22,17 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y ffmpeg + - name: Install dependencies run: | python -m pip install --upgrade pip pip install pytest pytest-cov + pip install torch --index-url https://download.pytorch.org/whl/cpu + pip install openai-whisper pip install -e . - name: Run tests From 1a8cf0c65baa490d77c8ecae6bf6baa92cedba54 Mon Sep 17 00:00:00 2001 From: Jeremy Manning Date: Tue, 9 Dec 2025 22:03:40 -0500 Subject: [PATCH 7/7] Fix simulate_list bug: use correct group range 1-16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wordpool groups are numbered 1-16, not 0-15. 
Using range(16) could select group 0, which doesn't exist, causing sample() to fail intermittently, and could never select group 16. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- quail/simulate.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/quail/simulate.py b/quail/simulate.py index c1be2a0..502f3f9 100644 --- a/quail/simulate.py +++ b/quail/simulate.py @@ -10,9 +10,8 @@ def simulate_list(nwords=16, nrec=10, ncats=4): path = os.path.join(os.path.dirname(__file__), 'data/cut_wordpool.csv') wp = pd.read_csv(path) - # get one list - # logic seems to pick a group random - wp = wp[wp['GROUP']==np.random.choice(list(range(16)), 1)[0]].sample(16) + # get one list - pick a random group (groups are 1-16) + wp = wp[wp['GROUP']==np.random.choice(list(range(1, 17)), 1)[0]].sample(16) wp['COLOR'] = [[int(np.random.rand() * 255) for i in range(3)] for i in range(16)]