Commit 72efd3f

add auto nbconvert; fix bug in tab reader

1 parent ad61597, commit 72efd3f

3 files changed: +259 -9 lines

coolbox/utilities/reader/tab.py

Lines changed: 2 additions & 2 deletions
@@ -332,7 +332,7 @@ def _convert_dtype(df: pd.DataFrame) -> pd.DataFrame:
             if dtype != int:
                 df[col_name] = df[col_name].astype(int)
     # convert float columns to float
-    for col_name in ['value']:
+    for col_name in ['value', 'score']:
         if col_name in df.columns:
             dtype = df[col_name].dtype
             if dtype != float:

@@ -525,7 +525,7 @@ def get_indexed_tab_reader(
     except NotImplementedError:
         # Unsupported file type for oxbow
         try:
-            reader = TabFileReaderWithTabix(path, columns=columns)
+            reader = TabFileReaderWithTabix(indexed_path, columns=columns)
             return reader
         except OSError as e:
            log.error(str(e))
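The first hunk widens _convert_dtype so that a 'score' column is coerced to float in addition to 'value'; the second fixes the Tabix fallback so it opens the indexed file (indexed_path) rather than the original path. A minimal sketch of the coercion behaviour, using a purely illustrative DataFrame (not taken from the repository):

import pandas as pd

# Illustrative input: numeric columns still held as strings, as a tab
# reader might produce them before dtype conversion.
df = pd.DataFrame({"value": ["1.5", "2.0"], "score": ["3", "7"]})

# Mirrors the patched loop: both 'value' and 'score' end up as float.
for col_name in ["value", "score"]:
    if col_name in df.columns and df[col_name].dtype != float:
        df[col_name] = df[col_name].astype(float)

print(df.dtypes)  # value and score are now float64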

docs/Makefile

Lines changed: 47 additions & 7 deletions
@@ -12,14 +12,26 @@ PORT ?= 8000
 # Put it first so that "make" without argument is like "make help".
 help:
 	@echo "Please use 'make <target>' where <target> is one of"
-	@echo "  html              to make standalone HTML files (same as GitHub Actions)"
-	@echo "  clean             to remove build artifacts"
-	@echo "  livehtml          to auto-rebuild and serve docs with live reload"
-	@echo "  serve             to serve the built documentation"
-	@echo "  linkcheck         to check all external links for integrity"
-	@echo "  doctest           to run all doctests embedded in the documentation"
+	@echo ""
+	@echo "Building:"
+	@echo "  html              build HTML documentation (same as GitHub Actions)"
+	@echo "  clean             remove build artifacts"
+	@echo "  livehtml          auto-rebuild and serve docs with live reload"
+	@echo ""
+	@echo "Notebooks:"
+	@echo "  list-nb           list all Jupyter notebooks"
+	@echo "  update-nb         execute and update all notebooks (sequential)"
+	@echo "  update-nb-fast    execute and update all notebooks (parallel)"
+	@echo "  update-nb-filter  update notebooks matching FILTER (e.g., FILTER=quick_start)"
+	@echo ""
+	@echo "Serving:"
+	@echo "  serve             serve the built documentation on http://localhost:$(PORT)"
+	@echo ""
+	@echo "Testing:"
+	@echo "  linkcheck         check all external links for integrity"
+	@echo "  doctest           run all doctests embedded in the documentation"
 
-.PHONY: help html clean livehtml serve linkcheck doctest Makefile
+.PHONY: help html clean livehtml serve update-nb update-nb-fast update-nb-filter list-nb linkcheck doctest Makefile
 
 # Build HTML documentation (same as GitHub Actions)
 html:

@@ -60,6 +72,34 @@ serve:
 	@echo "Press Ctrl+C to stop..."
 	@cd $(BUILDDIR)/html && python -m http.server $(PORT)
 
+# Update Jupyter notebooks by executing them
+update-nb:
+	@echo "Updating Jupyter notebooks (sequential execution)..."
+	@python update_notebooks.py --source-dir $(SOURCEDIR)
+	@echo ""
+	@echo "Notebooks updated successfully!"
+
+# Update notebooks in parallel (faster but uses more resources)
+update-nb-fast:
+	@echo "Updating Jupyter notebooks (parallel execution)..."
+	@python update_notebooks.py --source-dir $(SOURCEDIR) --parallel 4
+	@echo ""
+	@echo "Notebooks updated successfully!"
+
+# Update specific notebook(s) by name filter
+update-nb-filter:
+	@if [ -z "$(FILTER)" ]; then \
+		echo "Error: Please specify FILTER variable"; \
+		echo "Example: make update-nb-filter FILTER=quick_start"; \
+		exit 1; \
+	fi
+	@echo "Updating notebooks matching '$(FILTER)'..."
+	@python update_notebooks.py --source-dir $(SOURCEDIR) --filter "$(FILTER)"
+
+# Dry run - list notebooks without executing
+list-nb:
+	@python update_notebooks.py --source-dir $(SOURCEDIR) --dry-run
+
 # Check links
 linkcheck:
 	$(SPHINXBUILD) -b linkcheck $(SOURCEDIR) $(BUILDDIR)/linkcheck $(SPHINXOPTS)
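Taken together, the intended workflow is: make list-nb to preview which notebooks would run, make update-nb (or make update-nb-fast) to re-execute them all, and make update-nb-filter FILTER=quick_start to refresh only matching notebooks. Each target delegates to the docs/update_notebooks.py script shown below.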

docs/update_notebooks.py

Lines changed: 210 additions & 0 deletions
@@ -0,0 +1,210 @@
#!/usr/bin/env python
"""
Update all Jupyter notebooks in the docs directory by executing them.
This script runs all notebooks and updates their outputs.
"""

import sys
import argparse
from pathlib import Path
import subprocess
import concurrent.futures
from typing import List, Tuple


def execute_notebook(notebook_path: Path, timeout: int = 300) -> Tuple[Path, bool, str]:
    """
    Execute a single notebook and update its outputs.

    Parameters
    ----------
    notebook_path : Path
        Path to the notebook file
    timeout : int
        Timeout in seconds for notebook execution

    Returns
    -------
    tuple
        (notebook_path, success, message)
    """
    try:
        print(f"Executing: {notebook_path}")

        # Use jupyter nbconvert to execute and update in place
        result = subprocess.run(
            [
                "jupyter", "nbconvert",
                "--to", "notebook",
                "--execute",
                "--inplace",
                "--ExecutePreprocessor.timeout={}".format(timeout),
                "--ExecutePreprocessor.kernel_name=python3",
                str(notebook_path)
            ],
            capture_output=True,
            text=True,
            timeout=timeout + 10  # Add buffer to subprocess timeout
        )

        if result.returncode == 0:
            print(f"✓ Success: {notebook_path}")
            return (notebook_path, True, "Executed successfully")
        else:
            error_msg = result.stderr or result.stdout
            print(f"✗ Failed: {notebook_path}")
            print(f"  Error: {error_msg[:200]}")
            return (notebook_path, False, error_msg)

    except subprocess.TimeoutExpired:
        msg = f"Timeout after {timeout} seconds"
        print(f"✗ Timeout: {notebook_path}")
        return (notebook_path, False, msg)
    except Exception as e:
        msg = str(e)
        print(f"✗ Error: {notebook_path} - {msg}")
        return (notebook_path, False, msg)


def find_notebooks(source_dir: Path, exclude_checkpoints: bool = True) -> List[Path]:
    """
    Find all Jupyter notebooks in the source directory.

    Parameters
    ----------
    source_dir : Path
        Source directory to search
    exclude_checkpoints : bool
        Whether to exclude .ipynb_checkpoints directories

    Returns
    -------
    list
        List of notebook paths
    """
    notebooks = []
    for nb in source_dir.rglob("*.ipynb"):
        if exclude_checkpoints and ".ipynb_checkpoints" in str(nb):
            continue
        notebooks.append(nb)
    return sorted(notebooks)


def main():
    parser = argparse.ArgumentParser(
        description="Execute and update Jupyter notebooks in docs"
    )
    parser.add_argument(
        "--source-dir",
        type=Path,
        default=Path("source"),
        help="Source directory containing notebooks (default: source)"
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=300,
        help="Timeout per notebook in seconds (default: 300)"
    )
    parser.add_argument(
        "--parallel",
        type=int,
        default=1,
        help="Number of parallel workers (default: 1)"
    )
    parser.add_argument(
        "--filter",
        type=str,
        default="",
        help="Filter notebooks by name pattern (e.g., 'quick_start')"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="List notebooks without executing them"
    )
    parser.add_argument(
        "--fail-fast",
        action="store_true",
        help="Stop on first failure"
    )

    args = parser.parse_args()

    # Find all notebooks
    print(f"Searching for notebooks in {args.source_dir}...")
    notebooks = find_notebooks(args.source_dir)

    # Apply filter
    if args.filter:
        notebooks = [nb for nb in notebooks if args.filter in str(nb)]

    if not notebooks:
        print("No notebooks found.")
        return 0

    print(f"\nFound {len(notebooks)} notebook(s):")
    for nb in notebooks:
        print(f"  - {nb.relative_to(args.source_dir.parent)}")

    if args.dry_run:
        print("\nDry run - no notebooks executed.")
        return 0

    print(f"\nExecuting notebooks (timeout: {args.timeout}s, workers: {args.parallel})...")
    print("=" * 80)

    # Execute notebooks
    results = []
    if args.parallel > 1:
        # Parallel execution
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.parallel) as executor:
            futures = {
                executor.submit(execute_notebook, nb, args.timeout): nb
                for nb in notebooks
            }

            for future in concurrent.futures.as_completed(futures):
                result = future.result()
                results.append(result)

                if args.fail_fast and not result[1]:
                    # Cancel remaining futures
                    for f in futures:
                        f.cancel()
                    break
    else:
        # Sequential execution
        for nb in notebooks:
            result = execute_notebook(nb, args.timeout)
            results.append(result)

            if args.fail_fast and not result[1]:
                break

    # Print summary
    print("\n" + "=" * 80)
    print("Summary:")
    print("=" * 80)

    success_count = sum(1 for _, success, _ in results if success)
    fail_count = len(results) - success_count

    print(f"Total: {len(results)}")
    print(f"Success: {success_count}")
    print(f"Failed: {fail_count}")

    if fail_count > 0:
        print("\nFailed notebooks:")
        for nb, success, msg in results:
            if not success:
                print(f"  ✗ {nb.relative_to(args.source_dir.parent)}")
                print(f"    {msg[:100]}")
        return 1

    print("\n✓ All notebooks executed successfully!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
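execute_notebook shells out to jupyter nbconvert --to notebook --execute --inplace; the same in-place execution can also be driven directly from Python with nbconvert's ExecutePreprocessor. A hedged sketch of that alternative (not part of this commit; execute_inplace is a hypothetical helper name, and nbformat/nbconvert are assumed to be installed):

from pathlib import Path

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

def execute_inplace(notebook_path: Path, timeout: int = 300) -> None:
    # Read the notebook, run every cell with the notebook's own directory
    # as the working directory, and write the refreshed outputs back.
    nb = nbformat.read(str(notebook_path), as_version=4)
    ep = ExecutePreprocessor(timeout=timeout, kernel_name="python3")
    ep.preprocess(nb, {"metadata": {"path": str(notebook_path.parent)}})
    nbformat.write(nb, str(notebook_path))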
