update structure #3
New file (environment variable template):

```diff
@@ -0,0 +1,11 @@
+# TableScanner Environment Variables
+# Copy this file to .env and fill in your actual values
+
+# KBase Service Authentication Token
+KB_SERVICE_AUTH_TOKEN=your_kbase_token_here
+
+# Cache directory for storing downloaded files and SQLite databases
+CACHE_DIR=/tmp/tablescanner_cache
+
+# KBase Workspace Service URL
+WORKSPACE_URL=https://kbase.us/services/ws
```
Changes to the ignore file:

```diff
@@ -17,3 +17,6 @@ uv.lock
 .venv
 venv/
 *.log
+
+# Environment variables
+.env
```
New file (application settings):

```diff
@@ -0,0 +1,24 @@
+"""
+Configuration settings for TableScanner application.
+"""
+
+from pydantic_settings import BaseSettings
+
+
+class Settings(BaseSettings):
+    """Application settings."""
+
+    KB_SERVICE_AUTH_TOKEN: str
+    CACHE_DIR: str
```
Review comment on lines +9 to +12. Suggested change:

```diff
-    """Application settings."""
-    KB_SERVICE_AUTH_TOKEN: str
-    CACHE_DIR: str
+    """
+    Application settings.
+
+    Required environment variables:
+    - KB_SERVICE_AUTH_TOKEN
+    - WORKSPACE_URL
+
+    Optional environment variables:
+    - CACHE_DIR (default: "./cache")
+    """
+    KB_SERVICE_AUTH_TOKEN: str
+    CACHE_DIR: str = "./cache"
```
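Since the environment template above says "Copy this file to .env", the Settings class presumably needs to read that file. A minimal sketch of how that could be wired up, assuming pydantic-settings v2 (the model_config/SettingsConfigDict mechanism is that library's API; the field set mirrors the template and the reviewer's suggestion):

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application settings, read from the environment or a local .env file."""

    # Fall back to .env when variables are not set in the process environment.
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    KB_SERVICE_AUTH_TOKEN: str
    WORKSPACE_URL: str
    CACHE_DIR: str = "./cache"


# Instantiation validates that all required variables are present.
settings = Settings()
```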
New file (request/response models):

```diff
@@ -0,0 +1,15 @@
+"""
+Pydantic models for request/response schemas.
+"""
+
+from typing import Optional, List, Dict, Any
+from pydantic import BaseModel
+
+
+class SearchRequest(BaseModel):
+    """Search request with query parameters."""
+    pangenome_id: str
+    table_name: str
```
Review comment on lines +6 to +12. Suggested change:

```diff
-from pydantic import BaseModel
+from pydantic import BaseModel, constr
 
 
 class SearchRequest(BaseModel):
     """Search request with query parameters."""
-    pangenome_id: str
-    table_name: str
+    pangenome_id: constr(min_length=1, regex=r"^[A-Za-z0-9_\-]+$")
+    table_name: constr(min_length=1, regex=r"^[A-Za-z_][A-Za-z0-9_]*$")
```
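One caveat about this suggestion: the PR imports pydantic_settings elsewhere, which suggests Pydantic v2, and v2 renamed constr's regex keyword to pattern. A v2-compatible sketch of the same constraints using Field (the patterns are the reviewer's and have not been checked against real IDs):

```python
from pydantic import BaseModel, Field


class SearchRequest(BaseModel):
    """Search request with query parameters."""

    # Pydantic v2 spells the keyword `pattern`; `regex` raises an error there.
    pangenome_id: str = Field(min_length=1, pattern=r"^[A-Za-z0-9_\-]+$")
    table_name: str = Field(min_length=1, pattern=r"^[A-Za-z_][A-Za-z0-9_]*$")
```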
Copilot AI · Dec 15, 2025

Missing validation for limit field: The limit field should have a minimum value (at least 1) and possibly a maximum value to prevent excessive memory usage or denial of service from extremely large result sets. Consider adding Field validators with appropriate constraints.

Suggested change:

```diff
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 
 class SearchRequest(BaseModel):
     """Search request with query parameters."""
     pangenome_id: str
     table_name: str
-    limit: Optional[int] = None
+    limit: Optional[int] = Field(
+        default=None,
+        ge=1,
+        le=1000,
+        description="Maximum number of results to return (min 1, max 1000)"
+    )
```
Changes to the API routes module:

```diff
@@ -4,34 +4,89 @@
 Contains all API endpoint definitions.
 """
 
-from fastapi import APIRouter, Query
+from pathlib import Path
+from fastapi import APIRouter, Request, HTTPException
+
+from app.models import SearchRequest
+from app.utils.workspace import get_object_info
+from app.utils.download import download_from_handle
+from app.utils.cache import get_cache_paths, save_to_cache, is_cached
+from app.utils.sqlite import convert_to_sqlite
 
 router = APIRouter()
 
 
 @router.get("/")
-async def root():
+async def root(request: Request):
     """Root endpoint returning service information."""
+    settings = request.app.state.settings
     return {
         "service": "TableScanner",
         "version": "1.0.0",
-        "status": "running"
+        "status": "running",
+        "cache_dir": settings.CACHE_DIR
     }
 
 
-@router.get("/search")
-def search(id: str = Query(..., description="ID to search for")):
+@router.post("/search")
+def search(request: Request, search_request: SearchRequest):
```
Review comment. Suggested change:

```diff
-def search(request: Request, search_request: SearchRequest):
+async def search(request: Request, search_request: SearchRequest):
```
Copilot AI · Dec 15, 2025

The get_object_info function always raises NotImplementedError (as seen in workspace.py line 39), which will cause this endpoint to fail on every request. The function call will crash before reaching the subsequent logic. This should be implemented, or the endpoint should handle the NotImplementedError appropriately.

Suggested change:

```diff
-    object_info = get_object_info(search_request.pangenome_id, token, workspace_url)
+    try:
+        object_info = get_object_info(search_request.pangenome_id, token, workspace_url)
+    except NotImplementedError:
+        raise HTTPException(
+            status_code=501,
+            detail="get_object_info is not implemented."
+        )
```
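If the stub is to be implemented rather than guarded, here is a minimal sketch under stated assumptions: that workspace.py follows the standard KBase JSON-RPC 1.1 convention, and that Workspace.get_object_info3 is the intended method. Neither assumption is confirmed by this PR.

```python
import requests


def get_object_info(ref: str, token: str, workspace_url: str) -> dict:
    """Fetch object info from the KBase Workspace service via JSON-RPC."""
    payload = {
        "method": "Workspace.get_object_info3",
        "version": "1.1",
        "id": "1",
        "params": [{"objects": [{"ref": ref}]}],
    }
    response = requests.post(
        workspace_url,
        json=payload,
        headers={"Authorization": token},
        timeout=30,
    )
    response.raise_for_status()
    body = response.json()
    if body.get("error"):
        # The workspace reports failures in the JSON body, not the HTTP status.
        raise RuntimeError(f"Workspace error: {body['error']}")
    return body["result"][0]
```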
Copilot AI · Dec 15, 2025

Blocking I/O in async context: The download_from_handle function performs blocking network I/O using the synchronous requests library. Since this is called from a FastAPI endpoint, it blocks the event loop. Consider using an async HTTP client (like httpx) or running the download in a thread pool executor to maintain async benefits.
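No code accompanied this comment. One way to keep the synchronous download without blocking the loop is to hand it to a worker thread; a minimal sketch, assuming the endpoint becomes async as suggested above (asyncio.to_thread requires Python 3.9+, and the helper name is illustrative):

```python
import asyncio

from app.utils.download import download_from_handle


async def fetch_handle_bytes(handle_url: str, token: str) -> bytes:
    # Run the blocking requests-based download in a worker thread so the
    # event loop stays free to serve other requests in the meantime.
    return await asyncio.to_thread(download_from_handle, handle_url, token)
```

Note that FastAPI already runs plain `def` endpoints in a threadpool, so this only matters once `search` is declared `async def`.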
Copilot AI · Dec 15, 2025

The convert_to_sqlite function raises NotImplementedError, but it is called without a try-except block. This will cause the endpoint to fail with an unhandled exception. Either implement the function or add proper error handling to return a meaningful HTTP error response.

Suggested change:

```diff
-    convert_to_sqlite(cache_file_path, sqlite_file_path)
+    try:
+        convert_to_sqlite(cache_file_path, sqlite_file_path)
+    except NotImplementedError:
+        raise HTTPException(
+            status_code=501,
+            detail="The convert_to_sqlite function is not implemented."
+        )
```
Copilot AI · Dec 15, 2025

The convert_to_sqlite function always raises NotImplementedError (as seen in sqlite.py line 48), which will cause the endpoint to fail when attempting to convert uncached files. This should be implemented, or the endpoint should handle the NotImplementedError appropriately.

Suggested change:

```diff
-    convert_to_sqlite(cache_file_path, sqlite_file_path)
+    try:
+        convert_to_sqlite(cache_file_path, sqlite_file_path)
+    except NotImplementedError:
+        raise HTTPException(
+            status_code=501,
+            detail="Conversion to SQLite is not implemented yet."
+        )
```
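For reference, a minimal sketch of what convert_to_sqlite could look like, assuming the cached files are tab-separated text with a header row; the TSV format and the "data" table name are assumptions, not anything this PR specifies:

```python
import csv
import sqlite3
from pathlib import Path


def convert_to_sqlite(source_path: Path, sqlite_path: Path, table: str = "data") -> None:
    """Load a TSV file with a header row into a single SQLite table."""
    with open(source_path, newline="", encoding="utf-8") as fh:
        reader = csv.reader(fh, delimiter="\t")
        header = next(reader)
        # Quote column names so arbitrary headers do not break the DDL.
        columns = ", ".join(f'"{name}"' for name in header)
        placeholders = ", ".join("?" for _ in header)
        with sqlite3.connect(sqlite_path) as conn:
            conn.execute(f'CREATE TABLE IF NOT EXISTS "{table}" ({columns})')
            conn.executemany(f'INSERT INTO "{table}" VALUES ({placeholders})', reader)
```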
Copilot AI · Dec 15, 2025

The import for get_table_data should be moved to the top of the file with the other imports from app.utils.sqlite (line 14) for better code organization and consistency. Inline imports should be avoided unless there is a specific reason, such as circular imports.
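No suggestion was attached; consolidating would just mean extending the existing line 14 import, along the lines of:

```python
from app.utils.sqlite import convert_to_sqlite, get_table_data
```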
Copilot AI · Dec 15, 2025

The TODO comment is missing a space after the hash and a colon. The standard format is "# TODO:" with proper spacing, for consistency with other TODO comments in the codebase.

Suggested change:

```diff
-#TODO use a return model when we figure out what we want to return
+# TODO: use a return model when we figure out what we want to return
```
Copilot AI · Dec 15, 2025

Inconsistent comment formatting: The TODO comment should have a space after the colon for consistency with the TODO on line 47.

Suggested change:

```diff
-#TODO use a return model when we figure out what we want to return
+# TODO: use a return model when we figure out what we want to return
```
Copilot AI · Dec 15, 2025

Potential information disclosure: The response exposes internal file system paths (cache_file and sqlite_file). This could reveal sensitive information about the server's directory structure to clients. Consider removing these fields or making them optional for debugging purposes only.

The lines in question:

```python
"cache_file": str(cache_file_path),
"sqlite_file": str(sqlite_file_path),
```
New file (utils package exports):

```diff
@@ -0,0 +1,22 @@
+"""
+Utils module for TableScanner.
+
+Contains business logic separated from route handlers.
+"""
+
+from app.utils.download import download_from_handle
+from app.utils.workspace import get_object_info
+from app.utils.cache import get_cache_paths, ensure_cache_dir, save_to_cache, is_cached
+from app.utils.sqlite import convert_to_sqlite, query_sqlite, get_table_data
+
+__all__ = [
+    "download_from_handle",
+    "get_object_info",
+    "get_cache_paths",
+    "ensure_cache_dir",
+    "save_to_cache",
+    "is_cached",
+    "convert_to_sqlite",
+    "query_sqlite",
+    "get_table_data",
+]
```
New file (cache utilities):

```diff
@@ -0,0 +1,58 @@
+"""
+Cache utilities for managing local file caching.
+"""
+
+from pathlib import Path
+from typing import Tuple
+
+
+def get_cache_paths(cache_dir: Path, id: str, filename: str) -> Tuple[Path, Path]:
+    """
+    Get cache file paths for a given ID and filename.
+
+    Args:
+        cache_dir: Base cache directory
+        id: Object ID
+        filename: Original filename
+
+    Returns:
+        Tuple of (cache_file_path, sqlite_file_path)
+    """
+    cache_file_path = cache_dir / id / filename
+    sqlite_file_path = cache_dir / id / f"{Path(filename).stem}.db"
+    return cache_file_path, sqlite_file_path
+
+
+def ensure_cache_dir(cache_path: Path) -> None:
+    """
+    Ensure cache directory exists.
+
+    Args:
+        cache_path: Path to cache file (directory will be created from parent)
+    """
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+
+
+def save_to_cache(cache_path: Path, data: bytes) -> None:
+    """
+    Save binary data to cache file.
+
+    Args:
+        cache_path: Path where file should be saved
+        data: Binary data to save
+    """
+    ensure_cache_dir(cache_path)
+    cache_path.write_bytes(data)
+
+
+def is_cached(cache_path: Path) -> bool:
+    """
+    Check if file exists in cache.
+
+    Args:
+        cache_path: Path to cache file
+
+    Returns:
+        True if file exists, False otherwise
+    """
+    return cache_path.exists()
```

A review comment was left on lines +21 to +22, but its content failed to load.
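How these helpers compose in the search flow (paths and bytes are illustrative):

```python
from pathlib import Path

from app.utils.cache import get_cache_paths, is_cached, save_to_cache

cache_dir = Path("/tmp/tablescanner_cache")
cache_file, sqlite_file = get_cache_paths(cache_dir, "pangenome_123", "genes.tsv")
# cache_file  -> /tmp/tablescanner_cache/pangenome_123/genes.tsv
# sqlite_file -> /tmp/tablescanner_cache/pangenome_123/genes.db

if not is_cached(cache_file):
    # save_to_cache creates the parent directory before writing.
    save_to_cache(cache_file, b"gene_id\tname\n")
```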
New file (download utilities):

```diff
@@ -0,0 +1,25 @@
+"""
+Handle/Blobstore utilities for downloading files.
+"""
+
+import requests
+
+
+def download_from_handle(handle_url: str, auth_token: str) -> bytes:
+    """
+    Download binary file from KBase Handle/Blobstore service.
+
+    Args:
+        handle_url: URL to the handle/blobstore service
+        auth_token: KBase authentication token
+
+    Returns:
+        Binary data
+
+    Raises:
+        requests.HTTPError: If download fails
+    """
+    headers = {"Authorization": auth_token}
+    response = requests.get(handle_url, headers=headers)
```

Review comment. Suggested change:

```diff
-    response = requests.get(handle_url, headers=headers)
+    response = requests.get(handle_url, headers=headers, timeout=30)
```
Copilot AI · Dec 15, 2025

Poor error handling: The raise_for_status() call will raise a generic HTTPError without additional context. Consider catching this exception and raising a more informative error that includes details about what failed (e.g., which URL, what status code) to help with debugging and provide better user feedback.

Suggested change:

```diff
-    response.raise_for_status()
+    try:
+        response.raise_for_status()
+    except requests.HTTPError as e:
+        raise requests.HTTPError(
+            f"Failed to download from {handle_url} (status code: {response.status_code}): {response.text}"
+        ) from e
```
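Folding the two suggestions together, a sketch of the hardened helper (the 30-second timeout is the reviewer's figure, not a project requirement):

```python
import requests


def download_from_handle(handle_url: str, auth_token: str) -> bytes:
    """Download binary data from the KBase Handle/Blobstore service."""
    headers = {"Authorization": auth_token}
    # Bound the request so a hung server cannot block a worker forever.
    response = requests.get(handle_url, headers=headers, timeout=30)
    try:
        response.raise_for_status()
    except requests.HTTPError as e:
        # Re-raise with enough context to debug the failing download.
        raise requests.HTTPError(
            f"Failed to download from {handle_url} "
            f"(status code: {response.status_code}): {response.text}"
        ) from e
    return response.content
```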