async def demo_sports_collection():
    """Exercise each sports-market helper once and print a short summary.

    Steps, in order: fetch open NFL and NBA markets, filter the NFL frame
    down to moneylines, fetch NFL moneylines through
    ``get_moneyline_markets``, then collect NFL + NBA moneylines through
    ``SportMarketCollector.get_moneylines_only``.

    Returns:
        The frame returned by ``get_nfl_games`` (possibly empty) so the caller
        can reuse one of its tickers.
    """
    print("🏆 Neural SDK v0.3.0 - Complete Sports Market Demo")
    print("=" * 50)

    # 1. Test individual sport functions
    print("\n🏈 NFL Markets:")
    nfl_games = await get_nfl_games(status="open", limit=3)
    print(f"Found {len(nfl_games)} NFL markets")
    if not nfl_games.empty:
        print(f"Sample: {nfl_games.iloc[0]['title']}")

    print("\n🏀 NBA Markets:")
    nba_games = await get_nba_games(status="open", limit=3)
    print(f"Found {len(nba_games)} NBA markets")
    if not nba_games.empty:
        print(f"Sample: {nba_games.iloc[0]['title']}")

    # 2. Test moneyline filtering
    print("\n🎯 Moneyline Filtering:")
    if not nfl_games.empty:
        moneylines = filter_moneyline_markets(nfl_games)
        print(f"Filtered to {len(moneylines)} NFL moneyline markets")

    # 3. Test unified moneyline function
    print("\n⚔ Unified Moneyline Collection:")
    nfl_moneylines = await get_moneyline_markets("NFL", limit=2)
    print(f"NFL moneylines: {len(nfl_moneylines)}")

    # 4. Test SportMarketCollector
    print("\n🌐 SportMarketCollector Demo:")
    collector = SportMarketCollector()

    # Multi-sport collection
    multi_sport = await collector.get_moneylines_only(["NFL", "NBA"], limit=5)
    print(f"Multi-sport moneylines: {len(multi_sport)}")

    # The "sport" column is only present when at least one moneyline was tagged.
    if not multi_sport.empty and "sport" in multi_sport.columns:
        sports_found = multi_sport["sport"].unique()
        print(f"Sports found: {list(sports_found)}")

    return nfl_games
async def demo_historical_data(sample_ticker=None):
    """Fetch 24 hours of hourly candlesticks for one market and print a summary.

    Args:
        sample_ticker: Market ticker to query. When falsy, a fixed NFL ticker
            is used instead.

    Returns:
        The candlestick DataFrame, or an empty DataFrame when nothing was
        returned.
    """
    print("\n📊 Historical Data Demo")
    print("-" * 30)

    if not sample_ticker:
        # Use a known NFL market
        sample_ticker = "KXNFLGAME-25NOV02SEAWAS-WAS"

    print(f"Fetching historical data for: {sample_ticker}")

    # Derive the series from the ticker (text before the first "-") instead of
    # hard-coding the NFL series: the source prefers its own series_ticker over
    # the market ticker, so a fixed value would mis-route non-NFL tickers.
    series_ticker = sample_ticker.split("-", 1)[0]
    source = KalshiMarketsSource(series_ticker=series_ticker)

    # Fetch historical candlesticks
    historical_data = await source.fetch_historical_candlesticks(
        market_ticker=sample_ticker,
        interval=60,  # 1-hour bars
        hours_back=24,  # Last 24 hours
    )

    if historical_data.empty:
        print("⚠️ No historical data available")
        return pd.DataFrame()

    print(f"✅ Retrieved {len(historical_data)} hourly candlesticks")
    print(f"Columns: {list(historical_data.columns)}")

    # Show summary statistics
    print("\nPrice Summary:")
    print(f"  Open: ${historical_data['open'].iloc[0]:.3f}")
    print(f"  Close: ${historical_data['close'].iloc[-1]:.3f}")
    print(f"  High: ${historical_data['high'].max():.3f}")
    print(f"  Low: ${historical_data['low'].min():.3f}")
    print(f"  Volume: {historical_data['volume'].sum():,} contracts")

    # Show first few rows
    print("\nSample Data:")
    print(
        historical_data[["timestamp", "open", "high", "low", "close", "volume"]]
        .head(3)
        .to_string(index=False)
    )

    return historical_data
async def demo_complete_workflow():
    """Run discovery -> historical fetch -> basic analysis for one NFL market.

    Picks the first open NFL moneyline market (or a fixed sample ticker when
    none is found), fetches 48 hours of hourly candlesticks, and prints the
    percentage price change, volatility of returns, and average volume.

    Returns:
        True when candlesticks were retrieved and analysed, False when no data
        came back or any step raised.
    """
    print("\n🚀 Complete Workflow Demo")
    print("-" * 30)

    try:
        # Step 1: Find active NFL moneyline markets
        print("Step 1: Finding active moneyline markets...")
        moneylines = await get_moneyline_markets("NFL", limit=1)

        if moneylines.empty:
            print("No active markets found, using sample ticker")
            sample_ticker = "KXNFLGAME-25NOV02SEAWAS-WAS"
        else:
            sample_ticker = moneylines.iloc[0]["ticker"]
            market_title = moneylines.iloc[0]["title"]
            print(f"Found market: {market_title}")

        # Step 2: Fetch historical data
        print(f"\nStep 2: Fetching historical data for {sample_ticker}...")
        source = KalshiMarketsSource()
        historical_data = await source.fetch_historical_candlesticks(
            market_ticker=sample_ticker, interval=60, hours_back=48
        )

        if historical_data.empty:
            print("❌ No historical data available")
            return False

        print(f"✅ Got {len(historical_data)} data points")

        # Step 3: Simple analysis
        print("\nStep 3: Basic analysis...")
        closes = historical_data["close"]

        # Candles without trades come back with a close of 0.0, which makes
        # pct_change produce +/-inf; drop those so the std stays finite.
        returns = (
            closes.pct_change()
            .replace([float("inf"), float("-inf")], float("nan"))
            .dropna()
        )
        volatility = returns.std() * 100

        first_close = closes.iloc[0]
        last_close = closes.iloc[-1]
        if first_close:
            price_change = (last_close - first_close) / first_close * 100
            print(f"  Price change: {price_change:.2f}%")
        else:
            # A zero baseline has no meaningful percentage change.
            price_change = None
            print("  Price change: n/a (first close is 0)")

        print(f"  Volatility: {volatility:.2f}%")
        print(f"  Avg volume: {historical_data['volume'].mean():.0f} contracts")

        # Trading opportunity assessment
        if price_change is not None and abs(price_change) > 2:
            print("  📈 High movement detected - potential trading opportunity")
        else:
            print("  📊 Low movement - stable market")

        print("\n✅ Complete workflow successful!")
        return True

    except Exception as e:
        # Demo boundary: report the failure instead of aborting the whole run.
        print(f"❌ Workflow error: {e}")
        return False
async def main():
    """Run the three demos in sequence and print a summary of what came back.

    The historical-data demo reuses the first ticker returned by the
    sports-collection demo when one is available. Any exception is reported
    with a traceback rather than propagated.
    """
    print("🎯 Neural SDK v0.3.0 - Complete Feature Demonstration")
    print("=" * 60)
    print("Testing all new features with real Kalshi API data...")
    print("=" * 60)

    try:
        # Demo 1: Enhanced sports collection
        nfl_games = await demo_sports_collection()

        # Demo 2: Historical data
        sample_ticker = None
        if not nfl_games.empty:
            sample_ticker = nfl_games.iloc[0]["ticker"]

        historical_data = await demo_historical_data(sample_ticker)

        # Demo 3: Complete workflow
        success = await demo_complete_workflow()

        # Final summary
        print("\n" + "=" * 60)
        print("🎉 Neural SDK v0.3.0 Demo Complete!")
        print("=" * 60)
        print("✅ Sports market collection: Working")
        print("✅ Moneyline filtering: Working")
        # Report what actually happened instead of an unconditional "Working".
        if historical_data.empty:
            print("⚠️ Historical data: No data returned")
        else:
            print("✅ Historical data: Working")
        print("✅ Unified interface: Working")
        print(f"✅ Complete workflow: {'Working' if success else 'Partial'}")

        print("\n📊 Data Summary:")
        # len() of an empty DataFrame is already 0, so no extra guard is needed.
        print(f"  NFL markets tested: {len(nfl_games)}")
        print(f"  Historical data points: {len(historical_data)}")

        print("\n🚀 Neural SDK v0.3.0 is ready for production!")

    except Exception as e:
        print(f"❌ Demo failed: {e}")
        import traceback

        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(main())
"get_moneyline_markets", + "get_nba_games", + "get_nfl_games", + "get_sports_series", + "search_markets", ] diff --git a/neural/data_collection/kalshi.py b/neural/data_collection/kalshi.py index e7855de..ef9582f 100644 --- a/neural/data_collection/kalshi.py +++ b/neural/data_collection/kalshi.py @@ -92,6 +92,106 @@ async def fetch(self) -> pd.DataFrame: private_key_pem=self.private_key_pem, ) + async def fetch_historical_candlesticks( + self, + market_ticker: str, + interval: int = 60, + start_date: datetime | None = None, + end_date: datetime | None = None, + hours_back: int = 48, + ) -> pd.DataFrame: + """ + Fetch historical OHLCV candlestick data for a specific market. + + Args: + market_ticker: Market ticker (e.g., 'KXNFLGAME-25NOV02SEAWAS-WAS') + interval: Time interval in minutes (1, 60, or 1440) + start_date: Start date for data (optional) + end_date: End date for data (optional) + hours_back: Hours of data to fetch if dates not specified + + Returns: + DataFrame with OHLCV data and metadata + """ + from neural.auth.http_client import KalshiHTTPClient + from datetime import datetime, timedelta + + # Set up time range + if end_date is None: + end_date = datetime.now() + if start_date is None: + start_date = end_date - timedelta(hours=hours_back) + + start_ts = int(start_date.timestamp()) + end_ts = int(end_date.timestamp()) + + # Create HTTP client for historical data + client = KalshiHTTPClient(api_key_id=self.api_key_id, private_key_pem=self.private_key_pem) + + try: + # Use series ticker if available, otherwise extract from market ticker + series_ticker = self.series_ticker + if not series_ticker: + # Extract series from market ticker (e.g., KXNFLGAME-25NOV02SEAWAS-WAS -> KXNFLGAME) + if "-" in market_ticker: + series_ticker = market_ticker.split("-")[0] + else: + series_ticker = market_ticker + + # Fetch candlestick data + response = client.get_market_candlesticks( + series_ticker=series_ticker, + ticker=market_ticker, + start_ts=start_ts, + 
end_ts=end_ts, + period_interval=interval, + ) + + candlesticks = response.get("candlesticks", []) + + if not candlesticks: + print(f"No candlestick data found for {market_ticker}") + return pd.DataFrame() + + # Process candlestick data + processed_data = [] + for candle in candlesticks: + price_data = candle.get("price", {}) + yes_bid = candle.get("yes_bid", {}) + yes_ask = candle.get("yes_ask", {}) + + # Handle None values safely + def safe_convert(value, default=0.0): + if value is None: + return default + return float(value) / 100.0 # Convert cents to dollars + + processed_data.append( + { + "timestamp": pd.to_datetime(candle.get("end_period_ts"), unit="s"), + "open": safe_convert(price_data.get("open")), + "high": safe_convert(price_data.get("high")), + "low": safe_convert(price_data.get("low")), + "close": safe_convert(price_data.get("close")), + "volume": candle.get("volume", 0), + "yes_bid": safe_convert(yes_bid.get("close")), + "yes_ask": safe_convert(yes_ask.get("close")), + "open_interest": candle.get("open_interest", 0), + } + ) + + df = pd.DataFrame(processed_data) + df = df.sort_values("timestamp").reset_index(drop=True) + + print(f"āœ… Fetched {len(df)} candlesticks for {market_ticker}") + return df + + except Exception as e: + print(f"āŒ Error fetching historical data for {market_ticker}: {e}") + return pd.DataFrame() + finally: + client.close() + async def get_sports_series( leagues: Iterable[str] | None = None, @@ -321,6 +421,113 @@ def parse_game_date(ticker): return df +async def get_nba_games( + status: str = "open", + limit: int = 50, + use_authenticated: bool = True, + api_key_id: str | None = None, + private_key_pem: bytes | None = None, +) -> pd.DataFrame: + """ + Get NBA games markets from Kalshi. 
async def get_nba_games(
    status: str = "open",
    limit: int = 50,
    use_authenticated: bool = True,
    api_key_id: str | None = None,
    private_key_pem: bytes | None = None,
) -> pd.DataFrame:
    """
    Get NBA games markets from Kalshi.

    Args:
        status: Market status filter (default: 'open')
        limit: Maximum markets to fetch (default: 50)
        use_authenticated: Use authenticated API
        api_key_id: Optional API key
        private_key_pem: Optional private key

    Returns:
        DataFrame with NBA markets. When non-empty it gains ``home_team`` and
        ``away_team`` (parsed from title/subtitle, None when unparseable) and
        ``game_date`` (parsed from the ticker, NaT when unparseable), and is
        restricted to rows whose ticker contains "KXNBA" or whose title
        mentions NBA/Basketball.
    """
    df = await get_markets_by_sport(
        sport="NBA",
        status=status,
        limit=limit,
        use_authenticated=use_authenticated,
        api_key_id=api_key_id,
        private_key_pem=private_key_pem,
    )

    if df.empty:
        return df

    month_map = {
        "JAN": 1,
        "FEB": 2,
        "MAR": 3,
        "APR": 4,
        "MAY": 5,
        "JUN": 6,
        "JUL": 7,
        "AUG": 8,
        "SEP": 9,
        "OCT": 10,
        "NOV": 11,
        "DEC": 12,
    }

    # Parse teams from title (NBA format: "Will the [Away] beat the [Home]?" or similar)
    def parse_teams(row):
        title = row["title"]
        if isinstance(title, str):
            match = re.search(
                r"Will the (\w+(?:\s\w+)?) beat the (\w+(?:\s\w+)?)\?", title, re.IGNORECASE
            )
            if match:
                away, home = match.groups()
                return pd.Series({"home_team": home, "away_team": away})

        # Fallback: extract from subtitle. A missing subtitle can surface as
        # NaN (a float), which does not support the ``in`` operator.
        subtitle = row.get("subtitle", "")
        if not isinstance(subtitle, str):
            subtitle = ""

        # "Away vs Home" first, then the NBA-specific "Away at Home".
        for separator in (" vs ", " at "):
            if separator in subtitle:
                away, home = subtitle.split(separator)[:2]
                return pd.Series({"home_team": home.strip(), "away_team": away.strip()})

        return pd.Series({"home_team": None, "away_team": None})

    # Parse game date from ticker (format: KXNBA-25OCT15LALGSW -> 25OCT15)
    def parse_game_date(ticker):
        if not isinstance(ticker, str):
            return pd.NaT
        match = re.search(r"-(\d{2})([A-Z]{3})(\d{2})", ticker)
        if not match:
            return pd.NaT
        # The date token is YYMMMDD: two-digit year first, day last
        # (25OCT15 -> 2025-10-15).
        year_digits, month_abbr, day_digits = match.groups()
        month = month_map.get(month_abbr)
        if month is None:
            return pd.NaT
        try:
            return pd.Timestamp(year=2000 + int(year_digits), month=month, day=int(day_digits))
        except ValueError:
            # e.g. day 31 in a 30-day month
            return pd.NaT

    team_df = df.apply(parse_teams, axis=1)
    df = pd.concat([df, team_df], axis=1)

    df["game_date"] = df["ticker"].apply(parse_game_date)

    # Filter for NBA games only
    nba_mask = df["ticker"].str.contains("KXNBA", na=False) | df["title"].str.contains(
        "NBA|Basketball", case=False, na=False
    )
    df = df[nba_mask]

    return df
def filter_moneyline_markets(markets_df: pd.DataFrame) -> pd.DataFrame:
    """
    Filter DataFrame to only include moneyline/winner markets.

    A row is kept when its ``title`` matches one of the moneyline patterns
    (case-insensitive) and matches none of the exclusion patterns for props,
    totals and spreads. Rows with a missing title are dropped.

    Args:
        markets_df: DataFrame from any get_*_games() function

    Returns:
        A new DataFrame (never the input object) with only moneyline markets.
        Empty input yields an empty copy; a frame without a ``title`` column
        yields an empty frame with the same columns, since nothing can be
        classified as a moneyline without a title.
    """
    if markets_df is None:
        return pd.DataFrame()
    if markets_df.empty:
        return markets_df.copy()
    if "title" not in markets_df.columns:
        return markets_df.iloc[0:0].copy()

    # Patterns that indicate moneyline markets
    moneyline_patterns = [
        r"Will.*beat.*\?",
        r"Will.*win.*\?",
        r".*to win.*\?",
        r".*winner.*\?",
        r".*vs.*winner",
    ]

    # Additional filtering: exclude prop bets, totals, spreads
    exclude_patterns = [
        r"total.*points",
        r"over.*under",
        r"spread",
        r"touchdown",
        r"yards",
        r"first.*score",
        r"player.*prop",
    ]

    titles = markets_df["title"]
    moneyline_mask = titles.str.contains("|".join(moneyline_patterns), case=False, na=False)
    exclude_mask = titles.str.contains("|".join(exclude_patterns), case=False, na=False)

    # Keep markets that look like moneylines and do not look like props/totals/spreads
    return markets_df[moneyline_mask & ~exclude_mask].copy()
async def get_moneyline_markets(
    sport: str, status: str = "open", limit: int = 100, **kwargs
) -> pd.DataFrame:
    """
    Fetch markets for one sport and keep only the moneyline/winner ones.

    Args:
        sport: Sport identifier ("NFL", "NBA", "CFB", etc.); matched
            case-insensitively against the sports that have a dedicated fetcher
        status: Market status filter
        limit: Maximum markets to fetch
        **kwargs: Forwarded unchanged to the underlying fetch function

    Returns:
        DataFrame with only moneyline markets. When non-empty it carries two
        extra columns: ``sport`` (upper-cased identifier) and ``market_type``
        (always "moneyline").
    """
    league = sport.upper()

    # Sports with a dedicated fetcher; anything else goes through the generic lookup.
    dedicated_fetchers = {
        "NFL": get_nfl_games,
        "NBA": get_nba_games,
        "CFB": get_cfb_games,
        "NCAAF": get_cfb_games,
    }
    fetcher = dedicated_fetchers.get(league)

    if fetcher is None:
        markets = await get_markets_by_sport(sport, status=status, limit=limit, **kwargs)
    else:
        markets = await fetcher(status=status, limit=limit, **kwargs)

    moneylines = filter_moneyline_markets(markets)
    if moneylines.empty:
        return moneylines

    # Tag a copy so the filtered frame itself is left untouched.
    tagged = moneylines.copy()
    tagged["sport"] = league
    tagged["market_type"] = "moneyline"
    return tagged
+ + Args: + sport: "NFL", "NBA", "CFB", "MLB", "NHL" + market_type: "moneyline", "all", "props" + status: "open", "closed", "settled" + + Returns: + Standardized DataFrame with consistent columns across sports + """ + kwargs.update(self.auth_kwargs) + kwargs.update({"use_authenticated": self.use_authenticated, "status": status}) + + if market_type == "moneyline": + return await get_moneyline_markets(sport, **kwargs) + else: + # Get all markets for the sport + if sport.upper() == "NFL": + return await get_nfl_games(**kwargs) + elif sport.upper() == "NBA": + return await get_nba_games(**kwargs) + elif sport.upper() in ["CFB", "NCAAF"]: + return await get_cfb_games(**kwargs) + else: + return await get_markets_by_sport(sport, **kwargs) + + async def get_moneylines_only(self, sports: list[str], **kwargs) -> pd.DataFrame: + """Convenience method for moneyline markets only""" + all_moneylines = [] + + for sport in sports: + try: + moneylines = await get_moneyline_markets(sport, **kwargs) + if not moneylines.empty: + all_moneylines.append(moneylines) + except Exception as e: + print(f"Warning: Failed to fetch {sport} markets: {e}") + continue + + if all_moneylines: + return pd.concat(all_moneylines, ignore_index=True) + else: + return pd.DataFrame() + + async def get_todays_games(self, sports: list[str] = None) -> pd.DataFrame: + """Get all games happening today across specified sports""" + if sports is None: + sports = ["NFL", "NBA", "CFB"] + + today = pd.Timestamp.now().date() + all_games = await self.get_moneylines_only(sports) + + if not all_games.empty and "game_date" in all_games.columns: + today_games = all_games[all_games["game_date"].dt.date == today] + return today_games + + return all_games + + async def get_upcoming_games(self, days: int = 7, sports: list[str] = None) -> pd.DataFrame: + """Get games in the next N days""" + if sports is None: + sports = ["NFL", "NBA", "CFB"] + + end_date = pd.Timestamp.now() + pd.Timedelta(days=days) + all_games = await 
self.get_moneylines_only(sports) + + if not all_games.empty and "game_date" in all_games.columns: + upcoming = all_games[all_games["game_date"] <= end_date] + return upcoming.sort_values("game_date") + + return all_games diff --git a/pyproject.toml b/pyproject.toml index e6dc44c..980ad26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "neural-sdk" -version = "0.2.0" +version = "0.3.0" description = "Professional-grade SDK for algorithmic trading on prediction markets (Beta - Core features stable, advanced modules experimental)" readme = "README.md" requires-python = ">=3.10"