diff --git a/AGENTS.md b/AGENTS.md index 40474f4..bf4e5b4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,6 +8,7 @@ This document is a living map for developers building agents in the Codon ecosys - [`instrumentation-packages/codon-instrumentation-langgraph/AGENTS.md`](instrumentation-packages/codon-instrumentation-langgraph/AGENTS.md) covers the LangGraph decorators, the `LangGraphWorkloadAdapter`, and how to inherit telemetry automatically. - [`instrumentation-packages/codon-instrumentation-openai/AGENTS.md`](instrumentation-packages/codon-instrumentation-openai/AGENTS.md) will track OpenAI-specific instrumentation work. It currently outlines expectations and open tasks. - Telemetry initialization now lives in the core SDK (`codon_sdk.instrumentation.initialize_telemetry`), with a hardcoded production default endpoint and API-key header support. Framework packages re-export this entrypoint to stay aligned. CodonWorkload can emit spans on its own when `enable_tracing=True` (default: False); leave it disabled if another instrumentation layer is already wrapping nodes to avoid duplicate spans. +- Auto-instrumentation note: a configurator hook exists (`OTEL_PYTHON_CONFIGURATOR=codon_sdk.instrumentation.config:otel_configure`) to run Codon telemetry init during `opentelemetry-instrument`, but this path is not yet verified end-to-end and may block telemetry. Prefer explicit `initialize_telemetry()` for now; revisit auto-instrumentation stability later. If you are introducing a new framework integration, create an `AGENTS.md` alongside it and link back here. 
diff --git a/instrumentation-packages/codon-instrumentation-langgraph/pyproject.toml b/instrumentation-packages/codon-instrumentation-langgraph/pyproject.toml index 49c47d6..66e0f88 100644 --- a/instrumentation-packages/codon-instrumentation-langgraph/pyproject.toml +++ b/instrumentation-packages/codon-instrumentation-langgraph/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "codon-instrumentation-langgraph" -version = "0.1.0a2" +version = "0.1.0a3" license = {text = "Apache-2.0"} authors = [ { name="Codon, Inc.", email="martin@codonops.ai" }, diff --git a/sdk/AGENTS.md b/sdk/AGENTS.md index 22011a5..d0c5a3b 100644 --- a/sdk/AGENTS.md +++ b/sdk/AGENTS.md @@ -84,6 +84,8 @@ For a full walkthrough, see `docs/guides/codon-workload-quickstart.md`. - Document additions here before using them in instrumentation packages to keep alignment. - Telemetry initialization is centralized in `codon_sdk.instrumentation.initialize_telemetry`, with default endpoint `https://ingest.codonops.ai:4317` and `x-codon-api-key` header support (args override env; env vars `OTEL_EXPORTER_OTLP_ENDPOINT`, `CODON_API_KEY`, `OTEL_SERVICE_NAME` remain valid). Optional attach mode (`attach_to_existing` arg or `CODON_ATTACH_TO_EXISTING_OTEL_PROVIDER` env) lets you add Codon’s exporter to an existing tracer provider instead of replacing it—useful when OTEL auto-instrumentation is already active. - CodonWorkload can emit spans natively when `enable_tracing=True` (default: False). It uses the global tracer provider configured via `initialize_telemetry` to create one span per node execution with workload/org/deployment IDs, logic/run IDs, and NodeSpec attributes. Leave it disabled if another instrumentation layer (e.g., LangGraph adapter) is already wrapping nodes to avoid duplicate spans. 
+- Organization metadata: when an API key is present, `initialize_telemetry` queries the org lookup URL (`org_lookup_url` arg, then `CODON_ORG_LOOKUP_URL`, then the built-in default; timeout from `org_lookup_timeout` or `CODON_ORG_LOOKUP_TIMEOUT`, default 3 seconds) to resolve the organization ID and namespace. Resolved values are added to the telemetry resource (`codon.organization.id`, `org.namespace`), and the namespace becomes the process-wide default `org_namespace` for NodeSpecs, taking precedence over both an explicit `org_namespace` argument and `ORG_NAMESPACE`. If no namespace is resolved, NodeSpecs use the explicit argument, then `ORG_NAMESPACE`, and finally the `unknown` placeholder with a warning instead of raising. +- Auto-instrumentation: a configurator hook exists (`OTEL_PYTHON_CONFIGURATOR=codon_sdk.instrumentation.config:otel_configure`) to run Codon telemetry init during `opentelemetry-instrument`, but this path is not yet stable end-to-end. For reliable results, call `initialize_telemetry()` explicitly; revisit the configurator once validated. ## Extending the SDK - Capture requirements for new schema fields inside each class docstring and mirror them here. diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index 8acc957..ddd00fa 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] # This is the main SDK package name name = "codon_sdk" -version = "0.1.0a2" +version = "0.1.0a3" license = {text = "Apache-2.0"} authors = [ { name="Codon, Inc.", email="martin@codonops.ai" }, diff --git a/sdk/src/codon_sdk/instrumentation/config.py b/sdk/src/codon_sdk/instrumentation/config.py index 575121e..cb02e96 100644 --- a/sdk/src/codon_sdk/instrumentation/config.py +++ b/sdk/src/codon_sdk/instrumentation/config.py @@ -13,7 +13,9 @@ # limitations under the License. import os -from typing import Dict, Optional +import json +import urllib.request +from typing import Dict, Optional, Tuple from opentelemetry import trace from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter @@ -22,13 +24,14 @@ from opentelemetry.sdk.trace.export import BatchSpanProcessor import logging +from codon_sdk.instrumentation.schemas.nodespec import set_default_org_namespace # Avoid configuring root logger; module-level logger only.
logger = logging.getLogger(__name__) # Hardcoded production ingest endpoint; can be overridden via argument or env. -DEFAULT_INGEST_ENDPOINT = "https://ingest.codonops.ai:4317" - +DEFAULT_INGEST_ENDPOINT = "https://ingest.codonops.ai" +DEFAULT_ORG_LOOKUP_URL = "https://optimization.codonops.ai/api/v1/auth/validate" _TRUE_VALUES = {"1", "true", "t", "yes", "y", "on"} @@ -44,6 +47,8 @@ def initialize_telemetry( service_name: Optional[str] = None, endpoint: Optional[str] = None, attach_to_existing: Optional[bool] = None, + org_lookup_url: Optional[str] = None, + org_lookup_timeout: Optional[float] = None, ) -> None: """Initialize OpenTelemetry tracing for Codon. @@ -81,7 +86,42 @@ def initialize_telemetry( "CODON telemetry initialized without an API key; spans may be rejected by the gateway" ) + lookup_url = ( + org_lookup_url + or os.getenv("CODON_ORG_LOOKUP_URL") + or DEFAULT_ORG_LOOKUP_URL + ) + lookup_timeout = ( + org_lookup_timeout + if org_lookup_timeout is not None + else _coerce_timeout(os.getenv("CODON_ORG_LOOKUP_TIMEOUT"), default=3.0) + ) + + org_id: Optional[str] = None + org_namespace: Optional[str] = None + if final_api_key and lookup_url: + org_id, org_namespace = _resolve_org_metadata( + api_key=final_api_key, + url=lookup_url, + timeout=lookup_timeout, + ) + if org_namespace: + set_default_org_namespace(org_namespace) + if not org_namespace or not org_id: + logger.warning( + "Unable to resolve organization metadata from API key via %s; spans and NodeSpecs will omit org unless provided elsewhere", + lookup_url, + ) + elif not final_api_key: + logger.warning( + "Skipping organization lookup because no API key was provided; NodeSpecs and spans may use placeholder org values" + ) + resource = Resource(attributes={"service.name": final_service_name}) + if org_id: + resource = resource.merge(Resource(attributes={"codon.organization.id": org_id})) + if org_namespace: + resource = resource.merge(Resource(attributes={"org.namespace": org_namespace})) exporter 
= OTLPSpanExporter(endpoint=final_endpoint, headers=headers) if attach and isinstance(existing_provider, TracerProvider): @@ -111,3 +151,44 @@ def _has_equivalent_processor(provider: TracerProvider, exporter: OTLPSpanExport if same_endpoint and same_headers: return True return False + + +def _resolve_org_metadata( + *, + api_key: str, + url: str, + timeout: float, +) -> Tuple[Optional[str], Optional[str]]: + req = urllib.request.Request(url, headers={"x-codon-api-key": api_key}) + try: # pragma: no cover - network dependent + with urllib.request.urlopen(req, timeout=timeout) as resp: + payload = resp.read() + data = json.loads(payload.decode()) + logger.debug("Org lookup response: %s", data) + metadata = data.get("metadata") or data + org_id = metadata.get("organization_id") + namespace = metadata.get("namespace") or metadata.get("org_namespace") + return org_id, namespace + except Exception as exc: # pragma: no cover - network dependent + logger.warning("Organization metadata lookup failed: %s", exc) + return None, None + + +def _coerce_timeout(value: Optional[str], default: float) -> float: + if value is None: + return default + try: + return float(value) + except ValueError: + return default + + +def otel_configure() -> None: + """Configurator hook for OTEL auto-instrumentation. + + Point ``OTEL_PYTHON_CONFIGURATOR`` at ``codon_sdk.instrumentation.config:otel_configure`` + to run Codon telemetry initialization when using ``opentelemetry-instrument``. Uses env + vars for all inputs. 
+ """ + + initialize_telemetry() diff --git a/sdk/src/codon_sdk/instrumentation/schemas/nodespec/__init__.py b/sdk/src/codon_sdk/instrumentation/schemas/nodespec/__init__.py index eb592a4..b3027be 100644 --- a/sdk/src/codon_sdk/instrumentation/schemas/nodespec/__init__.py +++ b/sdk/src/codon_sdk/instrumentation/schemas/nodespec/__init__.py @@ -21,7 +21,9 @@ import inspect import json import os +import logging +logger = logging.getLogger(__name__) class FunctionAnalysisResult(BaseModel): name: str = Field(description="The name of the function.") @@ -44,21 +46,29 @@ class NodeSpecEnv(BaseModel): description="The namespace of the calling organization.", ) OrgNamespaceDefault: str = Field( - default="local", - description="The default ORG_NAMESPACE value." + default="unknown", + description="The default ORG_NAMESPACE value used when none is provided." ) nodespec_env = NodeSpecEnv() +_RESOLVED_ORG_NAMESPACE: Optional[str] = None + + +def set_default_org_namespace(namespace: Optional[str]) -> None: + """Set a process-wide default org namespace, typically from API-key lookup.""" + + global _RESOLVED_ORG_NAMESPACE + _RESOLVED_ORG_NAMESPACE = namespace class NodeSpec(BaseModel): """Immutable specification that introspects Python callables and generates stable SHA-256 identifiers. - NodeSpec inspects Python callables to capture the function signature, type hints, and optional + NodeSpec inspects Python callables to capture the function signature, type hints, and optional model metadata. It emits a deterministic SHA-256 ID that downstream systems can rely on. - NodeSpec requires type annotations to build JSON schemas for inputs and outputs. If annotations + NodeSpec requires type annotations to build JSON schemas for inputs and outputs. If annotations are missing, the generated schemas may be empty. """ model_config = ConfigDict(extra="forbid", frozen=True) @@ -120,7 +130,7 @@ def __init__( Example: >>> nodespec = NodeSpec( ... org_namespace="acme", - ... 
name="summarize", + ... name="summarize", ... role="processor", ... callable=summarize_function, ... model_name="gpt-4o", @@ -130,10 +140,14 @@ def __init__( """ callable_attrs = analyze_function(callable) - namespace = org_namespace or os.getenv(nodespec_env.OrgNamespace) + # Precedence: resolved default (e.g., from API-key lookup) > explicit arg/env > default placeholder + namespace = _RESOLVED_ORG_NAMESPACE or org_namespace or os.getenv(nodespec_env.OrgNamespace) if not namespace: - raise NodeSpecValidationError( - f"{nodespec_env.OrgNamespace} environment variable not set." + namespace = nodespec_env.OrgNamespaceDefault + logger.warning( + "NodeSpec created without org namespace; defaulting to '%s'. " + "Provide an API key or set ORG_NAMESPACE to avoid shared identifiers.", + namespace, ) nodespec_id = self._generate_nodespec_id( diff --git a/sdk/test/instrumentation/schemas/nodespec/test_nodespec.py b/sdk/test/instrumentation/schemas/nodespec/test_nodespec.py index e8d8f26..e588fcf 100644 --- a/sdk/test/instrumentation/schemas/nodespec/test_nodespec.py +++ b/sdk/test/instrumentation/schemas/nodespec/test_nodespec.py @@ -7,9 +7,9 @@ analyze_function, nodespec_hash_method, NodeSpec, - NodeSpecValidationError, FunctionAnalysisResult, nodespec_env, + set_default_org_namespace, ) # Test data @@ -75,13 +75,16 @@ def test_nodespec_creation_success(set_my_org_env_var): assert spec.callable_signature == "sample_function(a: int, b: str) -> float" assert spec.id is not None -def test_nodespec_creation_no_env_var_fails(): - with pytest.raises(NodeSpecValidationError, match="ORG_NAMESPACE environment variable not set"): - NodeSpec( - name="test_node", - role="test_role", - callable=sample_function, - ) +def test_nodespec_creation_no_env_var_warns(caplog): + caplog.set_level("WARNING") + set_default_org_namespace(None) + spec = NodeSpec( + name="test_node", + role="test_role", + callable=sample_function, + ) + assert spec.org_namespace == nodespec_env.OrgNamespaceDefault + 
assert any("NodeSpec created without org namespace" in rec.message for rec in caplog.records) def test_nodespec_id_generation(set_my_org_env_var): spec = NodeSpec( @@ -109,12 +112,14 @@ def test_nodespec_id_generation(set_my_org_env_var): assert spec.id == expected_id -@pytest.mark.xfail(reason="The field_validator for spec_version is not working as expected for frozen models") -def test_nodespec_spec_version_override_fails(set_my_org_env_var): - with pytest.raises(NodeSpecValidationError, match="spec_version cannot be changed"): - NodeSpec( - name="test_node", - role="test_role", - callable=sample_function, - spec_version="anything" - ) + +def test_resolved_namespace_overrides_env(monkeypatch): + monkeypatch.setenv(nodespec_env.OrgNamespace, "env-org") + set_default_org_namespace("resolved-org") + spec = NodeSpec( + name="test_node", + role="test_role", + callable=sample_function, + ) + assert spec.org_namespace == "resolved-org" + set_default_org_namespace(None) diff --git a/sdk/test/instrumentation/test_initialize_telemetry.py b/sdk/test/instrumentation/test_initialize_telemetry.py index 296f6cf..1448a40 100644 --- a/sdk/test/instrumentation/test_initialize_telemetry.py +++ b/sdk/test/instrumentation/test_initialize_telemetry.py @@ -39,8 +39,11 @@ def clear_env(monkeypatch): "OTEL_EXPORTER_OTLP_ENDPOINT", "OTEL_SERVICE_NAME", "CODON_ATTACH_TO_EXISTING_OTEL_PROVIDER", + "CODON_ORG_LOOKUP_URL", + "CODON_ORG_LOOKUP_TIMEOUT", ]: monkeypatch.delenv(key, raising=False) + instrumentation_config.set_default_org_namespace(None) yield