56 changes: 56 additions & 0 deletions samples/ml/ml_jobs/README.md
@@ -139,6 +139,62 @@ job4 = submit_from_stage(
`job1`, `job2` and `job3` are job handles, see [Function Dispatch](#function-dispatch)
for usage examples.

### Job definition

A job definition captures the reusable parts of an ML Job—payload location, compute pool, and other configuration—while keeping
arguments separate. This lets you create multiple jobs from the same payload with different arguments, without re-uploading it.
Defining a job looks very similar to creating one.

```python
from snowflake.ml.jobs import remote

compute_pool = "MY_COMPUTE_POOL"
@remote(compute_pool, stage_name="payload_stage")
def hello_world(name: str = "world"):
    from datetime import datetime

    print(f"{datetime.now()} Hello {name}!")

# this is a definition handle
definition = hello_world

job1 = hello_world()

job2 = hello_world(name="ML Job Definition")
```
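Conceptually, the definition/job split means binding configuration once and supplying only call-time arguments later, loosely analogous to `functools.partial` in plain Python (an analogy only, not the ML Jobs implementation):

```python
from functools import partial

def run_payload(compute_pool, stage_name, *args):
    # Stand-in for launching a payload; real ML Jobs return job handles
    return f"pool={compute_pool} stage={stage_name} args={args}"

# Bind the reusable configuration once (the "definition")
definition = partial(run_payload, "MY_COMPUTE_POOL", "payload_stage")

# Each call supplies only the per-job arguments
job_a = definition("arg1")
job_b = definition("arg2")
```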

```python
from snowflake.ml.jobs import MLJobDefinition

job_definition = MLJobDefinition.register(
    "/path/to/repo/my_script.py",
    # If you register a source directory, provide the entrypoint file:
    # entrypoint="/path/to/repo/my_script.py",
    compute_pool="MY_COMPUTE_POOL",
    stage_name="payload_stage",
    session=session,  # an active Snowpark Session
)
# Arguments follow the same format used in file dispatch
job1 = job_definition("arg1", "--arg2_key", "arg2_value")

job2 = job_definition("arg3", "--arg4_key", "arg4_value")

```
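Because arguments follow the file-dispatch format, the payload script receives them as ordinary command-line arguments. A minimal sketch of how an entrypoint might parse the placeholder arguments from the example above (the argument names are just this README's placeholders):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("arg1")            # positional argument
parser.add_argument("--arg2_key")      # keyword-style flag
ns = parser.parse_args(["arg1", "--arg2_key", "arg2_value"])
print(ns.arg1, ns.arg2_key)
```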

### Task Integration

ML Job definitions integrate directly with the Task SDK. Use a definition as the task definition when creating a DAG task.
For a detailed example, see `e2e_task_graph/README.md`.

```python
@remote(COMPUTE_POOL, stage_name=JOB_STAGE, target_instances=2)
def train_model(input_data: DataSource) -> Optional[str]:
    ...

train_model_task = DAGTask("TRAIN_MODEL", definition=train_model)
```


### Supporting Additional Payloads in Submissions

When submitting a file, directory, or from a stage, additional payloads are supported for use during job execution.
12 changes: 6 additions & 6 deletions samples/ml/ml_jobs/e2e_task_graph/README.md
@@ -119,8 +119,7 @@ for downstream consumption.
Run the ML pipeline locally without task graph orchestration:

```bash
python src/pipeline_local.py
python src/pipeline_local.py --no-register  # Skip model registration for faster experimentation
```

You can monitor the corresponding ML Job for model training via the [Job UI in Snowsight](../README.md#job-ui-in-snowsight).
@@ -187,16 +186,17 @@ This visual interface makes it easy to:
- **Branching Logic**: Using `DAGTaskBranch` for conditional execution paths
- **Finalizer Tasks**: Ensuring cleanup always runs regardless of success/failure

### Model Training on SPCS Using ML Jobs

The `train_model` function is decorated with `@remote` to execute multi-node training on Snowpark Container Services (SPCS):
```python
@remote(COMPUTE_POOL, stage_name=JOB_STAGE, target_instances=2)
def train_model() -> None:
# Training logic runs on distributed compute
```

When running as a DAG task, the dataset information is retrieved from the previous task (PREPARE_DATA) via `TaskContext`. The model is trained and evaluated, and the results (model path and metrics) are saved and passed to the next task. The Task SDK lets you use that ML Job definition directly when creating a DAG task. For additional ML Job definition examples, see `../README.md`.

### Conditional Model Promotion

The task graph includes branching logic that only promotes models meeting quality thresholds:
Collaborator: why change?
Collaborator (author): The evaluation happens inside the ML Job, because we cannot return the model directly from a task.
45 changes: 23 additions & 22 deletions samples/ml/ml_jobs/e2e_task_graph/src/modeling.py
@@ -1,13 +1,13 @@
import os
import logging
from datetime import datetime, timedelta, timezone
from typing import Optional

import cloudpickle as cp
from xgboost import Booster, DMatrix
import data
import ops
from constants import (
COMPUTE_POOL,
DAG_STAGE,
DB_NAME,
JOB_STAGE,
@@ -18,7 +18,6 @@
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from snowflake.ml.data import DataConnector, DatasetInfo, DataSource
from snowflake.ml.dataset import Dataset, load_dataset
from snowflake.ml.jobs import remote
from snowflake.ml.model import ModelVersion
from snowflake.snowpark import Session
from snowflake.snowpark.exceptions import SnowparkSQLException
@@ -144,10 +143,7 @@ def prepare_datasets(
return (ds, train_ds, test_ds)


# NOTE: Remove `target_instances=2` to run training on a single node
# See https://docs.snowflake.com/en/developer-guide/snowflake-ml/ml-jobs/distributed-ml-jobs
@remote(COMPUTE_POOL, stage_name=JOB_STAGE, target_instances=2)
Comment on lines -147 to -149
Collaborator: One of the main points of this sample is to demonstrate how easy it is to convert a local pipeline by pushing certain steps down into ML Jobs. Needing to write a separate script file which we submit_file() just for this conversion severely weakens this story. Why can't we just keep using a @remote() decorated function? @remote(...) should convert the function into an MLJobDefinition which we can use directly in pipeline_dag without needing an explicit MLJobDefinition.register() call.
Collaborator (author): Currently @remote does not create a job definition; it creates a job directly. We have only merged the PR for phase one; phase two is in review.
Collaborator: Let's hold off on merging this until @remote is ready, then.
Collaborator: Since the @remote change is now available, can we now call this as an ML Job directly from pipeline_dag?
Collaborator (author): I am a little confused here. Do you mean we create a job inside the task directly?
def train_model(session: Session, input_data: Optional[DataSource] = None) -> XGBClassifier:
"""
Train a model on the training dataset.

@@ -162,18 +158,15 @@ def train_model(session: Session, input_data: Optional[DataSource] = None) -> XGBClassifier:
Returns:
XGBClassifier: Trained XGBoost classifier model
"""
input_data_dc = DataConnector.from_sources(session, [input_data])
assert isinstance(input_data, DatasetInfo), "Input data must be a DatasetInfo"
exclude_cols = input_data.exclude_cols
label_col = exclude_cols[0]

model_params = dict(
n_estimators=3,
max_depth=50,
learning_rate=0.75,
objective="binary:logistic",
booster="gbtree",
@@ -186,26 +179,29 @@
XGBEstimator,
XGBScalingConfig,
)
all_cols = input_data_dc.to_pandas(limit=1).columns.tolist()
Collaborator: Can you file a JIRA to add a DataConnector.columns API?
input_cols = [c for c in all_cols if c not in exclude_cols]
estimator = XGBEstimator(
params=model_params,
scaling_config=XGBScalingConfig(),
)
model = estimator.fit(input_data_dc, input_cols=input_cols, label_col=label_col)
return model
else:
# Single node training - can use standard XGBClassifier
df = input_data_dc.to_pandas()
X_train = df.drop(exclude_cols, axis=1)
y_train = df[label_col].squeeze()
estimator = XGBClassifier(**model_params)
model = estimator.fit(X_train, y_train)
return model


def evaluate_model(
session: Session,
model: XGBClassifier,
input_data: DataSource,
*,
prefix: Optional[str] = None,
) -> dict:
"""
Evaluate a model on the training and test datasets.
@@ -232,7 +228,12 @@ def evaluate_model(

X_test = input_data_df.drop(exclude_cols, axis=1)
expected = input_data_df[label_col].squeeze()
if isinstance(model, Booster):
# Distributed training returns a raw Booster; threshold probabilities to class labels
dmatrix = DMatrix(X_test)
actual = (model.predict(dmatrix) > 0.5).astype(int)
else:
actual = model.predict(X_test)
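The `Booster` branch is needed because, with `objective="binary:logistic"`, `Booster.predict` returns probabilities rather than class labels, so they must be thresholded before computing classification metrics. The thresholding itself is plain NumPy:

```python
import numpy as np

# Probabilities as a Booster trained with binary:logistic would return them
probs = np.array([0.10, 0.60, 0.50, 0.93])
labels = (probs > 0.5).astype(int)
print(labels.tolist())
```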
Comment on lines +231 to +236
Collaborator: why do we need this?
Collaborator (author): The model type returned by distributed training is a Booster.

metric_types = [
f1_score,
4 changes: 4 additions & 0 deletions samples/ml/ml_jobs/e2e_task_graph/src/ops.py
@@ -166,3 +166,7 @@ def promote_model(
# Set model as default
base_model = registry.get_model(model.model_name)
base_model.default = model

def get_model(session: Session, model_name: str, version_name: str) -> ModelVersion:
"""Look up a specific version of a registered model."""
registry = get_model_registry(session)
return registry.get_model(model_name).version(version_name)