Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
95 commits
Select commit Hold shift + click to select a range
5d7117a
init
sfc-gh-aalam May 6, 2025
4ddd5d8
minor improvement in display
sfc-gh-aalam May 6, 2025
6b3e259
improvement
sfc-gh-aalam May 7, 2025
c62053a
handle nested calls
sfc-gh-aalam May 7, 2025
44f93a2
Merge branch 'main' into aalam-SNOW-2084165-add-error-trace
sfc-gh-aalam May 7, 2025
7c58414
improve type hints
sfc-gh-aalam May 7, 2025
ee12e72
Merge branch 'aalam-SNOW-2084165-add-error-trace' of github.com:snowf…
sfc-gh-aalam May 7, 2025
c3f0374
refactor + add more comments
sfc-gh-aalam May 7, 2025
f5b4946
+comments
sfc-gh-aalam May 7, 2025
108dcdc
more comments
sfc-gh-aalam May 7, 2025
432ec6b
Merge branch 'main' into aalam-SNOW-2084165-add-error-trace
sfc-gh-aalam May 8, 2025
f6c72a3
more protection; more comments
sfc-gh-aalam May 8, 2025
b410188
Merge branch 'aalam-SNOW-2084165-add-error-trace' of github.com:snowf…
sfc-gh-aalam May 8, 2025
e1d15b4
don't read extra
sfc-gh-aalam May 8, 2025
34bd358
efficient file reads
sfc-gh-aalam May 9, 2025
3c9f5f8
minor improvement
sfc-gh-aalam May 9, 2025
b239195
repl and snippet collection improvements
sfc-gh-aalam May 9, 2025
6dfd0b4
add integ tests
sfc-gh-aalam May 9, 2025
db6a354
add unit test
sfc-gh-aalam May 10, 2025
b5bd7e5
fix test
sfc-gh-aalam May 12, 2025
24a4e77
refactor for better error handling
sfc-gh-aalam May 12, 2025
eaa59b3
fix ast tests
sfc-gh-aalam May 13, 2025
c197a2e
minor clean-up
sfc-gh-aalam May 13, 2025
8bc7243
fix tests
sfc-gh-aalam May 13, 2025
b8f4428
fix ex_info.value
sfc-gh-aalam May 13, 2025
7e22397
undo ex_info changes
sfc-gh-aalam May 14, 2025
617e105
Merge branch 'main' into aalam-SNOW-2084165-add-error-trace
sfc-gh-aalam May 14, 2025
d79129f
undo ex_info changes
sfc-gh-aalam May 14, 2025
3cb32d7
Update str(ex_info) to str(ex_info.value)
sfc-gh-aalam May 14, 2025
4bed385
Merge branch 'aalam-SNOW-2084165-fix-ex-info' into aalam-SNOW-2084165…
sfc-gh-aalam May 14, 2025
f89114a
fix missing testcase
sfc-gh-aalam May 14, 2025
ada9782
Merge branch 'aalam-SNOW-2084165-fix-ex-info' into aalam-SNOW-2084165…
sfc-gh-aalam May 14, 2025
9388547
fix more missing ex.value
sfc-gh-aalam May 14, 2025
71a2894
Merge branch 'aalam-SNOW-2084165-fix-ex-info' into aalam-SNOW-2084165…
sfc-gh-aalam May 14, 2025
5b74a13
fix regex tests
sfc-gh-aalam May 15, 2025
890a06d
minor wording change
sfc-gh-aalam May 15, 2025
746d054
include source information which was skipped earlier
sfc-gh-aalam May 15, 2025
b4b34d4
re-add deleted line
sfc-gh-aalam May 15, 2025
1353d26
fix last commit
sfc-gh-aalam May 15, 2025
fbe633a
Merge branch 'main' into aalam-SNOW-2084165-add-error-trace
sfc-gh-aalam May 15, 2025
361c23f
Update src/snowflake/snowpark/dataframe.py
sfc-gh-aalam May 15, 2025
645e17c
fix graphite suggestion
sfc-gh-aalam May 15, 2025
88ae65e
temp update
sfc-gh-aalam May 17, 2025
02bdab4
Add associated df astid to SnowflakePlan and Selectable
sfc-gh-aalam May 17, 2025
c5a6d50
merge with new base
sfc-gh-aalam May 17, 2025
fe60424
Merge branch 'main' into aalam-SNOW-2096094-snowflake-plan-with-df-as…
sfc-gh-aalam May 17, 2025
f6bfb4f
Merge branch 'aalam-SNOW-2096094-snowflake-plan-with-df-ast-id' into …
sfc-gh-aalam May 17, 2025
a89f95a
address comments and clean-up
sfc-gh-aalam May 17, 2025
6f60d3b
Merge branch 'aalam-SNOW-2084165-add-error-trace' of github.com:snowf…
sfc-gh-aalam May 17, 2025
896b9a7
wrap describe query error
sfc-gh-aalam May 17, 2025
1204935
Update src/snowflake/snowpark/exceptions.py
sfc-gh-aalam May 17, 2025
89eab2b
Update src/snowflake/snowpark/_internal/debug_utils.py
sfc-gh-aalam May 17, 2025
ffad17e
test fixes
sfc-gh-aalam May 17, 2025
3ab7c3d
Merge branch 'aalam-SNOW-2084165-add-error-trace' of github.com:snowf…
sfc-gh-aalam May 17, 2025
1e5ce2d
add comments
sfc-gh-aalam May 17, 2025
14d38dd
fix tests
sfc-gh-aalam May 17, 2025
bd3961d
update mock class
sfc-gh-aalam May 17, 2025
12bc2bc
update plan df ast id if plan is not None
sfc-gh-aalam May 17, 2025
2c87fc0
improve attributes
sfc-gh-aalam May 17, 2025
e449e47
Merge branch 'aalam-SNOW-2096094-snowflake-plan-with-df-ast-id' into …
sfc-gh-aalam May 17, 2025
c8b4b4a
Update tests/integ/test_df_lineage.py
sfc-gh-aalam May 17, 2025
f5cdd8f
add tests
sfc-gh-aalam May 17, 2025
19b226d
minor refactor
sfc-gh-aalam May 17, 2025
a45bfdc
Merge branch 'aalam-SNOW-2096094-snowflake-plan-with-df-ast-id' into …
sfc-gh-aalam May 17, 2025
91ae396
adjust with refactor
sfc-gh-aalam May 17, 2025
ba2f4d7
fix local-test
sfc-gh-aalam May 17, 2025
6176b5d
fix local-test
sfc-gh-aalam May 17, 2025
ab12938
add debug context to snowparksqlexception
sfc-gh-aalam May 17, 2025
853cd40
Merge branch 'aalam-SNOW-2084165-add-debug-info-to-error-message' int…
sfc-gh-aalam May 17, 2025
30d0cdc
add changelog
sfc-gh-aalam May 17, 2025
e754699
improve coverage
sfc-gh-aalam May 18, 2025
35dc3dd
Merge branch 'main' into aalam-SNOW-2084165-add-error-trace
sfc-gh-aalam May 21, 2025
8fc3de1
merge with main
sfc-gh-aalam May 22, 2025
bd76e46
improve type hints
sfc-gh-aalam May 22, 2025
b61234b
Update CHANGELOG.md
sfc-gh-aalam May 22, 2025
a1f679b
address comments
sfc-gh-aalam May 22, 2025
04de2a3
Merge branch 'aalam-SNOW-2084165-add-error-trace' of github.com:snowf…
sfc-gh-aalam May 22, 2025
1f0b227
address comments
sfc-gh-aalam May 22, 2025
04d3770
add type-hints
sfc-gh-aalam May 22, 2025
02eee1f
fix type-hint
sfc-gh-aalam May 22, 2025
2ffada8
Merge branch 'main' into aalam-SNOW-2084165-add-error-trace
sfc-gh-aalam May 23, 2025
73e86b6
update how we enable/disable
sfc-gh-aalam May 29, 2025
32560cb
merge with main
sfc-gh-aalam May 29, 2025
5a38b4a
address comments
sfc-gh-aalam May 29, 2025
394b10b
address comments
sfc-gh-aalam May 29, 2025
cc8eb7c
Allow user to enable/disable ast collection
sfc-gh-aalam Jun 4, 2025
697e969
Merge branch 'aalam-SNOW-2110972-allow-local-override-for-ast-collect…
sfc-gh-aalam Jun 4, 2025
ff95ad8
minor fix
sfc-gh-aalam Jun 4, 2025
452f9ec
Merge branch 'aalam-SNOW-2110972-allow-local-override-for-ast-collect…
sfc-gh-aalam Jun 4, 2025
670295d
update changelog
sfc-gh-aalam Jun 4, 2025
8b12c3e
fix unit tests
sfc-gh-aalam Jun 4, 2025
19b20ff
Merge branch 'aalam-SNOW-2110972-allow-local-override-for-ast-collect…
sfc-gh-aalam Jun 4, 2025
24ff32c
Merge branch 'main' into aalam-SNOW-2084165-add-error-trace
sfc-gh-aalam Jun 6, 2025
acb68c9
Merge branch 'aalam-SNOW-2084165-add-error-trace' of github.com:snowf…
sfc-gh-aalam Jun 6, 2025
a53c60c
undo minor err
sfc-gh-aalam Jun 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
- Added support for ignoring surrounding whitespace in the XML element using `ignoreSurroundingWhitespace` option.
- Added support for parameter `return_dataframe` in `Session.call`, which can be used to set the return type of the functions to a `DataFrame` object.
- Added a new argument to `Dataframe.describe` called `strings_include_math_stats` that triggers `stddev` and `mean` to be calculated for String columns.
- Added debuggability improvements to show a trace of the most recent dataframe transformations if an operation leads to a `SnowparkSQLException`. Enable it using `snowflake.snowpark.context.configure_development_features()`. This feature also requires AST collection to be enabled in the session, which can be done using `session.ast_enabled = True`.
- Improved the error message for `Session.write_pandas()` and `Session.create_dataframe()` when the input pandas DataFrame does not have a column.
- Added support for retrieving `Edge.properties` when retrieving lineage from `DGQL` in `DataFrame.lineage.trace`.
- Added a parameter `table_exists` to `DataFrameWriter.save_as_table` that allows specifying if a table already exists. This allows skipping a table lookup that can be expensive.
Expand Down
56 changes: 49 additions & 7 deletions src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#
import copy
import difflib
from logging import getLogger
import re
import sys
import uuid
Expand Down Expand Up @@ -32,6 +33,9 @@
TableFunctionRelation,
TableFunctionJoin,
)
from snowflake.snowpark._internal.debug_utils import (
get_df_transform_trace_message,
)

if TYPE_CHECKING:
from snowflake.snowpark._internal.analyzer.select_statement import (
Expand Down Expand Up @@ -134,6 +138,8 @@
else:
from collections.abc import Iterable

_logger = getLogger(__name__)


class SnowflakePlan(LogicalPlan):
class Decorator:
Expand All @@ -147,7 +153,15 @@ class Decorator:

@staticmethod
def wrap_exception(func):
"""This wrapper is used to wrap snowflake connector ProgrammingError into SnowparkSQLException.
It also adds additional debug information to the raised exception when possible.
"""

def wrap(*args, **kwargs):
from snowflake.snowpark.context import (
_enable_dataframe_trace_on_error,
)

try:
return func(*args, **kwargs)
except snowflake.connector.errors.ProgrammingError as e:
Expand All @@ -158,9 +172,35 @@ def wrap(*args, **kwargs):
query = getattr(e, "query", None)
tb = sys.exc_info()[2]
assert e.msg is not None

# extract df_ast_id, stmt_cache from args
df_ast_id, stmt_cache = None, None
for arg in args:
if isinstance(arg, SnowflakePlan):
df_ast_id = arg.df_ast_id
stmt_cache = arg.session._ast_batch._bind_stmt_cache
break
Comment on lines +178 to +182
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it possible that there are multiple plans in the args? and if so, is the left most plan guaranteed to be the most recent plan?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have some code below:

children = [
                            arg for arg in args if isinstance(arg, SnowflakePlan)
                        ]

which means there will be multiple plans ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is possible for args to have multiple SnowflakePlans but only the first arg is relevant to the failure. This is not obvious and we have to see the usage of this decorator to be sure of this.

The decorator is used for the following functions

  1. SnowflakePlan._analyze_attributes link
  2. Selectable._analyze_attributes link
  3. SnowflakePlanBuilder.build link
  4. SnowflakePlanBuilder.build_binary link
  5. ServerConnection.get_result_set link

Out of these only build and build_binary can have multiple snowflake plans. For each of them, the source of failure can only possible come from describe queries. But we have already wrapped SnowflakePlan._analyze_attributes and Selectable._analyze_attributes and they can have at most one SnowflakePlan. Outside of describe queries, the wrapper will be triggered to wrap programming exception on getting full result for example for .show() or .collect(). For these cases, we have wrapped get_result_set which only has one SnowflakePlan arg. IMO, the wrapper of build and build_binary are redundant. @sfc-gh-jdu did you notice any other case where parsing plans in these functions is actually giving you more information?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice, thanks for the context.
I think we can put a brief context into the comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

df_transform_debug_trace = None
try:
if (
_enable_dataframe_trace_on_error
and df_ast_id is not None
and stmt_cache is not None
):
df_transform_debug_trace = get_df_transform_trace_message(
df_ast_id, stmt_cache
)
except Exception as trace_error:
# If we encounter an error when getting the df_transform_debug_trace,
# we will ignore the error and not add the debug trace to the error message.
_logger.info(
f"Error when getting the df_transform_debug_trace: {trace_error}"
)
pass

if "unexpected 'as'" in e.msg.lower():
ne = SnowparkClientExceptionMessages.SQL_PYTHON_REPORT_UNEXPECTED_ALIAS(
query
query, debug_context=df_transform_debug_trace
)
raise ne.with_traceback(tb) from None
elif e.sqlstate == "42000" and "invalid identifier" in e.msg:
Expand All @@ -171,7 +211,7 @@ def wrap(*args, **kwargs):
)
if not match: # pragma: no cover
ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR(
e
e, debug_context=df_transform_debug_trace
)
raise ne.with_traceback(tb) from None
col = match.group(1)
Expand All @@ -193,7 +233,9 @@ def wrap(*args, **kwargs):
unaliased_cols[0] if unaliased_cols else "<colname>"
)
ne = SnowparkClientExceptionMessages.SQL_PYTHON_REPORT_INVALID_ID(
orig_col_name, query
orig_col_name,
query,
debug_context=df_transform_debug_trace,
)
raise ne.with_traceback(tb) from None
elif (
Expand All @@ -210,7 +252,7 @@ def wrap(*args, **kwargs):
> 1
):
ne = SnowparkClientExceptionMessages.SQL_PYTHON_REPORT_JOIN_AMBIGUOUS(
col, col, query
col, col, query, debug_context=df_transform_debug_trace
)
raise ne.with_traceback(tb) from None
else:
Expand All @@ -220,7 +262,7 @@ def wrap(*args, **kwargs):
)
if not match: # pragma: no cover
ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR(
e
e, debug_context=df_transform_debug_trace
)
raise ne.with_traceback(tb) from None
col = match.group(1)
Expand Down Expand Up @@ -282,7 +324,7 @@ def add_single_quote(string: str) -> str:

e.msg = f"{e.msg}\n{msg}"
ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR(
e
e, debug_context=df_transform_debug_trace
)
raise ne.with_traceback(tb) from None
elif e.sqlstate == "42601" and "SELECT with no columns" in e.msg:
Expand Down Expand Up @@ -329,7 +371,7 @@ def search_read_file_node(
raise ne.with_traceback(tb) from None

ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR(
e
e, debug_context=df_transform_debug_trace
)
raise ne.with_traceback(tb) from None

Expand Down
3 changes: 1 addition & 2 deletions src/snowflake/snowpark/_internal/ast/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,8 +742,7 @@ def with_src_position(
# Once we've stepped out of the snowpark package, we should be in the code of interest.
# However, the code of interest may execute in an environment that is not accessible via the filesystem.
# e.g. Jupyter notebooks, REPLs, calls to exec, etc.
filename = frame.f_code.co_filename if frame is not None else ""
if frame is None or not Path(filename).is_file():
if frame is None:
src.file = __intern_string("")
return expr_ast

Expand Down
200 changes: 200 additions & 0 deletions src/snowflake/snowpark/_internal/debug_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
#
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
#

from functools import cached_property
import os
import sys
from typing import Dict, List, Optional
import itertools

from snowflake.snowpark._internal.ast.batch import get_dependent_bind_ids
from snowflake.snowpark._internal.ast.utils import __STRING_INTERNING_MAP__
import snowflake.snowpark._internal.proto.generated.ast_pb2 as proto

UNKNOWN_FILE = "__UNKNOWN_FILE__"
SNOWPARK_PYTHON_DATAFRAME_TRANSFORM_TRACE_LENGTH = (
"SNOWPARK_PYTHON_DATAFRAME_TRANSFORM_TRACE_LENGTH"
)


class DataFrameTraceNode:
"""A node representing a dataframe operation in the DAG that represents the lineage of a DataFrame."""

def __init__(self, batch_id: int, stmt_cache: Dict[int, proto.Stmt]) -> None:
self.batch_id = batch_id
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I would argue that this isn't meant to be batch ID anymore. Within each Python session that imports the Snowpark module, each AST ID for a Table or Dataframe will be a UID.

self.stmt_cache = stmt_cache

@cached_property
def children(self) -> set[int]:
    """Returns the batch_ids of the children of this node.

    A child is any statement whose bind id this node's statement depends on,
    as reported by ``get_dependent_bind_ids``. Cached because the dependency
    set of a statement never changes after it is recorded.
    """
    return get_dependent_bind_ids(self.stmt_cache[self.batch_id])

def get_src(self) -> Optional[proto.SrcPosition]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the hybrid client prototype we are using a slightly different method to get the source location; by just using inspect to walk the stack to the appropriate source location. We have to do this because modin is not using any of the AST stuff, but it's also relatively straight forward.

I sort of want to use your debugging tool for snowpandas as well; but we may want to refactor this so we don't require any of the protobuf work.

def get_user_source_location(group: str) -> dict[str, str]:

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about using this function?

def context_manager_code_location(frame_info, func) -> Tuple[str, int]:

Essentially we seem to have three approaches to this problem. I'm /less/ of a fan of the AST because it doesn't help pandas for this type of debugging. but it seems like we might be able to consolidate w/ the open telemetry approach.

"""The source Stmt of the DataFrame described by the batch_id."""
stmt = self.stmt_cache[self.batch_id]
api_call = stmt.bind.expr.WhichOneof("variant")
return (
getattr(stmt.bind.expr, api_call).src
if api_call and getattr(stmt.bind.expr, api_call).HasField("src")
else None
)

def _read_file(
    self, filename, start_line, end_line, start_column, end_column
) -> str:
    """Read the relevant code snippet of where the DataFrame was created.

    The file at ``filename`` must be readable by the executing user.

    Args:
        filename: Path of the source file to read.
        start_line: 1-based first line of the snippet.
        end_line: 1-based last line of the snippet (used on Python 3.11+ only).
        start_column: 0-based column offset on the first line (3.11+ only).
        end_column: 0-based column offset on the last line (3.11+ only).

    Returns:
        The snippet lines, right-stripped and joined with newlines. Returns an
        empty string if the requested lines are not present in the file (e.g.
        the file changed on disk after the AST position was captured).
    """
    with open(filename) as f:
        code_lines: List[str] = []
        if sys.version_info >= (3, 11):
            # Skip directly to start_line and read only the required lines.
            code_lines = list(itertools.islice(f, start_line - 1, end_line))
            if not code_lines:
                # File is shorter than the recorded position; avoid an
                # IndexError on the column-slicing below.
                return ""
            if start_line == end_line:
                code_lines[0] = code_lines[0][start_column:end_column]
            else:
                code_lines[0] = code_lines[0][start_column:]
                code_lines[-1] = code_lines[-1][:end_column]
        else:
            # For python 3.9/3.10, we do not extract the end line from the
            # source code, so we just read the start line and return.
            code_lines = list(itertools.islice(f, start_line - 1, start_line))

    return "\n".join(line.rstrip() for line in code_lines)

@cached_property
def source_id(self) -> str:
    """Unique identifier of the location of the DataFrame creation in the source code.

    Formatted as ``<fileno>:<start_line>:<start_column>-<end_line>:<end_column>``;
    empty when no source position was recorded.
    """
    src = self.get_src()
    if src is None:  # pragma: no cover
        return ""
    return (
        f"{src.file}:{src.start_line}:{src.start_column}"
        f"-{src.end_line}:{src.end_column}"
    )

def get_source_snippet(self) -> str:
    """Read the source file and extract the snippet where the dataframe is created."""
    src = self.get_src()
    if src is None:  # pragma: no cover
        return "No source"

    # Invert the interning map to translate the stored fileno back to a path.
    filename_by_fileno = {v: k for k, v in __STRING_INTERNING_MAP__.items()}
    filename = filename_by_fileno.get(src.file, UNKNOWN_FILE)

    start_line, end_line = src.start_line, src.end_line
    start_column, end_column = src.start_column, src.end_column

    # Build the code identifier to find the operations where the DataFrame
    # was created. Pre-3.11 positions only carry the start line.
    if sys.version_info >= (3, 11):
        code_identifier = (
            f"(unknown)|{start_line}:{start_column}-{end_line}:{end_column}"
        )
    else:
        code_identifier = f"(unknown)|{start_line}"

    if filename == UNKNOWN_FILE or not os.access(filename, os.R_OK):
        # Unreadable or unknown file: the identifier alone is the best we can do.
        return code_identifier  # pragma: no cover

    code = self._read_file(filename, start_line, end_line, start_column, end_column)
    return f"{code_identifier}: {code}"


def _get_df_transform_trace(
    batch_id: int,
    stmt_cache: Dict[int, proto.Stmt],
) -> List[DataFrameTraceNode]:
    """Helper function to get the transform trace of the dataframe involved in the exception.
    It gathers the lineage in the following way:

    1. Start by creating a DataFrameTraceNode for the given batch_id.
    2. We use BFS to traverse the lineage using the node created in 1. as the first layer.
    3. During each iteration, we check if the node's source_id has been visited. If not,
       we add it to the visited set and append its source format to the trace. This step
       is needed to avoid source_id added multiple times in lineage due to loops.
    4. We then explore the next layer by adding the children of the current node to the
       next layer. We check if the child ID has been visited and if not, we add it to the
       visited set and append the DataFrameTraceNode for it to the next layer.
    5. We repeat this process until there are no more nodes to explore.

    Args:
        batch_id: The batch ID of the dataframe involved in the exception.
        stmt_cache: The statement cache of the session.

    Returns:
        A list of DataFrameTraceNode objects representing the transform trace of the dataframe.
    """
    visited_batch_id = {batch_id}
    visited_source_id = set()

    curr = [DataFrameTraceNode(batch_id, stmt_cache)]
    lineage = []

    while curr:
        # Renamed from ``next`` to avoid shadowing the builtin.
        next_layer: List[DataFrameTraceNode] = []
        for node in curr:
            # tracing updates: record each distinct source location once.
            source_id = node.source_id
            if source_id not in visited_source_id:
                visited_source_id.add(source_id)
                lineage.append(node)

            # explore next layer
            for child_id in node.children:
                if child_id in visited_batch_id:
                    continue
                visited_batch_id.add(child_id)
                next_layer.append(DataFrameTraceNode(child_id, stmt_cache))

        curr = next_layer

    return lineage


def get_df_transform_trace_message(
    df_ast_id: int, stmt_cache: Dict[int, proto.Stmt]
) -> Optional[str]:
    """Get the transform trace message for the dataframe involved in the exception.

    Args:
        df_ast_id: The AST ID of the dataframe involved in the exception.
        stmt_cache: The statement cache of the session.

    Returns:
        A string representing the transform trace message, or None if no trace
        nodes could be gathered.
    """
    df_transform_trace_nodes = _get_df_transform_trace(df_ast_id, stmt_cache)
    if len(df_transform_trace_nodes) == 0:  # pragma: no cover
        return None

    df_transform_trace_length = len(df_transform_trace_nodes)
    try:
        show_trace_length = int(
            os.environ.get(SNOWPARK_PYTHON_DATAFRAME_TRANSFORM_TRACE_LENGTH, 5)
        )
    except ValueError:
        # A malformed env var must not raise while we are building a debug
        # message for another error; fall back to the default trace length.
        show_trace_length = 5

    debug_info_lines = [
        "\n\n--- Additional Debug Information ---\n",
        f"Trace of the most recent dataframe operations associated with the error (total {df_transform_trace_length}):\n",
    ]
    for node in df_transform_trace_nodes[:show_trace_length]:
        debug_info_lines.append(node.get_source_snippet())
    if df_transform_trace_length > show_trace_length:
        debug_info_lines.append(
            f"... and {df_transform_trace_length - show_trace_length} more.\nYou can increase "
            f"the lineage length by setting {SNOWPARK_PYTHON_DATAFRAME_TRANSFORM_TRACE_LENGTH} "
            "environment variable."
        )
    return "\n".join(debug_info_lines)
Loading
Loading