feat(spark): Refactor unit tests to sdk coding standards #293
feat(spark): Refactor unit tests to sdk coding standards #293 — digvijay-y wants to merge 5 commits into kubeflow:main from
Conversation
|
[APPROVALNOTIFIER] This PR is NOT APPROVED This pull-request has been approved by: The full list of commands accepted by this bot can be found here. DetailsNeeds approval from an approver in each of these files:Approvers can indicate their approval by writing |
|
🎉 Welcome to the Kubeflow SDK! 🎉 Thanks for opening your first PR! We're happy to have you as part of our community 🚀 Here's what happens next:
Join the community:
Feel free to ask questions in the comments if you need any help or clarification! |
There was a problem hiding this comment.
Pull request overview
This PR refactors the SparkClient unit tests to follow the Kubeflow SDK coding standards by converting individual test classes to @pytest.mark.parametrize format with TestCase dataclass patterns. The refactoring aligns with issue #273 and follows the approach established in PR #225.
Changes:
- Introduced TestCase dataclass for parametrized test structure
- Converted all test methods from individual test classes to parametrized tests
- Added mock improvements for handling both existing and non-existent sessions
- Consolidated test organization into four main parametrized test functions
| call_args = mock_backend.create_and_connect.call_args | ||
| assert call_args.kwargs["options"] is None | ||
|
|
||
| print(f"✓ {test_case.name} succeeded") |
There was a problem hiding this comment.
Print statements are not used in other test files in the codebase. Remove these debugging print statements to maintain consistency with the codebase testing conventions.
| if test_case.expected_status == SUCCESS: | ||
| if "operation" in test_case.config: | ||
| if test_case.config["operation"] == "delete": | ||
| spark_client.delete_session(test_case.config["session_name"]) | ||
| mock_backend.delete_session.assert_called_once_with( | ||
| test_case.config["session_name"] | ||
| ) | ||
| elif test_case.config["operation"] == "logs": | ||
| result = list(spark_client.get_session_logs(test_case.config["session_name"])) | ||
| assert len(result) == test_case.expected_output | ||
| mock_backend.get_session_logs.assert_called_once_with( | ||
| test_case.config["session_name"], follow=False | ||
| ) | ||
| elif "session_name" in test_case.config: | ||
| result = spark_client.get_session(test_case.config["session_name"]) | ||
| assert result.name == test_case.expected_output | ||
| mock_backend.get_session.assert_called_with(test_case.config["session_name"]) | ||
| else: | ||
| result = spark_client.list_sessions() | ||
| assert len(result) == test_case.expected_output | ||
| mock_backend.list_sessions.assert_called_once() | ||
| print(f"✓ {test_case.name} succeeded") | ||
| else: | ||
| with pytest.raises(Exception) as exc_info: | ||
| spark_client.get_session(test_case.config["session_name"]) | ||
| assert test_case.expected_error in str(exc_info.value) | ||
| print(f"✓ {test_case.name} failed as expected") |
There was a problem hiding this comment.
The test structure should use try/except blocks to check expected_status instead of if/else conditionals, following the established pattern in other test files.
| if "options" in test_case.config: | ||
| options = test_case.config["options"] | ||
| spark_client.connect(options=options) | ||
| mock_backend.create_and_connect.assert_called_once() | ||
| call_args = mock_backend.create_and_connect.call_args | ||
| assert call_args.kwargs["options"] == options | ||
|
|
||
| def test_connect_without_options_auto_generates(self, spark_client, mock_backend): | ||
| """C19: Connect without options auto-generates name via backend.""" | ||
| mock_session = Mock() | ||
| mock_backend.create_and_connect.return_value = mock_session | ||
| else: | ||
| spark_client.connect() | ||
| mock_backend.create_and_connect.assert_called_once() | ||
| call_args = mock_backend.create_and_connect.call_args | ||
| assert call_args.kwargs["options"] is None | ||
|
|
||
| print(f"✓ {test_case.name} succeeded") |
There was a problem hiding this comment.
The test structure should use try/except blocks to check expected_status instead of if/else conditionals, following the established pattern in other test files.
| expected_output="backend_created", | ||
| ), | ||
| TestCase( | ||
| name="custom namespace initialization", | ||
| expected_status=SUCCESS, | ||
| config={"namespace": "spark"}, | ||
| expected_output="backend_created", |
There was a problem hiding this comment.
The expected_output value "backend_created" is not used in the test logic. Remove this unused field or use it to validate actual test outputs.
| expected_output="backend_created", | |
| ), | |
| TestCase( | |
| name="custom namespace initialization", | |
| expected_status=SUCCESS, | |
| config={"namespace": "spark"}, | |
| expected_output="backend_created", | |
| ), | |
| TestCase( | |
| name="custom namespace initialization", | |
| expected_status=SUCCESS, | |
| config={"namespace": "spark"}, |
| @dataclass | ||
| class TestCase: | ||
| """Test case structure for parametrized SparkClient tests.""" | ||
|
|
||
| name: str | ||
| expected_status: str | ||
| config: dict[str, Any] | ||
| expected_output: Optional[Any] = None | ||
| expected_error: Optional[str] = None | ||
| # Prevent pytest from collecting this dataclass as a test | ||
| __test__ = False | ||
|
|
||
|
|
||
| SUCCESS = "SUCCESS" | ||
| EXCEPTION = "EXCEPTION" |
There was a problem hiding this comment.
The TestCase dataclass and SUCCESS/EXCEPTION constants should be imported from kubeflow.spark.test.common instead of being redefined. This ensures consistency across the codebase. Additionally, expected_error should be Optional[type[Exception]] instead of Optional[str] to match the established pattern.
| expected_output="options_passed", | ||
| ), | ||
| TestCase( | ||
| name="connect without options auto-generates", | ||
| expected_status=SUCCESS, | ||
| config={}, | ||
| expected_output="no_options", |
There was a problem hiding this comment.
The expected_output value "options_passed" is not used in the test logic. Remove this unused field or use it to validate actual test outputs.
| expected_output="options_passed", | |
| ), | |
| TestCase( | |
| name="connect without options auto-generates", | |
| expected_status=SUCCESS, | |
| config={}, | |
| expected_output="no_options", | |
| ), | |
| TestCase( | |
| name="connect without options auto-generates", | |
| expected_status=SUCCESS, | |
| config={}, |
| with patch("kubeflow.spark.api.spark_client.KubernetesBackend"): | ||
| client = SparkClient() | ||
| assert client.backend is not None | ||
| print(f"✓ {test_case.name} succeeded") |
There was a problem hiding this comment.
Print statements are not used in other test files in the codebase. Remove these debugging print statements to maintain consistency with the codebase testing conventions.
| def test_connect_with_url_invalid(self, spark_client): | ||
| """C04b: Connect with invalid URL raises ValueError.""" | ||
| SparkClient(backend_config=test_case.config["backend_config"]) | ||
| print(f"✓ {test_case.name} failed as expected") |
There was a problem hiding this comment.
Print statements are not used in other test files in the codebase. Remove these debugging print statements to maintain consistency with the codebase testing conventions.
| ) | ||
| def test_spark_client_connect(test_case: TestCase, spark_client): | ||
| """Test SparkClient connect method scenarios.""" | ||
| print(f"Running test: {test_case.name}") |
There was a problem hiding this comment.
Print statements are not used in other test files in the codebase. Remove these debugging print statements to maintain consistency with the codebase testing conventions.
Signed-off-by: digvijay-y <yewaredigvijay@gmail.com>
Signed-off-by: Jon Burdo <jon@jonburdo.com> Signed-off-by: digvijay-y <yewaredigvijay@gmail.com>
Signed-off-by: digvijay-y <yewaredigvijay@gmail.com>
db47042 to
5d8280e
Compare
| except Exception as e: | ||
| # If we got an exception but expected success, fail | ||
| assert test_case.expected_status == EXCEPTION, ( | ||
| f"Unexpected exception in {test_case.name}: {e}" | ||
| ) | ||
| # Validate the exception type/message if specified | ||
| if test_case.expected_error: | ||
| assert test_case.expected_error in str(e), ( | ||
| f"Expected error '{test_case.expected_error}' but got '{str(e)}'" | ||
| ) |
There was a problem hiding this comment.
expected_error is set to the string "ValueError", but the assertion checks "ValueError" in str(e); str(ValueError(...)) is just the message and typically won’t contain the class name, so this test will fail even when the correct exception is raised (prefer asserting the exception type, e.g. type(e) is ValueError, or using pytest.raises(ValueError)).
| f"Unexpected exception in {test_case.name}: {e}" | ||
| ) | ||
| # Validate the exception type/message if specified | ||
| if test_case.expected_error: |
There was a problem hiding this comment.
Same issue here: the invalid-URL case sets expected_error="ValueError" but validates it via substring match against str(e), which won’t include the exception type (use pytest.raises(ValueError, match=...) or compare type(e) to the expected exception).
| if test_case.expected_error: | |
| if test_case.expected_error: | |
| # First, allow matching by exception type name (e.g., "ValueError") | |
| if type(e).__name__ == test_case.expected_error: | |
| return | |
| # Otherwise, fall back to matching against the exception message |
| TestCase( | ||
| name="connect create session verification", | ||
| expected_status=SUCCESS, | ||
| config={}, | ||
| ), | ||
| ], | ||
| ) | ||
| def test_spark_client_connect(test_case: TestCase, spark_client): | ||
| """Test SparkClient connect method scenarios.""" | ||
|
|
||
| try: | ||
| if "url" in test_case.config: | ||
| from kubeflow.spark.backends.kubernetes.utils import validate_spark_connect_url | ||
|
|
||
| result = validate_spark_connect_url(test_case.config["url"]) | ||
| assert result == test_case.expected_output | ||
| else: | ||
| # Verify backend methods are not called initially | ||
| spark_client.backend.create_session.assert_not_called() | ||
| spark_client.backend.wait_for_session_ready.assert_not_called() | ||
|
|
There was a problem hiding this comment.
The "connect create session verification" case doesn’t call SparkClient.connect() and only asserts some backend methods haven’t been called, so it doesn’t validate any behavior and can’t fail if connect() regresses; either remove this case or make it call connect() and assert the expected backend interaction (e.g., create_and_connect is invoked).
|
/ok-to-test |
Refactor unit tests to follow Kubeflow SDK coding standards:
- Use parametrized tests with a TestCase dataclass. Ref #225
Fixes #273
Checklist: