From 14c8556ff6fcf92f295ca39208e615dfe2cef594 Mon Sep 17 00:00:00 2001 From: hategan Date: Sun, 23 Feb 2025 22:35:25 -0800 Subject: [PATCH 1/6] Fix launcher error extraction --- src/psij/launchers/script_based_launcher.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/psij/launchers/script_based_launcher.py b/src/psij/launchers/script_based_launcher.py index 9d11f932..505b3c0e 100644 --- a/src/psij/launchers/script_based_launcher.py +++ b/src/psij/launchers/script_based_launcher.py @@ -204,4 +204,7 @@ def is_launcher_failure(self, output: str) -> bool: def get_launcher_failure_message(self, output: str) -> str: """See :func:`~psij.Launcher.get_launcher_failure_message`.""" - return '\n'.join(output.split('\n')[:-2]) + # If, according to the above, it is a launcher failure, then + # the magic line should not be present (aka, all of the output + # is the failure). + return output From 937a7d7d97ab44fb18b6407b363338a4668c375a Mon Sep 17 00:00:00 2001 From: hategan Date: Sun, 23 Feb 2025 22:36:11 -0800 Subject: [PATCH 2/6] In tests, when checking for completion, don't bail out when one of the outputs isn't found, but attempt to read both. --- tests/_test_tools.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/_test_tools.py b/tests/_test_tools.py index a5666af9..b1d2edc8 100644 --- a/tests/_test_tools.py +++ b/tests/_test_tools.py @@ -28,8 +28,13 @@ def _read_file(path: Optional[Path]) -> str: if path is None: return '' - with open(path, 'r') as f: - return f.read() + try: + with open(path, 'r') as f: + return f.read() + except FileNotFoundError as ex: + return '' + except Exception as ex: + return f'' def assert_completed(job: Job, status: Optional[JobStatus], attached: bool = False) -> None: From 8a291d1e921b0ced3e77ab509067e03fba1fea4d Mon Sep 17 00:00:00 2001 From: hategan Date: Sun, 23 Feb 2025 22:37:03 -0800 Subject: [PATCH 3/6] Some mpirun deployments will add spurious messages to the stream and `-v` is not enough to suppress them, so use tagged output. --- src/psij/launchers/scripts/mpi_launch.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/psij/launchers/scripts/mpi_launch.sh b/src/psij/launchers/scripts/mpi_launch.sh index 153fc2b6..f5a92d54 100644 --- a/src/psij/launchers/scripts/mpi_launch.sh +++ b/src/psij/launchers/scripts/mpi_launch.sh @@ -15,12 +15,21 @@ fi pre_launch +filter_out() { + sed -nE 's/^\[[^]]+\]:(.*)/\1/p' +} + +filter_err() { + sed -nE 's/^\[[^]]+\]:(.*)/\1/p' +} + set +e if [ "$IS_OPENMPI_5" == "1" ]; then - # there is no -q parameter in OMPI 5 - mpirun --oversubscribe -n $_PSI_J_PROCESS_COUNT "$@" 1>$_PSI_J_STDOUT 2>$_PSI_J_STDERR <$_PSI_J_STDIN + mpirun --oversubscribe --output TAG -n $_PSI_J_PROCESS_COUNT "$@" \ + 1> >(filter_out > $_PSI_J_STDOUT) 2> >(filter_err > $_PSI_J_STDERR) <$_PSI_J_STDIN elif [ "$IS_OPENMPI" == "1" ]; then - mpirun --oversubscribe -q -n $_PSI_J_PROCESS_COUNT "$@" 1>$_PSI_J_STDOUT 2>$_PSI_J_STDERR <$_PSI_J_STDIN + mpirun --oversubscribe --tag-output -q -n $_PSI_J_PROCESS_COUNT "$@" \ + 1> >(filter_out > "$_PSI_J_STDOUT") 2> >(filter_err > $_PSI_J_STDERR) <$_PSI_J_STDIN else mpirun -n $_PSI_J_PROCESS_COUNT "$@" 1>$_PSI_J_STDOUT 2>$_PSI_J_STDERR <$_PSI_J_STDIN fi From 388577b4bf8f53aa418b63184b6df6a2fbc63c0a Mon Sep 17 00:00:00 2001 From: hategan Date: Sun, 23 Feb 2025 22:37:18 -0800 Subject: [PATCH 4/6] Add test for stderr redirection. --- tests/test_executor.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_executor.py b/tests/test_executor.py index 50f9ebf3..ac6bef0e 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -36,6 +36,22 @@ def test_simple_job_redirect(execparams: ExecutorTestParams) -> None: assert contents == '_x_' +def test_stderr_redirect(execparams: ExecutorTestParams) -> None: + _make_test_dir() + with TemporaryDirectory(dir=Path.home() / '.psij' / 'test') as td: + outp = Path(td, 'stderr.txt') + job = Job(JobSpec(executable='/bin/bash', arguments=['-c', 'echo -n _x_ 1>&2'], + stderr_path=outp)) + ex = _get_executor_instance(execparams, job) + ex.submit(job) + status = job.wait(timeout=_get_timeout(execparams)) + assert_completed(job, status) + f = outp.open("r") + contents = f.read() + f.close() + assert contents == '_x_' + + def test_attach(execparams: ExecutorTestParams) -> None: job1 = Job(JobSpec(executable='/bin/sleep', arguments=['1'])) ex = _get_executor_instance(execparams, job1) From 9620c4283e052160dd80844b1548edf2e321fefc Mon Sep 17 00:00:00 2001 From: hategan Date: Sun, 23 Feb 2025 22:44:10 -0800 Subject: [PATCH 5/6] pep8 --- tests/_test_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/_test_tools.py b/tests/_test_tools.py index b1d2edc8..f780081f 100644 --- a/tests/_test_tools.py +++ b/tests/_test_tools.py @@ -31,7 +31,7 @@ def _read_file(path: Optional[Path]) -> str: try: with open(path, 'r') as f: return f.read() - except FileNotFoundError as ex: + except FileNotFoundError: return '' except Exception as ex: return f'' From 9504745e151241e4f3a68a4cd37098a819e63802 Mon Sep 17 00:00:00 2001 From: hategan Date: Tue, 25 Feb 2025 09:39:26 -0800 Subject: [PATCH 6/6] It appears that mpirun from 5 adds a space after the `:`. --- src/psij/launchers/scripts/mpi_launch.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/psij/launchers/scripts/mpi_launch.sh b/src/psij/launchers/scripts/mpi_launch.sh index f5a92d54..31c45233 100644 --- a/src/psij/launchers/scripts/mpi_launch.sh +++ b/src/psij/launchers/scripts/mpi_launch.sh @@ -23,10 +23,18 @@ filter_err() { sed -nE 's/^\[[^]]+\]:(.*)/\1/p' } +filter_out_5() { + sed -nE 's/^\[[^]]+\]: (.*)/\1/p' +} + +filter_err_5() { + sed -nE 's/^\[[^]]+\]: (.*)/\1/p' +} + set +e if [ "$IS_OPENMPI_5" == "1" ]; then mpirun --oversubscribe --output TAG -n $_PSI_J_PROCESS_COUNT "$@" \ - 1> >(filter_out > $_PSI_J_STDOUT) 2> >(filter_err > $_PSI_J_STDERR) <$_PSI_J_STDIN + 1> >(filter_out_5 > $_PSI_J_STDOUT) 2> >(filter_err_5 > $_PSI_J_STDERR) <$_PSI_J_STDIN elif [ "$IS_OPENMPI" == "1" ]; then mpirun --oversubscribe --tag-output -q -n $_PSI_J_PROCESS_COUNT "$@" \ 1> >(filter_out > "$_PSI_J_STDOUT") 2> >(filter_err > $_PSI_J_STDERR) <$_PSI_J_STDIN