diff --git a/src/psij/launchers/script_based_launcher.py b/src/psij/launchers/script_based_launcher.py
index 9d11f932..505b3c0e 100644
--- a/src/psij/launchers/script_based_launcher.py
+++ b/src/psij/launchers/script_based_launcher.py
@@ -204,4 +204,7 @@ def is_launcher_failure(self, output: str) -> bool:
 
     def get_launcher_failure_message(self, output: str) -> str:
         """See :func:`~psij.Launcher.get_launcher_failure_message`."""
-        return '\n'.join(output.split('\n')[:-2])
+        # If, according to the above, it is a launcher failure, then
+        # the magic line should not be present (aka, all of the output
+        # is the failure).
+        return output
diff --git a/src/psij/launchers/scripts/mpi_launch.sh b/src/psij/launchers/scripts/mpi_launch.sh
index 153fc2b6..31c45233 100644
--- a/src/psij/launchers/scripts/mpi_launch.sh
+++ b/src/psij/launchers/scripts/mpi_launch.sh
@@ -15,12 +15,29 @@ fi
 
 pre_launch
 
+filter_out() {
+    sed -nE 's/^\[[^]]+\]:(.*)/\1/p'
+}
+
+filter_err() {
+    sed -nE 's/^\[[^]]+\]:(.*)/\1/p'
+}
+
+filter_out_5() {
+    sed -nE 's/^\[[^]]+\]: (.*)/\1/p'
+}
+
+filter_err_5() {
+    sed -nE 's/^\[[^]]+\]: (.*)/\1/p'
+}
+
 set +e
 if [ "$IS_OPENMPI_5" == "1" ]; then
-    # there is no -q parameter in OMPI 5
-    mpirun --oversubscribe -n $_PSI_J_PROCESS_COUNT "$@" 1>$_PSI_J_STDOUT 2>$_PSI_J_STDERR <$_PSI_J_STDIN
+    mpirun --oversubscribe --output TAG -n $_PSI_J_PROCESS_COUNT "$@" \
+        1> >(filter_out_5 > "$_PSI_J_STDOUT") 2> >(filter_err_5 > "$_PSI_J_STDERR") <"$_PSI_J_STDIN"
 elif [ "$IS_OPENMPI" == "1" ]; then
-    mpirun --oversubscribe -q -n $_PSI_J_PROCESS_COUNT "$@" 1>$_PSI_J_STDOUT 2>$_PSI_J_STDERR <$_PSI_J_STDIN
+    mpirun --oversubscribe --tag-output -q -n $_PSI_J_PROCESS_COUNT "$@" \
+        1> >(filter_out > "$_PSI_J_STDOUT") 2> >(filter_err > "$_PSI_J_STDERR") <"$_PSI_J_STDIN"
 else
     mpirun -n $_PSI_J_PROCESS_COUNT "$@" 1>$_PSI_J_STDOUT 2>$_PSI_J_STDERR <$_PSI_J_STDIN
 fi
diff --git a/tests/_test_tools.py b/tests/_test_tools.py
index a5666af9..f780081f 100644
--- a/tests/_test_tools.py
+++ b/tests/_test_tools.py
@@ -28,8 +28,13 @@ def _read_file(path: Optional[Path]) -> str:
     if path is None:
         return ''
 
-    with open(path, 'r') as f:
-        return f.read()
+    try:
+        with open(path, 'r') as f:
+            return f.read()
+    except FileNotFoundError:
+        return ''
+    except Exception as ex:
+        return f'<error reading {path}: {ex}>'
 
 
 def assert_completed(job: Job, status: Optional[JobStatus], attached: bool = False) -> None:
diff --git a/tests/test_executor.py b/tests/test_executor.py
index 50f9ebf3..ac6bef0e 100644
--- a/tests/test_executor.py
+++ b/tests/test_executor.py
@@ -36,6 +36,22 @@ def test_simple_job_redirect(execparams: ExecutorTestParams) -> None:
         assert contents == '_x_'
 
 
+def test_stderr_redirect(execparams: ExecutorTestParams) -> None:
+    _make_test_dir()
+    with TemporaryDirectory(dir=Path.home() / '.psij' / 'test') as td:
+        outp = Path(td, 'stderr.txt')
+        job = Job(JobSpec(executable='/bin/bash', arguments=['-c', 'echo -n _x_ 1>&2'],
+                          stderr_path=outp))
+        ex = _get_executor_instance(execparams, job)
+        ex.submit(job)
+        status = job.wait(timeout=_get_timeout(execparams))
+        assert_completed(job, status)
+        f = outp.open("r")
+        contents = f.read()
+        f.close()
+        assert contents == '_x_'
+
+
 def test_attach(execparams: ExecutorTestParams) -> None:
     job1 = Job(JobSpec(executable='/bin/sleep', arguments=['1']))
     ex = _get_executor_instance(execparams, job1)