From cb7b4a1c01ada4cd463bafbf1f756bec12b255ad Mon Sep 17 00:00:00 2001 From: Egmont Koblinger Date: Wed, 10 Dec 2025 12:12:00 +0100 Subject: [PATCH 1/5] Ticket #4480: Read the entire working directory from the subshell If the subshell writes the working directory slowly, previously we could read its beginning and stop there. Signed-off-by: Egmont Koblinger --- src/subshell/common.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/subshell/common.c b/src/subshell/common.c index 8b05c43fd..16d3819fb 100644 --- a/src/subshell/common.c +++ b/src/subshell/common.c @@ -918,10 +918,23 @@ feed_subshell (int how, gboolean fail_on_error) } else if (FD_ISSET (subshell_pipe[READ], &read_set)) - // Read the subshell's CWD and capture its prompt + // Read the subshell's CWD and later capture its prompt. + // We're inside an "else" branch to checking data on subshell_pty, and CWD is only written + // to subshell_pipe after completing the subshell command, therefore we have already read + // the entire output of the command. { + // CWD in subshell_pipe may not be complete yet (ticket #4480). Wait until the shell + // completes writing it. + // Note: this deadlocks if pwd is longer than a Unix pipe's buffer size, but this + // should not be a problem in practice, pipe size is typically at least 16kB. + synchronize (); + const ssize_t bytes = read (subshell_pipe[READ], subshell_cwd, sizeof (subshell_cwd)); + // Note: if CWD is longer than our buffer size then there are two bugs. We silently + // chop the value and use that directory, instead of explicitly raising an error. + // We also don't flush the remaining data from the pipe, breaking the next CWD read. + if (bytes <= 0) { tcsetattr (STDOUT_FILENO, TCSANOW, &shell_mode); @@ -932,8 +945,6 @@ feed_subshell (int how, gboolean fail_on_error) subshell_cwd[(size_t) bytes - 1] = '\0'; // Squash the final '\n' - synchronize (); - clear_subshell_prompt_string (); should_read_new_subshell_prompt = TRUE; subshell_ready = TRUE; From 2b658bf3eae4f70c99d8bc059c1cd35452f1a64f Mon Sep 17 00:00:00 2001 From: Egmont Koblinger Date: Wed, 10 Dec 2025 12:12:10 +0100 Subject: [PATCH 2/5] Remove a commented and outdated busybox code This piece of code was never live in mc. It would work around a BusyBox bug that was fixed in 2012. Signed-off-by: Egmont Koblinger --- src/subshell/common.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/subshell/common.c b/src/subshell/common.c index 16d3819fb..f31621b31 100644 --- a/src/subshell/common.c +++ b/src/subshell/common.c @@ -1308,13 +1308,6 @@ subshell_name_quote (const char *s) quote_cmd_start = "(printf '%b' '"; quote_cmd_end = "')"; } - /* TODO: When BusyBox printf is fixed, get rid of this "else if", see - https://lists.busybox.net/pipermail/busybox/2012-March/077460.html */ - /* else if (subshell_type == ASH_BUSYBOX) - { - quote_cmd_start = "\"`echo -en '"; - quote_cmd_end = "'`\""; - } */ else { quote_cmd_start = "\"`printf '%b' '"; From c2126aedf5b4e280d6d2d661ce27e767090b6f07 Mon Sep 17 00:00:00 2001 From: Egmont Koblinger Date: Wed, 10 Dec 2025 12:12:20 +0100 Subject: [PATCH 3/5] Ticket #2325, #4480: Improve entering a directory with special characters Handle trailing '\n' character in the directory name. Make sure to construct the cd command in physical lines no longer than 250 bytes so that we don't hit the small limit of the kernel's cooked mode tty buffer size on some platforms. tcsh still has problems entering directories with special characters (including invalid UTF-8) in their name. Other shells are now believed to handle any directory name properly. Signed-off-by: Egmont Koblinger --- src/subshell/common.c | 176 +++++++++++++++++++++++++++++++++--------- 1 file changed, 139 insertions(+), 37 deletions(-) diff --git a/src/subshell/common.c b/src/subshell/common.c index f31621b31..962a7a9a1 100644 --- a/src/subshell/common.c +++ b/src/subshell/common.c @@ -152,6 +152,10 @@ gboolean should_read_new_subshell_prompt; /* Length of the buffer for all I/O with the subshell */ #define PTY_BUFFER_SIZE BUF_MEDIUM // Arbitrary; but keep it >= 80 +/* Assume that the kernel's cooked mode buffer size might not be larger than this. + * On Solaris it's 256 bytes, see ticket #4480. Shave off a few bytes, just in case. */ +#define COOKED_MODE_BUFFER_SIZE 250 + /*** file scope type declarations ****************************************************************/ /* For pipes */ @@ -1278,68 +1282,162 @@ init_subshell_precmd (void) /* --------------------------------------------------------------------------------------------- */ /** - * Carefully quote directory name to allow entering any directory safely, - * no matter what weird characters it may contain in its name. - * NOTE: Treat directory name an untrusted data, don't allow it to cause - * executing any commands in the shell. Escape all control characters. - * Use following technique: - * - * printf(1) with format string containing a single conversion specifier, - * "b", and an argument which contains a copy of the string passed to - * subshell_name_quote() with all characters, except digits and letters, - * replaced by the backslash-escape sequence \0nnn, where "nnn" is the - * numeric value of the character converted to octal number. + * Carefully construct a 'cd' command that allows entering any directory safely. Two things to + * watch out for: * - * cd "`printf '%b' 'ABC\0nnnDEF\0nnnXYZ'`" + * Enter any directory safely, no matter what special bytes its name contains (special shell + * characters, control characters, non-printable characters, invalid UTF-8 etc.). + * NOTE: Treat directory name as untrusted data, don't allow it to cause executing any commands in + * the shell! * - * N.B.: Use single quotes for conversion specifier to work around - * tcsh 6.20+ parser breakage, see ticket #3852 for the details. + * Keep physical lines under COOKED_MODE_BUFFER_SIZE bytes, as in some kernels the buffer for the + * tty line's cooked mode is quite small. If the directory name is longer, we have to somehow + * construct a multiline cd command. */ static GString * -subshell_name_quote (const char *s) +create_cd_command (const char *s) { GString *ret; const char *n; - const char *quote_cmd_start, *quote_cmd_end; + const char *quote_cmd_start, *before_wrap, *after_wrap, *quote_cmd_end; + const char *escape_fmt; + int line_length; + char buf[BUF_TINY]; - if (mc_global.shell->type == SHELL_FISH) + if (mc_global.shell->type == SHELL_BASH || mc_global.shell->type == SHELL_ZSH) + { + /* + * bash and zsh: Use $'...\ooo...' notation (ooo is three octal digits). + * + * Use octal because hex mode likes to go multibyte. + * + * Line wrapping (if necessary) with a trailing backslash outside of quotes. + */ + quote_cmd_start = " cd $'"; + before_wrap = "'\\"; + after_wrap = "$'"; + quote_cmd_end = "'"; + escape_fmt = "\\%03o"; + } + else if (mc_global.shell->type == SHELL_FISH) { - quote_cmd_start = "(printf '%b' '"; - quote_cmd_end = "')"; + /* + * fish: Use ...\xHH... notation (HH is two hex digits). + * + * Its syntax requires that escapes go outside of quotes, and the rest is also safe there. + * Use hex because it only supports octal for low (up to octal 177 / decimal 127) bytes. + * + * Line wrapping (if necessary) with a trailing backslash. + */ + quote_cmd_start = " cd "; + before_wrap = "\\"; + after_wrap = ""; + quote_cmd_end = ""; + escape_fmt = "\\x%02X"; + } + else if (mc_global.shell->type == SHELL_TCSH) + { + /* + * tcsh: Use $'...\ooo...' notation (ooo is three octal digits). + * + * It doesn't support string contants spanning across multipline lines (a trailing + * backslash introduces a space), therefore construct the string in a tmp variable. + * Nevertheless emit a trailing backslash so it's just one line in its history. + * + * The :q modifier is needed to preserve newlines and other special chars. + * + * Note that tcsh's variables aren't binary clean, in its UTF-8 mode they are enforced + * to be valid UTF-8. So unfortunately we cannot enter every weird directory. + */ + quote_cmd_start = " set _mc_newdir=$'"; + before_wrap = "'; \\"; + after_wrap = " set _mc_newdir=${_mc_newdir:q}$'"; + quote_cmd_end = "'; cd ${_mc_newdir:q}"; + escape_fmt = "\\%03o"; } else { - quote_cmd_start = "\"`printf '%b' '"; - quote_cmd_end = "'`\""; + /* + * Fallback / POSIX sh: Construct a command like this: + * + * _mc_newdir_=`printf '%b_' 'ABC\0oooDEF\0oooXYZ'` # ooo are three octal digits + * cd "${_mc_newdir_%_}" + * + * Command substitution removes trailing '\n's, hence the added and later removed '_'. + * + * Wrapping to new line with a trailing backslash outside of the innermost single quotes. + */ + quote_cmd_start = " _mc_newdir_=`printf '%b_' '"; + before_wrap = "'\\"; + after_wrap = "'"; + quote_cmd_end = "'`; cd \"${_mc_newdir_%_}\""; + escape_fmt = "\\0%03o"; } + const int quote_cmd_start_len = strlen (quote_cmd_start); + const int before_wrap_len = strlen (before_wrap); + const int after_wrap_len = strlen (after_wrap); + const int quote_cmd_end_len = strlen (quote_cmd_end); + + /* Measure the length of an escaped byte. In the unlikely case that it won't be uniform in some + * future shell, have an upper estimate by measuring the largest byte. */ + const int escaped_char_len = sprintf (buf, escape_fmt, 0xFF); + ret = g_string_sized_new (64); + // Copy the beginning of the command to the buffer + g_string_append_len (ret, quote_cmd_start, quote_cmd_start_len); + // Prevent interpreting leading '-' as a switch for 'cd' if (s[0] == '-') g_string_append (ret, "./"); - // Copy the beginning of the command to the buffer - g_string_append (ret, quote_cmd_start); + /* Sending physical lines over a certain small limit causes problems on some platforms, + * see ticket #4480. Make sure to wrap in time. See how large we can grow so that an + * additional line wrapping or closing string still fits. */ + const int max_length = COOKED_MODE_BUFFER_SIZE - MAX (before_wrap_len, quote_cmd_end_len); + g_assert (max_length >= 64); // make sure we have enough room to breathe - /* - * Print every character except digits and letters as a backslash-escape - * sequence of the form \0nnn, where "nnn" is the numeric value of the - * character converted to octal number. - */ + line_length = ret->len; + + /* Print every character except digits and letters as a backslash-escape sequence. */ for (const char *su = s; su[0] != '\0'; su = n) { n = str_cget_next_char_safe (su); if (str_isalnum (su)) + { + if (line_length + (n - su) > max_length) + { + // wrap to next physical line + g_string_append_len (ret, before_wrap, before_wrap_len); + g_string_append_c (ret, '\n'); + g_string_append_len (ret, after_wrap, after_wrap_len); + line_length = after_wrap_len; + } + // append character g_string_append_len (ret, su, (size_t) (n - su)); + line_length += (n - su); + } else for (size_t c = 0; c < (size_t) (n - su); c++) - g_string_append_printf (ret, "\\0%03o", (unsigned char) su[c]); + { + if (line_length + escaped_char_len > max_length) + { + // wrap to next physical line + g_string_append_len (ret, before_wrap, before_wrap_len); + g_string_append_c (ret, '\n'); + g_string_append_len (ret, after_wrap, after_wrap_len); + line_length = after_wrap_len; + } + // append escaped byte + g_string_append_printf (ret, escape_fmt, (unsigned char) su[c]); + line_length += escaped_char_len; + } } - g_string_append (ret, quote_cmd_end); + g_string_append_len (ret, quote_cmd_end, quote_cmd_end_len); return ret; } @@ -1420,23 +1518,27 @@ do_subshell_chdir (const vfs_path_t *vpath, gboolean update_prompt) feed_subshell (QUIETLY, TRUE); } - /* The initial space keeps this out of the command history (in bash - because we set "HISTCONTROL=ignorespace") */ - write_all (mc_global.tty.subshell_pty, " cd ", 4); - if (vpath == NULL) - write_all (mc_global.tty.subshell_pty, "/", 1); + { + /* The initial space keeps this out of the command history (in bash + because we set "HISTCONTROL=ignorespace") */ + const char *cmd = " cd /"; + write_all (mc_global.tty.subshell_pty, cmd, sizeof (cmd) - 1); + } else { const char *translate = vfs_translate_path (vfs_path_as_str (vpath)); if (translate == NULL) - write_all (mc_global.tty.subshell_pty, ".", 1); + { + const char *cmd = " cd ."; + write_all (mc_global.tty.subshell_pty, cmd, sizeof (cmd) - 1); + } else { GString *temp; - temp = subshell_name_quote (translate); + temp = create_cd_command (translate); write_all (mc_global.tty.subshell_pty, temp->str, temp->len); g_string_free (temp, TRUE); } From 04bfcccab4465d027a1010dbe8a8500c4f5fe8e2 Mon Sep 17 00:00:00 2001 From: Egmont Koblinger Date: Wed, 10 Dec 2025 12:12:30 +0100 Subject: [PATCH 4/5] Ticket #4480: Don't escape safe shell chars and multibyte UTF-8 Don't escape safe shell characters commonly used in paths, such as '/', '.', '-' and '_'. Don't escape multibyte UTF-8 characters. Escaping each byte separately in string assignments doesn't work in tcsh. The previous commit introduces a regression here: tcsh cannot enter directories whose name is valid UTF-8 but contains non-alphanumeric UTF-8 characters. It used to work because printf would glue them together correctly, but we no longer use printf and command substitution because that breaks newlines. Signed-off-by: Egmont Koblinger --- src/subshell/common.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/subshell/common.c b/src/subshell/common.c index 962a7a9a1..e6bf73a4d 100644 --- a/src/subshell/common.c +++ b/src/subshell/common.c @@ -1406,8 +1406,15 @@ create_cd_command (const char *s) { n = str_cget_next_char_safe (su); - if (str_isalnum (su)) + // tcsh doesn't support defining strings with UTF-8 characters broken down into individual + // bytes each escaped in octal, such as $'\303\251' instead of 'é'. So copy all valid + // multibyte UTF-8 characters as-is, without escaping; they aren't special shell characters + // in any shell and don't need protection. + // Also don't escape frequent safe filename characters like '/', '.' and such. + if ((unsigned char) su[0] >= 0x80 || g_ascii_isalnum (su[0]) + || strchr ("/.-_", su[0]) != NULL) { + // It's a safe character that we copy as-is. if (line_length + (n - su) > max_length) { // wrap to next physical line @@ -1421,6 +1428,8 @@ create_cd_command (const char *s) line_length += (n - su); } else + // It's a special shell character, or maybe an invalid UTF-8 segment. + // Escape each byte separately. for (size_t c = 0; c < (size_t) (n - su); c++) { if (line_length + escaped_char_len > max_length) From 2de51255fd2c92bb2b3d1c791891adb4fe653ce5 Mon Sep 17 00:00:00 2001 From: Egmont Koblinger Date: Wed, 10 Dec 2025 12:12:50 +0100 Subject: [PATCH 5/5] Ticket #4884: mc-wrapper.{sh,fish}: Preserve '\n' characters Signed-off-by: Egmont Koblinger --- contrib/mc-wrapper.fish.in | 3 ++- contrib/mc-wrapper.sh.in | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/contrib/mc-wrapper.fish.in b/contrib/mc-wrapper.fish.in index bd75d73cc..ea5db4117 100644 --- a/contrib/mc-wrapper.fish.in +++ b/contrib/mc-wrapper.fish.in @@ -11,7 +11,8 @@ set --local MC_STATUS 0 @bindir@/mc -P "$MC_PWD_FILE" $argv || set MC_STATUS $status if test -r "$MC_PWD_FILE" - set --local MC_PWD (cat $MC_PWD_FILE) + # This construct preserves internal and trailing '\n' characters. + read -l -z MC_PWD < (cat $MC_PWD_FILE | psub) if test -n $MC_PWD && test $MC_PWD != $PWD && test -d $MC_PWD cd $MC_PWD || true end diff --git a/contrib/mc-wrapper.sh.in b/contrib/mc-wrapper.sh.in index 550ad6f60..66766c38f 100644 --- a/contrib/mc-wrapper.sh.in +++ b/contrib/mc-wrapper.sh.in @@ -10,7 +10,9 @@ MC_STATUS=0 @bindir@/mc -P "$MC_PWD_FILE" "$@" || MC_STATUS=$? if test -r "$MC_PWD_FILE"; then - MC_PWD="`cat "$MC_PWD_FILE"`" + # Command substitution removes trailing '\n's, hence the added and later removed '_'. + MC_PWD="`cat "$MC_PWD_FILE"; printf '_'`" + MC_PWD="${MC_PWD%_}" if test -n "$MC_PWD" && test "$MC_PWD" != "$PWD" && test -d "$MC_PWD"; then cd "$MC_PWD" || true fi