From 4eb0fe082a4b11ac0776e4db92ea2fd56ea03f7c Mon Sep 17 00:00:00 2001 From: Ben Wall Date: Wed, 5 Nov 2025 18:13:33 -0500 Subject: [PATCH 1/7] Add `_wchar` functions on Windows to fix unicode paths --- src/lib.rs | 43 ++++++++++++++++++++++++++++++++++++++----- taglib-sys/src/lib.rs | 9 ++++++++- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 21bb379..85ebbc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -242,6 +242,24 @@ impl Drop for File { impl File { /// Creates a new `taglib::File` for the given `filename`. pub fn new>(path: P) -> Result { + #[cfg(target_os = "windows")] + { + + use std::os::windows::ffi::OsStrExt; + let filename = path.as_ref().to_path_buf(); + let wide = filename.as_os_str().encode_wide(); + let mut wide: Vec<_> = wide.collect(); + wide.push(0); // Null terminate. + let f = unsafe { ll::taglib_file_new_wchar(wide.as_ptr()) }; + if f.is_null() { + return Err(FileError::InvalidFile); + } + + return Ok(File { raw: f }); + } + + #[cfg_attr(target_os = "windows", allow(unreachable_code))] + let filename = path.as_ref().to_str().ok_or(FileError::InvalidFileName)?; let filename_c = CString::new(filename).ok().ok_or(FileError::InvalidFileName)?; let filename_c_ptr = filename_c.as_ptr(); @@ -256,13 +274,28 @@ impl File { /// Creates a new `taglib::File` for the given `filename` and type of file. pub fn new_type(filename: &str, filetype: FileType) -> Result { - let filename_c = match CString::new(filename) { - Ok(s) => s, - _ => return Err(FileError::InvalidFileName), + #[cfg(target_os = "windows")] + let f = { + use std::os::windows::ffi::OsStrExt; + use std::ffi::OsStr; + + let str = OsStr::new(filename); + let wide = str.encode_wide(); + let mut wide: Vec<_> = wide.collect(); + wide.push(0); // Null terminate. + unsafe { ll::taglib_file_new_type_wchar(wide.as_ptr(), filetype as u32) } }; - let filename_c_ptr = filename_c.as_ptr(); - let f = unsafe { ll::taglib_file_new_type(filename_c_ptr, filetype as u32) }; + #[cfg(not(target_os = "windows"))] + let f = { + let filename_c = match CString::new(filename) { + Ok(s) => s, + _ => return Err(FileError::InvalidFileName), + }; + + let filename_c_ptr = filename_c.as_ptr(); + unsafe { ll::taglib_file_new_type(filename_c_ptr, filetype as u32) } + }; if f.is_null() { return Err(FileError::InvalidFile); } diff --git a/taglib-sys/src/lib.rs b/taglib-sys/src/lib.rs index 774b319..3e155ae 100644 --- a/taglib-sys/src/lib.rs +++ b/taglib-sys/src/lib.rs @@ -21,7 +21,7 @@ #![allow(non_camel_case_types)] extern crate libc; -use libc::{c_int, c_uint, c_char, c_void}; +use libc::{c_char, c_int, c_uint, c_void, wchar_t}; // Public types; these are all opaque pointer types pub type TagLib_File = c_void; @@ -45,10 +45,17 @@ pub const TAGLIB_FILE_ASF: TagLib_FileType = 9; // tag_c.h extern "C" { pub fn taglib_file_new(filename: *const c_char) -> *mut TagLib_File; + #[cfg(target_os = "windows")] + pub fn taglib_file_new_wchar(filename: *const wchar_t) -> *mut TagLib_File; pub fn taglib_file_new_type( filename: *const c_char, filetype: TagLib_FileType, ) -> *mut TagLib_File; + #[cfg(target_os = "windows")] + pub fn taglib_file_new_type_wchar( + filename: *const wchar_t, + filetype: TagLib_FileType, + ) -> *mut TagLib_File; pub fn taglib_file_is_valid(file: *mut TagLib_File) -> TagLib_Bool; pub fn taglib_file_free(file: *mut TagLib_File); pub fn taglib_file_save(file: *mut TagLib_File) -> TagLib_Bool; From 7b77d5d13341254cf6655400611a330d5f08823c Mon Sep 17 00:00:00 2001 From: Ben Wall Date: Wed, 5 Nov 2025 19:34:02 -0500 Subject: [PATCH 2/7] Wchar conversions: Clean up code slightly --- src/lib.rs | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 85ebbc8..d0a3936 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -239,18 +239,23 @@ impl Drop for File { } } +#[cfg(target_os = "windows")] +fn path_to_vec_wchar>(path: P) -> Vec { + use std::os::windows::ffi::OsStrExt; + let as_os_str = path.as_ref().as_os_str(); + + as_os_str.encode_wide() + .chain(std::iter::once(0)) + .collect() +} + impl File { /// Creates a new `taglib::File` for the given `filename`. pub fn new>(path: P) -> Result { #[cfg(target_os = "windows")] { - - use std::os::windows::ffi::OsStrExt; - let filename = path.as_ref().to_path_buf(); - let wide = filename.as_os_str().encode_wide(); - let mut wide: Vec<_> = wide.collect(); - wide.push(0); // Null terminate. - let f = unsafe { ll::taglib_file_new_wchar(wide.as_ptr()) }; + let wchar = path_to_vec_wchar(path); + let f = unsafe { ll::taglib_file_new_wchar(wchar.as_ptr()) }; if f.is_null() { return Err(FileError::InvalidFile); } @@ -276,14 +281,8 @@ impl File { pub fn new_type(filename: &str, filetype: FileType) -> Result { #[cfg(target_os = "windows")] let f = { - use std::os::windows::ffi::OsStrExt; - use std::ffi::OsStr; - - let str = OsStr::new(filename); - let wide = str.encode_wide(); - let mut wide: Vec<_> = wide.collect(); - wide.push(0); // Null terminate. - unsafe { ll::taglib_file_new_type_wchar(wide.as_ptr(), filetype as u32) } + let wchar = path_to_vec_wchar(filename); + unsafe { ll::taglib_file_new_type_wchar(wchar.as_ptr(), filetype as u32) } }; #[cfg(not(target_os = "windows"))] From bcaef44d6880f5dedd44793ced64e52e21f6e37c Mon Sep 17 00:00:00 2001 From: Ben Wall Date: Thu, 6 Nov 2025 11:48:40 -0500 Subject: [PATCH 3/7] Add slightly nicer approach on Unix --- src/lib.rs | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d0a3936..8aa9055 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -239,6 +239,8 @@ impl Drop for File { } } +/// On Windows, we need to convert the paths into wchar_t*, essentially, and +/// use the matching APIs. #[cfg(target_os = "windows")] fn path_to_vec_wchar>(path: P) -> Vec { use std::os::windows::ffi::OsStrExt; @@ -249,6 +251,30 @@ fn path_to_vec_wchar>(path: P) -> Vec { .collect() } +/// On Unix platforms, converting a Path to a CString can be done simply by +/// using the as_bytes() method from OsStrExt. +/// +/// Returns a FileError if the path contains an internal NUL. +#[cfg(unix)] +fn path_to_cstring>(path: P) -> Result { + use std::os::unix::ffi::OsStrExt; + CString::new(path.as_ref().as_os_str().as_bytes()) + .map_err(|_| FileError::InvalidFileName) +} + +/// On non-Windows, non-Unix platforms, it's unclear what exactly the byte +/// encoding should be. We will assume it is UTF-8 as a common denominator, +/// and attempt to get a valid UTF-8 string out of the input, returning +/// FileError::InvalidFileName if it is not UTF-8. +/// +/// HOWEVER! This will NOT work correctly if the internal path encoding on +/// that system is NOT actually UTF-8! Beware! +#[cfg(not(unix))] +fn path_to_cstring>(path: P) -> Result { + let filename = path.as_ref().to_str().ok_or(FileError::InvalidFileName)?; + CString::new(filename).ok().ok_or(FileError::InvalidFileName) +} + impl File { /// Creates a new `taglib::File` for the given `filename`. pub fn new>(path: P) -> Result { @@ -263,13 +289,9 @@ impl File { return Ok(File { raw: f }); } - #[cfg_attr(target_os = "windows", allow(unreachable_code))] - - let filename = path.as_ref().to_str().ok_or(FileError::InvalidFileName)?; - let filename_c = CString::new(filename).ok().ok_or(FileError::InvalidFileName)?; - let filename_c_ptr = filename_c.as_ptr(); + let filename_c = path_to_cstring(path)?; - let f = unsafe { ll::taglib_file_new(filename_c_ptr) }; + let f = unsafe { ll::taglib_file_new(filename_c.as_ptr()) }; if f.is_null() { return Err(FileError::InvalidFile); } From b0ff245804e7400d9d995739be2d3cdec1dbdbde Mon Sep 17 00:00:00 2001 From: Ben Wall Date: Thu, 6 Nov 2025 11:50:00 -0500 Subject: [PATCH 4/7] Better platform-dependent structure for File::new() --- src/lib.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8aa9055..2860006 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -278,20 +278,20 @@ fn path_to_cstring>(path: P) -> Result { impl File { /// Creates a new `taglib::File` for the given `filename`. pub fn new>(path: P) -> Result { - #[cfg(target_os = "windows")] - { - let wchar = path_to_vec_wchar(path); - let f = unsafe { ll::taglib_file_new_wchar(wchar.as_ptr()) }; - if f.is_null() { - return Err(FileError::InvalidFile); + let f = { + #[cfg(windows)] + { + let wchar = path_to_vec_wchar(path); + unsafe { ll::taglib_file_new_wchar(wchar.as_ptr()) } } - return Ok(File { raw: f }); - } - - let filename_c = path_to_cstring(path)?; - - let f = unsafe { ll::taglib_file_new(filename_c.as_ptr()) }; + #[cfg(not(windows))] + { + let filename_c = path_to_cstring(path)?; + unsafe { ll::taglib_file_new(filename_c.as_ptr()) } + } + }; + if f.is_null() { return Err(FileError::InvalidFile); } From e1d17cb36b9de426829d217ebdcb73c059234d4f Mon Sep 17 00:00:00 2001 From: Ben Wall Date: Thu, 6 Nov 2025 11:52:43 -0500 Subject: [PATCH 5/7] Make the API for File::new & File::new_type more consistent --- src/lib.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2860006..6a69bb3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -269,7 +269,7 @@ fn path_to_cstring>(path: P) -> Result { /// /// HOWEVER! This will NOT work correctly if the internal path encoding on /// that system is NOT actually UTF-8! Beware! -#[cfg(not(unix))] +#[cfg(all(not(unix), not(windows)))] fn path_to_cstring>(path: P) -> Result { let filename = path.as_ref().to_str().ok_or(FileError::InvalidFileName)?; CString::new(filename).ok().ok_or(FileError::InvalidFileName) @@ -277,17 +277,17 @@ fn path_to_cstring>(path: P) -> Result { impl File { /// Creates a new `taglib::File` for the given `filename`. - pub fn new>(path: P) -> Result { + pub fn new>(filename: P) -> Result { let f = { #[cfg(windows)] { - let wchar = path_to_vec_wchar(path); + let wchar = path_to_vec_wchar(filename); unsafe { ll::taglib_file_new_wchar(wchar.as_ptr()) } } #[cfg(not(windows))] { - let filename_c = path_to_cstring(path)?; + let filename_c = path_to_cstring(filename)?; unsafe { ll::taglib_file_new(filename_c.as_ptr()) } } }; @@ -300,23 +300,23 @@ impl File { } /// Creates a new `taglib::File` for the given `filename` and type of file. - pub fn new_type(filename: &str, filetype: FileType) -> Result { - #[cfg(target_os = "windows")] + pub fn new_type>(filename: P, filetype: FileType) -> Result { + // Convert filetype for ABI. + let filetype = filetype as u32; let f = { - let wchar = path_to_vec_wchar(filename); - unsafe { ll::taglib_file_new_type_wchar(wchar.as_ptr(), filetype as u32) } - }; - - #[cfg(not(target_os = "windows"))] - let f = { - let filename_c = match CString::new(filename) { - Ok(s) => s, - _ => return Err(FileError::InvalidFileName), - }; + #[cfg(windows)] + { + let wchar = path_to_vec_wchar(filename); + unsafe { ll::taglib_file_new_type_wchar(wchar.as_ptr(), filetype) } + } - let filename_c_ptr = filename_c.as_ptr(); - unsafe { ll::taglib_file_new_type(filename_c_ptr, filetype as u32) } + #[cfg(not(windows))] + { + let filename_c = path_to_cstring(filename)?; + unsafe { ll::taglib_file_new_type(filename_c.as_ptr(), filetype) } + } }; + if f.is_null() { return Err(FileError::InvalidFile); } From 3ca5246b28b56a080c1221fd6f9ee2e4364def0f Mon Sep 17 00:00:00 2001 From: Ben Wall Date: Thu, 6 Nov 2025 11:53:44 -0500 Subject: [PATCH 6/7] Fix warnings for elided lifetimes --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6a69bb3..c732ad4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -325,7 +325,7 @@ impl File { } /// Returns the `taglib::Tag` instance for the given file. - pub fn tag(&self) -> Result { + pub fn tag(&self) -> Result, FileError> { let res = unsafe { ll::taglib_file_tag(self.raw) }; if res.is_null() { @@ -344,7 +344,7 @@ impl File { } /// Returns the `taglib::AudioProperties` instance for the given file. - pub fn audioproperties(&self) -> Result { + pub fn audioproperties(&self) -> Result, FileError> { let res = unsafe { ll::taglib_file_audioproperties(self.raw) }; if res.is_null() { From 3b45d728bf9c8699633fc7c444c82569c88326af Mon Sep 17 00:00:00 2001 From: Ben Wall Date: Thu, 6 Nov 2025 12:00:50 -0500 Subject: [PATCH 7/7] Safety check in path_to_vec_wchar: Check for internal NULs --- src/lib.rs | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c732ad4..bbd264e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -242,13 +242,23 @@ impl Drop for File { /// On Windows, we need to convert the paths into wchar_t*, essentially, and /// use the matching APIs. #[cfg(target_os = "windows")] -fn path_to_vec_wchar>(path: P) -> Vec { +fn path_to_vec_wchar>(path: P) -> Result, FileError> { use std::os::windows::ffi::OsStrExt; let as_os_str = path.as_ref().as_os_str(); - as_os_str.encode_wide() + let wchar: Vec<_> = as_os_str.encode_wide() .chain(std::iter::once(0)) - .collect() + .collect(); + + // For the filename to be valid, it must not have any internal 0s (i.e. any + // internal nul-terminators). + for encoded in &wchar[0..wchar.len() - 1] { + if *encoded == 0 { + return Err(FileError::InvalidFileName); + } + } + + Ok(wchar) } /// On Unix platforms, converting a Path to a CString can be done simply by @@ -281,7 +291,7 @@ impl File { let f = { #[cfg(windows)] { - let wchar = path_to_vec_wchar(filename); + let wchar = path_to_vec_wchar(filename)?; unsafe { ll::taglib_file_new_wchar(wchar.as_ptr()) } } @@ -306,7 +316,7 @@ impl File { let f = { #[cfg(windows)] { - let wchar = path_to_vec_wchar(filename); + let wchar = path_to_vec_wchar(filename)?; unsafe { ll::taglib_file_new_type_wchar(wchar.as_ptr(), filetype) } }