diff --git a/Cargo.lock b/Cargo.lock index 28bc2732ac..b3bc8c3221 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -88,6 +88,8 @@ dependencies = [ "quote", "regex", "rustc-hash", + "serde", + "serde_json", "shlex", "syn", ] @@ -120,6 +122,8 @@ dependencies = [ "prettyplease", "proc-macro2", "regex", + "serde", + "serde_json", "shlex", "similar", "syn", @@ -346,6 +350,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + [[package]] name = "libc" version = "0.2.174" @@ -552,6 +562,55 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + [[package]] name = "shlex" version = "1.3.0" diff --git a/Cargo.toml 
b/Cargo.toml index edb0d35371..ea29b5a038 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,8 @@ quickcheck = "1.0" quote = { version = "1", default-features = false } regex = { version = "1.5.3", default-features = false } rustc-hash = "2.1.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" shlex = "1" similar = "2.2.1" syn = "2.0" diff --git a/bindgen-tests/Cargo.toml b/bindgen-tests/Cargo.toml index 77a28ca3cb..671e79bbe1 100644 --- a/bindgen-tests/Cargo.toml +++ b/bindgen-tests/Cargo.toml @@ -13,6 +13,8 @@ owo-colors.workspace = true prettyplease = { workspace = true, features = ["verbatim"] } proc-macro2.workspace = true regex.workspace = true +serde.workspace = true +serde_json.workspace = true shlex.workspace = true similar = { workspace = true, features = ["inline"] } syn.workspace = true diff --git a/bindgen-tests/tests/expectations/tests/emit-symbol-list-bar.rs b/bindgen-tests/tests/expectations/tests/emit-symbol-list-bar.rs new file mode 100644 index 0000000000..ebe0d0d60d --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/emit-symbol-list-bar.rs @@ -0,0 +1,22 @@ +include!("emit-symbol-list-foo.rs"); +pub type bar_int_t = foo_int_t; +#[repr(C)] +pub struct bar_struct { + pub foo: foo_struct, + pub z: ::std::os::raw::c_int, +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of bar_struct"][::std::mem::size_of::<bar_struct>() - 12usize]; + ["Alignment of bar_struct"][::std::mem::align_of::<bar_struct>() - 4usize]; + [ + "Offset of field: bar_struct::foo", + ][::std::mem::offset_of!(bar_struct, foo) - 0usize]; + ["Offset of field: bar_struct::z"][::std::mem::offset_of!(bar_struct, z) - 8usize]; +}; +unsafe extern "C" { + pub fn bar_function(arg: *mut foo_struct, arg2: *mut bar_struct); +} +unsafe extern "C" { + pub static mut bar_global_var: ::std::os::raw::c_int; +} diff --git a/bindgen-tests/tests/expectations/tests/emit-symbol-list-foo.rs b/bindgen-tests/tests/expectations/tests/emit-symbol-list-foo.rs
new file mode 100644 index 0000000000..8624543542 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/emit-symbol-list-foo.rs @@ -0,0 +1,20 @@ +pub type foo_int_t = ::std::os::raw::c_int; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct foo_struct { + pub x: ::std::os::raw::c_int, + pub y: f32, +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of foo_struct"][::std::mem::size_of::<foo_struct>() - 8usize]; + ["Alignment of foo_struct"][::std::mem::align_of::<foo_struct>() - 4usize]; + ["Offset of field: foo_struct::x"][::std::mem::offset_of!(foo_struct, x) - 0usize]; + ["Offset of field: foo_struct::y"][::std::mem::offset_of!(foo_struct, y) - 4usize]; +}; +unsafe extern "C" { + pub fn foo_function(arg: ::std::os::raw::c_int); +} +unsafe extern "C" { + pub static mut foo_global_var: ::std::os::raw::c_int; +} diff --git a/bindgen-tests/tests/expectations/tests/emit-symbol-list-foo.symbols b/bindgen-tests/tests/expectations/tests/emit-symbol-list-foo.symbols new file mode 100644 index 0000000000..54f5a4682a --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/emit-symbol-list-foo.symbols @@ -0,0 +1,19 @@ +{ + "types": [ + "foo_int_t", + "int", + "foo_struct", + "int", + "float", + "_bindgen_ty_id_9", + "int", + "void", + "int" + ], + "functions": [ + "foo_function" + ], + "vars": [ + "foo_global_var" + ] +} \ No newline at end of file diff --git a/bindgen-tests/tests/expectations/tests/emit-symbol-list.rs b/bindgen-tests/tests/expectations/tests/emit-symbol-list.rs new file mode 100644 index 0000000000..3c7e54f11a --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/emit-symbol-list.rs @@ -0,0 +1,23 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +pub type my_int_t = ::std::os::raw::c_int; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct my_struct { + pub x: ::std::os::raw::c_int, + pub y: f32, +} +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = {
+ ["Size of my_struct"][::std::mem::size_of::<my_struct>() - 8usize]; + ["Alignment of my_struct"][::std::mem::align_of::<my_struct>() - 4usize]; + ["Offset of field: my_struct::x"] + [::std::mem::offset_of!(my_struct, x) - 0usize]; + ["Offset of field: my_struct::y"] + [::std::mem::offset_of!(my_struct, y) - 4usize]; +}; +unsafe extern "C" { + pub fn my_function(arg: ::std::os::raw::c_int); +} +unsafe extern "C" { + pub static mut my_global_var: ::std::os::raw::c_int; +} diff --git a/bindgen-tests/tests/expectations/tests/emit-symbol-list.symbols b/bindgen-tests/tests/expectations/tests/emit-symbol-list.symbols new file mode 100644 index 0000000000..7722a40302 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/emit-symbol-list.symbols @@ -0,0 +1,11 @@ +type my_int_t +type int +type my_struct +type int +type float +type _bindgen_ty_id_9 +type int +type void +function my_function +type int +var my_global_var diff --git a/bindgen-tests/tests/headers/emit-symbol-list-bar.h b/bindgen-tests/tests/headers/emit-symbol-list-bar.h new file mode 100644 index 0000000000..544fabd490 --- /dev/null +++ b/bindgen-tests/tests/headers/emit-symbol-list-bar.h @@ -0,0 +1,15 @@ +// Dependent library header (like "bar" in the issue) +// bar depends on foo + +#include "emit-symbol-list-foo.h" + +typedef foo_int_t bar_int_t; + +struct bar_struct { + struct foo_struct foo; + int z; +}; + +void bar_function(struct foo_struct* arg, struct bar_struct* arg2); + +extern int bar_global_var; diff --git a/bindgen-tests/tests/headers/emit-symbol-list-foo.h b/bindgen-tests/tests/headers/emit-symbol-list-foo.h new file mode 100644 index 0000000000..c77b51bbbc --- /dev/null +++ b/bindgen-tests/tests/headers/emit-symbol-list-foo.h @@ -0,0 +1,12 @@ +// Base library header (like "foo" in the issue) + +typedef int foo_int_t; + +struct foo_struct { + int x; + float y; +}; + +void foo_function(int arg); + +extern int foo_global_var; diff --git a/bindgen-tests/tests/headers/emit-symbol-list.h
b/bindgen-tests/tests/headers/emit-symbol-list.h
new file mode 100644
index 0000000000..1d59bd1390
--- /dev/null
+++ b/bindgen-tests/tests/headers/emit-symbol-list.h
@@ -0,0 +1,12 @@
+// Test header for emit-symbol-list feature
+
+typedef int my_int_t;
+
+struct my_struct {
+    int x;
+    float y;
+};
+
+void my_function(int arg);
+
+extern int my_global_var;
diff --git a/bindgen-tests/tests/tests.rs b/bindgen-tests/tests/tests.rs
index 6e3c358d3e..c756c5c2c9 100644
--- a/bindgen-tests/tests/tests.rs
+++ b/bindgen-tests/tests/tests.rs
@@ -774,3 +774,129 @@ fn test_wrap_static_fns() {
         .unwrap();
     }
 }
+
+/// Test the emit-symbol-list feature end-to-end, matching the use case from issue #3326:
+/// - foo-sys generates bindings and emits a symbol list
+/// - bar-sys (which depends on foo) uses that symbol list to blocklist foo's symbols
+#[test]
+fn test_emit_symbol_list() {
+    use bindgen::Builder;
+
+    let foo_header = concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/tests/headers/emit-symbol-list-foo.h"
+    );
+    let bar_header = concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/tests/headers/emit-symbol-list-bar.h"
+    );
+    let expected_foo_rust = concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/tests/expectations/tests/emit-symbol-list-foo.rs"
+    );
+    let expected_foo_symbols = concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/tests/expectations/tests/emit-symbol-list-foo.symbols"
+    );
+    let expected_bar_rust = concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/tests/expectations/tests/emit-symbol-list-bar.rs"
+    );
+
+    let observed_symbols = tempfile::NamedTempFile::new().unwrap();
+
+    // Step 1: Generate bindings for foo and emit symbol list
+    let foo_bindings = Builder::default()
+        .header(foo_header)
+        .clang_arg("--target=x86_64-unknown-linux")
+        .emit_symbol_list(observed_symbols.path())
+        .generate()
+        .expect("Failed to generate foo bindings");
+
+    // Check foo's generated Rust bindings
+    let observed_foo_rust = format_code(foo_bindings.to_string()).unwrap();
+    let expected_foo_rust_content =
+        fs::read_to_string(expected_foo_rust).unwrap();
+    let expected_foo_rust_formatted =
+        format_code(&expected_foo_rust_content).unwrap();
+
+    if observed_foo_rust != expected_foo_rust_formatted {
+        error_diff_mismatch(
+            &observed_foo_rust,
+            &expected_foo_rust_formatted,
+            Some(Path::new(foo_header)),
+            Path::new(expected_foo_rust),
+        )
+        .unwrap();
+    }
+
+    // Check foo's symbol list output (compared byte-for-byte, no reformatting)
+    let observed_symbols_content =
+        fs::read_to_string(observed_symbols.path()).unwrap();
+    let expected_symbols_content =
+        fs::read_to_string(expected_foo_symbols).unwrap();
+
+    if observed_symbols_content != expected_symbols_content {
+        error_diff_mismatch(
+            &observed_symbols_content,
+            &expected_symbols_content,
+            Some(Path::new(foo_header)),
+            Path::new(expected_foo_symbols),
+        )
+        .unwrap();
+    }
+
+    // Step 2: Generate bindings for bar, using foo's symbol list as blocklist
+    // Include foo's bindings so bar can reference the blocklisted types.
+    //
+    // Note: In real usage, you'd typically `use foo_sys::*;` from a separate
+    // crate/module rather than `include` --- we use `include` here because
+    // expectation files are linted independently.
+    let mut bar_builder = Builder::default()
+        .header(bar_header)
+        .clang_arg("--target=x86_64-unknown-linux")
+        .clang_arg(concat!("-I", env!("CARGO_MANIFEST_DIR"), "/tests/headers"))
+        .raw_line("include!(\"emit-symbol-list-foo.rs\");");
+
+    // Parse the JSON symbol list and add one anchored blocklist entry per name
+    #[derive(serde::Deserialize)]
+    struct SymbolList {
+        types: Vec<String>,
+        functions: Vec<String>,
+        vars: Vec<String>,
+    }
+
+    let symbols: SymbolList =
+        serde_json::from_str(&observed_symbols_content).unwrap();
+
+    for name in &symbols.types {
+        bar_builder = bar_builder.blocklist_type(format!("^{name}$"));
+    }
+    for name in &symbols.functions {
+        bar_builder = bar_builder.blocklist_function(format!("^{name}$"));
+    }
+    for name in &symbols.vars {
+        bar_builder = bar_builder.blocklist_var(format!("^{name}$"));
+    }
+
+    let bar_bindings = bar_builder
+        .generate()
+        .expect("Failed to generate bar bindings");
+
+    // Check bar's generated Rust bindings (should NOT contain foo's symbols)
+    let observed_bar_rust = format_code(bar_bindings.to_string()).unwrap();
+    let expected_bar_rust_content =
+        fs::read_to_string(expected_bar_rust).unwrap();
+    let expected_bar_rust_formatted =
+        format_code(&expected_bar_rust_content).unwrap();
+
+    if observed_bar_rust != expected_bar_rust_formatted {
+        error_diff_mismatch(
+            &observed_bar_rust,
+            &expected_bar_rust_formatted,
+            Some(Path::new(bar_header)),
+            Path::new(expected_bar_rust),
+        )
+        .unwrap();
+    }
+}
diff --git a/bindgen/Cargo.toml b/bindgen/Cargo.toml
index 478574edb3..054eb1c325 100644
--- a/bindgen/Cargo.toml
+++ b/bindgen/Cargo.toml
@@ -39,6 +39,8 @@ proc-macro2.workspace = true
 quote.workspace = true
 regex = { workspace = true, features = ["std", "unicode-perl"] }
 rustc-hash.workspace = true
+serde.workspace = true
+serde_json.workspace = true
 shlex.workspace = true
 syn = { workspace = true, features = ["full", "extra-traits", "visit-mut"] }

diff --git a/bindgen/codegen/mod.rs b/bindgen/codegen/mod.rs
index a5aa73b5d8..692fec7bc2 100644
---
a/bindgen/codegen/mod.rs
+++ b/bindgen/codegen/mod.rs
@@ -5297,6 +5297,16 @@ pub(crate) fn codegen(
             }
         }

+        if let Some(path) = context.options().emit_symbol_list.as_ref() {
+            match utils::write_symbol_list(context, path) {
+                Ok(()) => info!(
+                    "Your symbol list was generated successfully into: {}",
+                    path.display()
+                ),
+                Err(e) => warn!("{e}"),
+            }
+        }
+
         if let Some(spec) = context.options().depfile.as_ref() {
             match spec.write(context.deps()) {
                 Ok(()) => info!(
@@ -5404,6 +5414,104 @@ pub(crate) mod utils {
         Ok(())
     }

+    /// Write a list of generated symbols to a JSON file.
+    ///
+    /// The output format is:
+    /// ```json
+    /// {
+    ///   "types": ["type_name", ...],
+    ///   "functions": ["func_name", ...],
+    ///   "vars": ["var_name", ...]
+    /// }
+    /// ```
+    ///
+    /// The names use the same format as blocklist matching, so downstream crates
+    /// can directly use these names with `blocklist_type`, `blocklist_function`,
+    /// or `blocklist_var`.
+    ///
+    /// Items that will not be generated and items without a source location are
+    /// skipped. Names are not de-duplicated, and builtin names such as `int` or
+    /// `void` can still appear in the output.
+    ///
+    /// # Errors
+    ///
+    /// Returns the I/O error raised while creating the parent directory or the
+    /// file, or while serializing and flushing the JSON document.
+    pub(super) fn write_symbol_list(
+        context: &BindgenContext,
+        path: &std::path::Path,
+    ) -> std::io::Result<()> {
+        use serde::Serialize;
+        use std::fs::File;
+        use std::io::{BufWriter, Write};
+
+        #[derive(Serialize)]
+        struct SymbolList {
+            types: Vec<String>,
+            functions: Vec<String>,
+            vars: Vec<String>,
+        }
+
+        // Create parent directories if they don't exist. `create_dir_all` succeeds
+        // on an existing directory, so no separate (racy) existence check is needed.
+        if let Some(dir) = path.parent() {
+            if !dir.as_os_str().is_empty() {
+                std::fs::create_dir_all(dir)?;
+            }
+        }
+
+        let file = File::create(path)?;
+        let mut writer = BufWriter::new(file);
+
+        let codegen_items = context.codegen_items();
+
+        let mut symbols = SymbolList {
+            types: Vec::new(),
+            functions: Vec::new(),
+            vars: Vec::new(),
+        };
+
+        for (id, item) in context.items() {
+            // Skip items that won't be generated
+            if !codegen_items.contains(&id) {
+                continue;
+            }
+
+            // Skip items without a source location
+            if item.location().is_none() {
+                continue;
+            }
+
+            // Use the same name format as blocklist matching. The leading path
+            // component (presumably the root module --- TODO confirm) is dropped.
+            let item_path = item.path_for_allowlisting(context);
+            let name = item_path[1..].join("::");
+
+            // Categorize by item type
+            match item.kind() {
+                crate::ir::item_kind::ItemKind::Module(_) => {
+                    // Skip modules - they're organizational, not symbols
+                }
+                crate::ir::item_kind::ItemKind::Type(_) => {
+                    symbols.types.push(name);
+                }
+                crate::ir::item_kind::ItemKind::Function(_) => {
+                    symbols.functions.push(name);
+                }
+                crate::ir::item_kind::ItemKind::Var(_) => {
+                    symbols.vars.push(name);
+                }
+            }
+        }
+
+        serde_json::to_writer_pretty(&mut writer, &symbols)?;
+        // Flush explicitly: dropping a `BufWriter` discards any flush error, which
+        // would report success for a truncated symbol list.
+        writer.flush()?;
+
+        Ok(())
+    }
+
     pub(super) fn wrap_as_variadic_fn(
         ctx: &BindgenContext,
         signature: &FunctionSig,
diff --git a/bindgen/options/cli.rs b/bindgen/options/cli.rs
index 5304862584..81b0dd3aeb 100644
--- a/bindgen/options/cli.rs
+++ b/bindgen/options/cli.rs
@@ -319,6 +319,9 @@ struct BindgenCommand {
     /// Dump a graphviz dot file to PATH.
     #[arg(long, value_name = "PATH")]
     emit_ir_graphviz: Option<String>,
+    /// Emit a list of generated symbols to PATH.
+    #[arg(long, value_name = "PATH")]
+    emit_symbol_list: Option<PathBuf>,
     /// Enable support for C++ namespaces.
     #[arg(long)]
     enable_cxx_namespaces: bool,
@@ -636,6 +639,7 @@ where
         emit_clang_ast,
         emit_ir,
         emit_ir_graphviz,
+        emit_symbol_list,
         enable_cxx_namespaces,
         disable_name_namespacing,
         disable_nested_struct_naming,
@@ -947,6 +951,7 @@ where
             emit_clang_ast => |b, _| b.emit_clang_ast(),
             emit_ir => |b, _| b.emit_ir(),
             emit_ir_graphviz,
+            emit_symbol_list,
             enable_cxx_namespaces => |b, _| b.enable_cxx_namespaces(),
             enable_function_attribute_detection => |b, _| b.enable_function_attribute_detection(),
             disable_name_namespacing => |b, _| b.disable_name_namespacing(),
diff --git a/bindgen/options/mod.rs b/bindgen/options/mod.rs
index baa541c5ac..83a46e1ed9 100644
--- a/bindgen/options/mod.rs
+++ b/bindgen/options/mod.rs
@@ -794,6 +794,27 @@ options! {
         as_args: "--emit-ir-graphviz",
     },

+    /// Output path for a JSON symbol list file.
+    emit_symbol_list: Option<PathBuf> {
+        methods: {
+            /// Set the path for a file where `bindgen` will emit a list of all generated symbols.
+            ///
+            /// This is useful for downstream crates that depend on bindings generated by this
+            /// crate: they can read this file and use it to blocklist symbols that have already
+            /// been bound.
+            ///
+            /// The output is a JSON object with three arrays of names: `types`, `functions`
+            /// and `vars`, usable with `blocklist_type`, `blocklist_function`, `blocklist_var`.
+            ///
+            /// This list is not emitted by default.
+            pub fn emit_symbol_list<P: Into<PathBuf>>(mut self, path: P) -> Builder {
+                self.options.emit_symbol_list = Some(path.into());
+                self
+            }
+        },
+        as_args: "--emit-symbol-list",
+    },
+
     /// Whether we should emulate C++ namespaces with Rust modules.
     enable_cxx_namespaces: bool {
         methods: {