-
Notifications
You must be signed in to change notification settings - Fork 252
Rust: merge structurally equal types in bindgen #1468
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ use wit_parser::*; | |
| #[derive(Default)] | ||
| pub struct Types { | ||
| type_info: HashMap<TypeId, TypeInfo>, | ||
| equal_types: UnionFind, | ||
| } | ||
|
|
||
| #[derive(Default, Clone, Copy, Debug)] | ||
|
|
@@ -91,6 +92,23 @@ impl Types { | |
| } | ||
| } | ||
| } | ||
| pub fn collect_equal_types(&mut self, resolve: &Resolve) { | ||
| let type_ids: Vec<_> = resolve.types.iter().map(|(id, _)| id).collect(); | ||
| for (i, &ty) in type_ids.iter().enumerate() { | ||
| // TODO: we could define a hash function for TypeDefKind to prevent the inner loop. | ||
| for &earlier in &type_ids[..i] { | ||
| if self.equal_types.find(ty) == self.equal_types.find(earlier) { | ||
| continue; | ||
| } | ||
| // The correctness of is_structurally_equal relies on the fact that | ||
| // resolve.types.iter() is in topological order. | ||
| if self.is_structurally_equal(resolve, ty, earlier) { | ||
| self.equal_types.union(ty, earlier); | ||
| break; | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| fn type_info_func(&mut self, resolve: &Resolve, func: &Function, import: bool) { | ||
| let mut live = LiveTypes::default(); | ||
|
|
@@ -228,4 +246,132 @@ impl Types { | |
| None => TypeInfo::default(), | ||
| } | ||
| } | ||
| fn is_structurally_equal(&mut self, resolve: &Resolve, a: TypeId, b: TypeId) -> bool { | ||
| let a_def = &resolve.types[a].kind; | ||
| let b_def = &resolve.types[b].kind; | ||
| if self.is_resource_like_type(a_def) || self.is_resource_like_type(b_def) { | ||
| return false; | ||
| } | ||
| match (a_def, b_def) { | ||
| (TypeDefKind::Type(ta), TypeDefKind::Type(tb)) => { | ||
| // This function is called in topological order, so the equivalence | ||
| // classes of ta and tb have already been computed. We can use the representative | ||
| // TypeId to check equality, instead of recursing down. | ||
| self.types_equal(resolve, ta, tb) | ||
| } | ||
| (TypeDefKind::Record(ra), TypeDefKind::Record(rb)) => { | ||
| ra.fields.len() == rb.fields.len() | ||
| // Fields are ordered in WIT, so record {a: T, b: U} is different from {b: U, a: T} | ||
| && ra.fields.iter().zip(rb.fields.iter()).all(|(fa, fb)| { | ||
| fa.name == fb.name && self.types_equal(resolve, &fa.ty, &fb.ty) | ||
| }) | ||
| } | ||
| (TypeDefKind::Variant(va), TypeDefKind::Variant(vb)) => { | ||
| va.cases.len() == vb.cases.len() | ||
| && va.cases.iter().zip(vb.cases.iter()).all(|(ca, cb)| { | ||
| ca.name == cb.name && self.optional_types_equal(resolve, &ca.ty, &cb.ty) | ||
| }) | ||
| } | ||
| (TypeDefKind::Enum(ea), TypeDefKind::Enum(eb)) => { | ||
| ea.cases.len() == eb.cases.len() | ||
| && ea | ||
| .cases | ||
| .iter() | ||
| .zip(eb.cases.iter()) | ||
| .all(|(ca, cb)| ca.name == cb.name) | ||
| } | ||
| (TypeDefKind::Flags(fa), TypeDefKind::Flags(fb)) => { | ||
| fa.flags.len() == fb.flags.len() | ||
| && fa | ||
| .flags | ||
| .iter() | ||
| .zip(fb.flags.iter()) | ||
| .all(|(fa, fb)| fa.name == fb.name) | ||
| } | ||
| (TypeDefKind::Tuple(ta), TypeDefKind::Tuple(tb)) => { | ||
| ta.types.len() == tb.types.len() | ||
| && ta | ||
| .types | ||
| .iter() | ||
| .zip(tb.types.iter()) | ||
| .all(|(a, b)| self.types_equal(resolve, a, b)) | ||
| } | ||
| (TypeDefKind::List(la), TypeDefKind::List(lb)) => self.types_equal(resolve, la, lb), | ||
| (TypeDefKind::FixedSizeList(ta, sa), TypeDefKind::FixedSizeList(tb, sb)) => { | ||
| sa == sb && self.types_equal(resolve, ta, tb) | ||
| } | ||
| (TypeDefKind::Option(oa), TypeDefKind::Option(ob)) => self.types_equal(resolve, oa, ob), | ||
| (TypeDefKind::Result(ra), TypeDefKind::Result(rb)) => { | ||
| self.optional_types_equal(resolve, &ra.ok, &rb.ok) | ||
| && self.optional_types_equal(resolve, &ra.err, &rb.err) | ||
| } | ||
| _ => false, | ||
| } | ||
| } | ||
| fn types_equal(&mut self, resolve: &Resolve, a: &Type, b: &Type) -> bool { | ||
| match (a, b) { | ||
| (Type::Id(a), Type::Id(b)) => { | ||
| let a_def = &resolve.types[*a].kind; | ||
| let b_def = &resolve.types[*b].kind; | ||
| if self.is_resource_like_type(a_def) || self.is_resource_like_type(b_def) { | ||
| return false; | ||
| } | ||
| self.equal_types.find(*a) == self.equal_types.find(*b) | ||
| } | ||
| (Type::ErrorContext, Type::ErrorContext) => todo!(), | ||
| _ => a == b, | ||
| } | ||
| } | ||
| fn optional_types_equal( | ||
| &mut self, | ||
| resolve: &Resolve, | ||
| a: &Option<Type>, | ||
| b: &Option<Type>, | ||
| ) -> bool { | ||
| match (a, b) { | ||
| (Some(a), Some(b)) => self.types_equal(resolve, a, b), | ||
| (None, None) => true, | ||
| _ => false, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to above, can the |
||
| } | ||
| } | ||
| fn is_resource_like_type(&self, ty: &TypeDefKind) -> bool { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this might work better if this function didn't exist and it were folded directly into
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Not quite. It also depends on how the world import/export these interfaces. If we In the future, if we allow It would be great if we can specify the resource equality precisely in
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For For supporting I agree the way things are handled today is weird since |
||
| match ty { | ||
| TypeDefKind::Resource | TypeDefKind::Handle(_) => true, | ||
| TypeDefKind::Future(_) | TypeDefKind::Stream(_) => true, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This part can be safely omitted since futures/streams are safe to deduplicate. It's just resources that can't be deduplicated.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not very familiar with futures/streams, with this WIT Can
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, all of those types are safe to merge. The |
||
| _ => false, | ||
| } | ||
| } | ||
| pub fn get_representative_type(&mut self, id: TypeId) -> TypeId { | ||
| self.equal_types.find(id) | ||
| } | ||
| } | ||
|
|
||
| #[derive(Default)] | ||
| pub struct UnionFind { | ||
| parent: HashMap<TypeId, TypeId>, | ||
| } | ||
| impl UnionFind { | ||
| fn find(&mut self, id: TypeId) -> TypeId { | ||
| // Path compression | ||
| let parent = self.parent.get(&id).copied().unwrap_or(id); | ||
| if parent != id { | ||
| let root = self.find(parent); | ||
| self.parent.insert(id, root); | ||
| root | ||
| } else { | ||
| id | ||
| } | ||
| } | ||
| fn union(&mut self, a: TypeId, b: TypeId) { | ||
| let ra = self.find(a); | ||
| let rb = self.find(b); | ||
| if ra != rb { | ||
| // Use smaller id as root for determinism | ||
| if ra < rb { | ||
| self.parent.insert(rb, ra); | ||
| } else { | ||
| self.parent.insert(ra, rb); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2326,6 +2326,38 @@ unsafe fn call_import(&mut self, _params: Self::ParamsLower, _results: *mut u8) | |
| } | ||
| } | ||
|
|
||
| pub fn type_alias_to_eqaul_type( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. s/eqaul/equal/ |
||
| &mut self, | ||
| id: TypeId, | ||
| eq_ty: TypeId, | ||
| from_import: Option<&WorldKey>, | ||
| ) { | ||
| assert!(self.r#gen.opts.merge_structurally_equal_types); | ||
| if let Some(name) = from_import { | ||
| let docs = Docs { | ||
| contents: Some("wit-bindgen: alias to import equal type".to_string()), | ||
| }; | ||
| let mut path = self.path_to_root(); | ||
| let import_path = crate::compute_module_path(name, self.resolve, false).join("::"); | ||
| path.push_str(&import_path); | ||
| path.push_str("::"); | ||
| for (name, mode) in self.modes_of(id) { | ||
| self.rustdoc(&docs); | ||
| self.push_str(&format!("pub type {name}")); | ||
| self.print_generics(mode.lifetime); | ||
| self.push_str(" = "); | ||
| self.push_str(&path); | ||
| self.print_tyid(eq_ty, mode); | ||
| self.push_str(";\n"); | ||
| } | ||
| } else { | ||
| let docs = Docs { | ||
| contents: Some("wit-bindgen: alias to equal type".to_string()), | ||
| }; | ||
| self.print_typedef_alias(id, &Type::Id(eq_ty), &docs); | ||
| } | ||
| } | ||
|
|
||
| fn print_typedef_alias(&mut self, id: TypeId, ty: &Type, docs: &Docs) { | ||
| for (name, mode) in self.modes_of(id) { | ||
| self.rustdoc(docs); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -274,6 +274,15 @@ pub struct Opts { | |
| #[cfg_attr(feature = "clap", clap(flatten))] | ||
| #[cfg_attr(feature = "serde", serde(flatten))] | ||
| pub async_: AsyncFilterSet, | ||
|
|
||
| /// Find all structurally equal types and only generate one type definition for | ||
| /// each equivalence class. Other types in the same class will be type aliases to the | ||
| /// generated type. This avoids clone when converting between types that are | ||
| /// structurally equal, which is useful when import and export the same interface. | ||
| /// | ||
| /// Types containing resource, future, or stream are never considered equal. | ||
| #[cfg_attr(feature = "clap", arg(long))] | ||
| pub merge_structurally_equal_types: bool, | ||
|
Comment on lines
+278
to
+285
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This I think would be reasonable to turn on by default, but are you hesitant to enable it by default?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, sure. Happy to make it on by default. Just being cautious here, not to change the production behavior.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah yeah that's find with semver and such, and this is something that I've heard others asking for and I don't know why it wouldn't be the default, so I think it's reasonable to have it on-by-default. |
||
| } | ||
|
|
||
| impl Opts { | ||
|
|
@@ -973,6 +982,42 @@ macro_rules! __export_{world_name}_impl {{ | |
| .async_ | ||
| .is_async(resolve, interface, func, is_import) | ||
| } | ||
|
|
||
| // Returns the structurally equal type id if exists. If the equal type comes from the | ||
| // import of the same interface, also returns the interface key, so that we can generate | ||
| // a type alias to the import type. | ||
| fn get_equal_type_alias<'a>( | ||
| &mut self, | ||
| resolve: &Resolve, | ||
| iface_key: Option<&'a WorldKey>, | ||
| ty_id: TypeId, | ||
| ) -> Option<(TypeId, Option<&'a WorldKey>)> { | ||
| if !self.opts.merge_structurally_equal_types { | ||
| return None; | ||
| } | ||
| let ty = &resolve.types[ty_id].kind; | ||
| if matches!(ty, TypeDefKind::Type(_)) { | ||
| // preserve all primitive type and type alias definitions | ||
| return None; | ||
| } | ||
| let root = self.types.get_representative_type(ty_id); | ||
| if root != ty_id { | ||
| Some((root, None)) | ||
| } else { | ||
| let TypeOwner::Interface(iface_id) = resolve.types[ty_id].owner else { | ||
| unreachable!() | ||
| }; | ||
| // When we allow importing/exporting the same interface multiple times, we need to update this code | ||
| if !self.types.get(ty_id).has_resource | ||
| && iface_key.is_some() | ||
| && let Some(true) = self.interface_last_seen_as_import.get(&iface_id) | ||
|
Comment on lines
+1010
to
+1013
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be my memory failing me but I'm not entirely sure what these clauses are doing. Could you expand the comment to explain more what case this is handling?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This branch means that we are processing an interface that has been imported before. With the current WIT parser, it means we are importing and exporting the same interface. For export interface, if we can't find an equal type, we can still alias the type to the imported interface (for non-resource types).
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense yeah, mind expanding the comment here with some of that info? |
||
| { | ||
| Some((root, iface_key)) | ||
| } else { | ||
| None | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl WorldGenerator for RustWasm { | ||
|
|
@@ -1054,6 +1099,9 @@ impl WorldGenerator for RustWasm { | |
| "// * disable-run-ctors-once-workaround" | ||
| ); | ||
| } | ||
| if self.opts.merge_structurally_equal_types { | ||
| uwriteln!(self.src_preamble, "// * merge_structurally_equal_types"); | ||
| } | ||
| if let Some(s) = &self.opts.export_macro_name { | ||
| uwriteln!(self.src_preamble, "// * export-macro-name: {s}"); | ||
| } | ||
|
|
@@ -1073,6 +1121,9 @@ impl WorldGenerator for RustWasm { | |
| uwriteln!(self.src_preamble, "// * async: {opt}"); | ||
| } | ||
| self.types.analyze(resolve); | ||
| if self.opts.merge_structurally_equal_types { | ||
| self.types.collect_equal_types(resolve); | ||
| } | ||
| self.world = Some(world); | ||
|
|
||
| let world = &resolve.worlds[world]; | ||
|
|
@@ -1105,13 +1156,14 @@ impl WorldGenerator for RustWasm { | |
| let mut to_define = Vec::new(); | ||
| for (name, ty_id) in resolve.interfaces[id].types.iter() { | ||
| let full_name = full_wit_type_name(resolve, *ty_id); | ||
| let eq_alias = self.get_equal_type_alias(resolve, None, *ty_id); | ||
| if let Some(type_gen) = self.with.get(&full_name) { | ||
| // skip type definition generation for remapped types | ||
| if type_gen.generated() { | ||
| to_define.push((name, ty_id)); | ||
| to_define.push((name, ty_id, eq_alias)); | ||
| } | ||
| } else { | ||
| to_define.push((name, ty_id)); | ||
| to_define.push((name, ty_id, eq_alias)); | ||
| } | ||
| self.generated_types.insert(full_name); | ||
| } | ||
|
|
@@ -1129,8 +1181,12 @@ impl WorldGenerator for RustWasm { | |
| return Ok(()); | ||
| } | ||
|
|
||
| for (name, ty_id) in to_define { | ||
| r#gen.define_type(&name, *ty_id); | ||
| for (name, ty_id, eq_alias) in to_define { | ||
| if let Some((alias, _)) = eq_alias { | ||
| r#gen.type_alias_to_eqaul_type(*ty_id, alias, None); | ||
| } else { | ||
| r#gen.define_type(&name, *ty_id); | ||
| } | ||
| } | ||
|
|
||
| r#gen.generate_imports(resolve.interfaces[id].functions.values(), Some(name)); | ||
|
|
@@ -1167,9 +1223,10 @@ impl WorldGenerator for RustWasm { | |
| _files: &mut Files, | ||
| ) -> Result<()> { | ||
| let mut to_define = Vec::new(); | ||
| for (name, ty_id) in resolve.interfaces[id].types.iter() { | ||
| for (ty_name, ty_id) in resolve.interfaces[id].types.iter() { | ||
| let full_name = full_wit_type_name(resolve, *ty_id); | ||
| to_define.push((name, ty_id)); | ||
| let eq_alias = self.get_equal_type_alias(resolve, Some(name), *ty_id); | ||
| to_define.push((ty_name, ty_id, eq_alias)); | ||
| self.generated_types.insert(full_name); | ||
| } | ||
|
|
||
|
|
@@ -1186,8 +1243,12 @@ impl WorldGenerator for RustWasm { | |
| return Ok(()); | ||
| } | ||
|
|
||
| for (name, ty_id) in to_define { | ||
| r#gen.define_type(&name, *ty_id); | ||
| for (ty_name, ty_id, eq_alias) in to_define { | ||
| if let Some((alias, from_import)) = eq_alias { | ||
| r#gen.type_alias_to_eqaul_type(*ty_id, alias, from_import); | ||
| } else { | ||
| r#gen.define_type(&ty_name, *ty_id); | ||
| } | ||
| } | ||
|
|
||
| let macro_name = | ||
|
|
@@ -1247,19 +1308,24 @@ impl WorldGenerator for RustWasm { | |
| let mut to_define = Vec::new(); | ||
| for (name, ty_id) in types { | ||
| let full_name = full_wit_type_name(resolve, *ty_id); | ||
| let eq_alias = self.get_equal_type_alias(resolve, None, *ty_id); | ||
| if let Some(type_gen) = self.with.get(&full_name) { | ||
| // skip type definition generation for remapped types | ||
| if type_gen.generated() { | ||
| to_define.push((name, ty_id)); | ||
| to_define.push((name, ty_id, eq_alias)); | ||
| } | ||
| } else { | ||
| to_define.push((name, ty_id)); | ||
| to_define.push((name, ty_id, eq_alias)); | ||
| } | ||
| self.generated_types.insert(full_name); | ||
| } | ||
| let mut r#gen = self.interface(Identifier::World(world), "$root", resolve, true); | ||
| for (name, ty) in to_define { | ||
| r#gen.define_type(name, *ty); | ||
| for (name, ty, eq_alias) in to_define { | ||
| if let Some((alias, _)) = eq_alias { | ||
| r#gen.type_alias_to_eqaul_type(*ty, alias, None); | ||
| } else { | ||
| r#gen.define_type(name, *ty); | ||
| } | ||
| } | ||
| let src = r#gen.finish(); | ||
| self.src.push_str(&src); | ||
|
|
@@ -1389,7 +1455,11 @@ impl WorldGenerator for RustWasm { | |
| } | ||
| } | ||
|
|
||
| fn compute_module_path(name: &WorldKey, resolve: &Resolve, is_export: bool) -> Vec<String> { | ||
| pub(crate) fn compute_module_path( | ||
| name: &WorldKey, | ||
| resolve: &Resolve, | ||
| is_export: bool, | ||
| ) -> Vec<String> { | ||
| let mut path = Vec::new(); | ||
| if is_export { | ||
| path.push("exports".to_string()); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| package example:composition; | ||
|
|
||
| let host = new test:host { ... }; | ||
| let proxy = new test:proxy { ...host, ... }; | ||
| let runner = new test:runner { ...proxy, ... }; | ||
| export runner...; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Personally I prefer to keep things exhaustive where possible to avoid
_ => { /* ... */ }catch-call cases (same with_ => falseabove)Could this be changed to soemthing like:
That way adding new types is forced to update this to ensure it handles new types appropriately.