diff --git a/Cargo.toml b/Cargo.toml index 96caa18e2..ea2a1f7a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,7 +88,7 @@ rstest = "0.26" serial_test = "3.1.1" bytemuck = "1.16.1" -bytes = { version = "1.10", default-features = false } +bytes = { version = "1.11.1", default-features = false } float-ord = "0.3" float4 = "0.1" float8 = { version = "0.7", default-features = false } diff --git a/conventions.md b/conventions.md new file mode 100644 index 000000000..3f9ad5c43 --- /dev/null +++ b/conventions.md @@ -0,0 +1,46 @@ +# CubeCL Naming Conventions + +This document describes the naming conventions used throughout the CubeCL codebase. + +## Tensor Dimensions + +- Use **`axis`** (not `dim` or `dimension`) when referring to a specific dimension of a tensor. + - `tensor.stride(axis)`, `tensor.shape(axis)`, `tensor.coordinate(index, axis)` + +## Counts + +- Use **`_count` suffix** (not `num_` prefix) for quantities. + - `streaming_multiprocessor_count`, `cpu_core_count`, `tensor_core_count` + - `elem_count`, `cube_count`, `meta_count` + - Constants: `SM_COUNT_APPROX` + +## Line Size + +- Use **`line_size`** (not `vectorization` or `vectorization_factor`) for the number of + elements packed into a line. + - `tensor.line_size()`, `find_line_size()`, `tensor_line_size_parallel()` + - `tensor_vectorization_factor()` remains only as a deprecated compatibility alias. + +## Tensor Ordering + +- **`RowMajor`** / **`ColMajor`** are the primary names for matrix layouts. +- **`DecreasingOrder`** / **`IncreasingOrder`** are available as aliases: + - `MatrixLayout::IncreasingOrder` = `MatrixLayout::ColMajor` + - `MatrixLayout::DecreasingOrder` = `MatrixLayout::RowMajor` + +## Coordinates and Offsets + +- Use **`offset`** for linear buffer/slice positions. +- Use **`coordinate`** for multi-dimensional tensor positions. + +## Topology Constants + +- Use **`POS`** suffix for positions: `UNIT_POS`, `CUBE_POS`, `PLANE_POS`. 
+- Use **`DIM`** suffix for topology dimensions: `CUBE_DIM`, `PLANE_DIM`. + +## Type Naming + +- Use **postfix suffixes** for type categories: + - `*Error` for error types: `LineSizeError`, `LaunchError` + - `*Strategy` for strategy types: `ReadingStrategy` + - `*Expand` for expand/meta types: `TensorExpand`, `SliceExpand` diff --git a/crates/cubecl-core/src/codegen/metadata.rs b/crates/cubecl-core/src/codegen/metadata.rs index 3e3183685..9ad491b83 100644 --- a/crates/cubecl-core/src/codegen/metadata.rs +++ b/crates/cubecl-core/src/codegen/metadata.rs @@ -44,32 +44,32 @@ const EXTENDED_LEN: u32 = 3; /// Helper to calculate metadata offsets based on buffer count and position #[derive(Clone, Debug, Default)] pub struct Metadata { - num_meta: u32, - num_extended_meta: u32, + meta_count: u32, + extended_meta_count: u32, } impl Metadata { - pub fn new(num_meta: u32, num_extended_meta: u32) -> Self { + pub fn new(meta_count: u32, extended_meta_count: u32) -> Self { Self { - num_meta, - num_extended_meta, + meta_count, + extended_meta_count, } } fn offset_of(&self, id: u32) -> u32 { - self.num_meta * id + self.meta_count * id } fn base_len(&self) -> u32 { - self.num_meta * BASE_LEN + self.meta_count * BASE_LEN } pub fn static_len(&self) -> u32 { - self.num_meta * BASE_LEN + self.num_extended_meta * EXTENDED_LEN + self.meta_count * BASE_LEN + self.extended_meta_count * EXTENDED_LEN } fn offset_of_extended(&self, id: u32) -> u32 { - self.base_len() + self.num_extended_meta * id + self.base_len() + self.extended_meta_count * id } pub fn buffer_len_index(&self, buffer_idx: u32) -> u32 { @@ -163,11 +163,11 @@ impl MetadataBuilder { /// Build the final serialized metadata struct pub fn finish(&mut self, address_type: AddressType) -> MetadataBinding { fn finish_inner(state: &mut State) -> MetadataBinding { - let num_base = state.buffer_lens.len(); - let num_ext = state.ranks.len(); + let base_count = state.buffer_lens.len(); + let ext_count = state.ranks.len(); // All entries 
have buffer_len and len, extended also have rank, shape_offs, strides_offs - let static_len = num_base * BASE_LEN as usize + num_ext * EXTENDED_LEN as usize; + let static_len = base_count * BASE_LEN as usize + ext_count * EXTENDED_LEN as usize; let dynamic_len = state.shapes.len() + state.strides.len(); let total_len = static_len + dynamic_len; diff --git a/crates/cubecl-core/src/compute/launcher.rs b/crates/cubecl-core/src/compute/launcher.rs index a2dae7224..13f592920 100644 --- a/crates/cubecl-core/src/compute/launcher.rs +++ b/crates/cubecl-core/src/compute/launcher.rs @@ -192,18 +192,16 @@ impl TensorState { } fn process_tensor(&mut self, tensor: &TensorArg<'_, R>) -> Option { - let (tensor, vectorization) = match tensor { + let (tensor, line_size) = match tensor { TensorArg::Handle { - handle, - line_size: vectorization_factor, - .. - } => (handle, vectorization_factor), + handle, line_size, .. + } => (handle, line_size), TensorArg::Alias { .. } => return None, }; - let elem_size = tensor.elem_size * *vectorization; + let elem_size = tensor.elem_size * *line_size; let buffer_len = tensor.handle.size() / elem_size as u64; - let len = tensor.shape.iter().product::() / *vectorization; + let len = tensor.shape.iter().product::() / *line_size; with_metadata(|meta| { meta.register_tensor( tensor.strides.len() as u64, @@ -225,21 +223,19 @@ impl TensorState { } fn process_array(&mut self, array: &ArrayArg<'_, R>) -> Option { - let (array, vectorization) = match array { + let (array, line_size) = match array { ArrayArg::Handle { - handle, - line_size: vectorization_factor, - .. - } => (handle, vectorization_factor), + handle, line_size, .. + } => (handle, line_size), ArrayArg::Alias { .. 
} => return None, }; - let elem_size = array.elem_size * *vectorization; + let elem_size = array.elem_size * *line_size; let buffer_len = array.handle.size() / elem_size as u64; with_metadata(|meta| { meta.register_array( buffer_len, - array.length[0] as u64 / *vectorization as u64, + array.length[0] as u64 / *line_size as u64, self.address_type(), ) }); diff --git a/crates/cubecl-core/src/frontend/container/tensor/base.rs b/crates/cubecl-core/src/frontend/container/tensor/base.rs index 119f5b8c0..ede0c5705 100644 --- a/crates/cubecl-core/src/frontend/container/tensor/base.rs +++ b/crates/cubecl-core/src/frontend/container/tensor/base.rs @@ -37,15 +37,15 @@ mod metadata { #[cube] impl Tensor { - /// Obtain the stride of input at dimension dim + /// Obtain the stride of input at the given axis #[allow(unused_variables)] - pub fn stride(&self, dim: usize) -> usize { + pub fn stride(&self, axis: usize) -> usize { intrinsic!(|scope| { - let dim: ExpandElement = dim.into(); + let axis: ExpandElement = axis.into(); let out = scope.create_local(Type::new(usize::as_type(scope))); scope.register(Instruction::new( Metadata::Stride { - dim: *dim, + axis: *axis, var: self.expand.into(), }, out.clone().into(), @@ -54,15 +54,15 @@ mod metadata { }) } - /// Obtain the shape of input at dimension dim + /// Obtain the shape of input at the given axis #[allow(unused_variables)] - pub fn shape(&self, dim: usize) -> usize { + pub fn shape(&self, axis: usize) -> usize { intrinsic!(|scope| { - let dim: ExpandElement = dim.into(); + let axis: ExpandElement = axis.into(); let out = scope.create_local(Type::new(usize::as_type(scope))); scope.register(Instruction::new( Metadata::Shape { - dim: *dim, + axis: *axis, var: self.expand.into(), }, out.clone().into(), @@ -71,32 +71,32 @@ mod metadata { }) } - /// Obtain the coordinate corresponding to the given `index` of the tensor at dimension `dim`. 
+ /// Obtain the coordinate corresponding to the given `index` of the tensor at the given `axis`. /// /// A coordinate is a list of indices corresponding to the multi-dimensional position of an element in the tensor. - /// The `dim` element in a coordinate is the position along the `dim` dimension of the tensor. + /// The `axis` element in a coordinate is the position along that axis of the tensor. #[allow(unused_variables)] - pub fn coordinate(&self, index: usize, dim: usize) -> usize { + pub fn coordinate(&self, index: usize, axis: usize) -> usize { intrinsic!(|scope| { let index: ExpandElement = index.into(); - let stride = self.clone().__expand_stride_method(scope, dim.clone()); - let shape = self.clone().__expand_shape_method(scope, dim.clone()); + let stride = self.clone().__expand_stride_method(scope, axis.clone()); + let shape = self.clone().__expand_shape_method(scope, axis.clone()); - // Compute `num_strides = index / stride`. - let num_strides = scope.create_local(Type::new(usize::as_type(scope))); + // Compute `stride_count = index / stride`. + let stride_count = scope.create_local(Type::new(usize::as_type(scope))); scope.register(Instruction::new( Arithmetic::Div(BinaryOperator { lhs: *index, rhs: stride.expand.into(), }), - num_strides.clone().into(), + stride_count.clone().into(), )); - // Compute `coordinate = num_strides % shape `. + // Compute `coordinate = stride_count % shape `. let coordinate = scope.create_local(Type::new(usize::as_type(scope))); scope.register(Instruction::new( Arithmetic::Modulo(BinaryOperator { - lhs: *num_strides, + lhs: *stride_count, rhs: shape.expand.into(), }), coordinate.clone().into(), @@ -106,12 +106,12 @@ mod metadata { }) } - /// The number of vectorized elements in the tensor. + /// The number of lined elements in the tensor. /// /// # Warning /// - /// The length will be affected by the vectorization factor. To obtain the number of elements, - /// you should multiply the length by the vectorization factor. 
+ /// The length will be affected by the line size. To obtain the number of elements, + /// you should multiply the length by the line size. #[allow(clippy::len_without_is_empty)] pub fn len(&self) -> usize { intrinsic!(|scope| { @@ -120,12 +120,12 @@ mod metadata { }) } - /// The length of the buffer representing the tensor in terms of vectorized elements. + /// The length of the buffer representing the tensor in terms of lined elements. /// /// # Warning /// - /// The buffer length will be affected by the vectorization factor. To obtain the number of - /// elements, you should multiply the length by the vectorization factor. + /// The buffer length will be affected by the line size. To obtain the number of + /// elements, you should multiply the length by the line size. #[allow(clippy::len_without_is_empty)] pub fn buffer_len(&self) -> usize { intrinsic!(|scope| { diff --git a/crates/cubecl-core/src/frontend/container/tensor/launch.rs b/crates/cubecl-core/src/frontend/container/tensor/launch.rs index 142e1bb1e..564dce568 100644 --- a/crates/cubecl-core/src/frontend/container/tensor/launch.rs +++ b/crates/cubecl-core/src/frontend/container/tensor/launch.rs @@ -21,7 +21,7 @@ pub enum TensorArg<'a, R: Runtime> { Handle { /// The tensor handle. handle: TensorHandleRef<'a, R>, - /// The vectorization factor. + /// The line size. line_size: LineSize, }, /// The tensor is aliasing another input tensor. @@ -118,7 +118,7 @@ impl LaunchArg for Tensor { } impl<'a, R: Runtime> TensorArg<'a, R> { - /// Create a new tensor argument specified with its vectorization factor. + /// Create a new tensor argument specified with its line size. 
/// /// # Safety /// @@ -128,7 +128,7 @@ impl<'a, R: Runtime> TensorArg<'a, R> { handle: &'a cubecl_runtime::server::Handle, strides: &'a [usize], shape: &'a [usize], - factor: LineSize, + line_size: LineSize, ) -> Self { unsafe { Self::Handle { @@ -138,12 +138,12 @@ impl<'a, R: Runtime> TensorArg<'a, R> { shape, E::size().expect("Element should have a size"), ), - line_size: factor, + line_size, } } } - /// Create a new tensor argument specified with its vectorization factor with a manual element + /// Create a new tensor argument specified with its line size with a manual element /// size in bytes. /// /// # Safety @@ -154,13 +154,13 @@ impl<'a, R: Runtime> TensorArg<'a, R> { handle: &'a cubecl_runtime::server::Handle, strides: &'a [usize], shape: &'a [usize], - factor: LineSize, + line_size: LineSize, elem_size: usize, ) -> Self { unsafe { Self::Handle { handle: TensorHandleRef::from_raw_parts(handle, strides, shape, elem_size), - line_size: factor, + line_size, } } } diff --git a/crates/cubecl-core/src/frontend/container/tensor/tensormap.rs b/crates/cubecl-core/src/frontend/container/tensor/tensormap.rs index 49d6fd681..8908b957b 100644 --- a/crates/cubecl-core/src/frontend/container/tensor/tensormap.rs +++ b/crates/cubecl-core/src/frontend/container/tensor/tensormap.rs @@ -317,41 +317,41 @@ mod metadata { unexpanded!() } - /// Obtain the stride of input at dimension dim - pub fn stride(&self, _dim: usize) -> usize { + /// Obtain the stride of input at the given axis + pub fn stride(&self, _axis: usize) -> usize { unexpanded!() } - /// Obtain the shape of input at dimension dim - pub fn shape(&self, _dim: usize) -> usize { + /// Obtain the shape of input at the given axis + pub fn shape(&self, _axis: usize) -> usize { unexpanded!() } - /// Obtain the coordinate corresponding to the given `index` of the tensor at dimension `dim`. + /// Obtain the coordinate corresponding to the given `index` of the tensor at the given `axis`. 
/// /// A coordinate is a list of indices corresponding to the multi-dimensional position of an element in the tensor. - /// The `dim` element in a coordinate is the position along the `dim` dimension of the tensor. - pub fn coordinate(&self, _index: usize, _dim: usize) -> usize { + /// The `axis` element in a coordinate is the position along that axis of the tensor. + pub fn coordinate(&self, _index: usize, _axis: usize) -> usize { unexpanded!() } - /// The number of vectorized elements in the tensor. + /// The number of lined elements in the tensor. /// /// # Warning /// - /// The length will be affected by the vectorization factor. To obtain the number of elements, - /// you should multiply the length by the vectorization factor. + /// The length will be affected by the line size. To obtain the number of elements, + /// you should multiply the length by the line size. #[allow(clippy::len_without_is_empty)] pub fn len(&self) -> usize { unexpanded!() } - /// The length of the buffer representing the tensor in terms of vectorized elements. + /// The length of the buffer representing the tensor in terms of lined elements. /// /// # Warning /// - /// The buffer length will be affected by the vectorization factor. To obtain the number of - /// elements, you should multiply the length by the vectorization factor. + /// The buffer length will be affected by the line size. To obtain the number of + /// elements, you should multiply the length by the line size. #[allow(clippy::len_without_is_empty)] pub fn buffer_len(&self) -> usize { unexpanded!() @@ -382,18 +382,18 @@ mod metadata { pub fn __expand_stride( scope: &mut Scope, expand: ExpandElementTyped>, - dim: ExpandElementTyped, + axis: ExpandElementTyped, ) -> ExpandElementTyped { - expand.__expand_stride_method(scope, dim) + expand.__expand_stride_method(scope, axis) } // Expand function of [shape](TensorMap::shape). 
pub fn __expand_shape( scope: &mut Scope, expand: ExpandElementTyped>, - dim: ExpandElementTyped, + axis: ExpandElementTyped, ) -> ExpandElementTyped { - expand.__expand_shape_method(scope, dim) + expand.__expand_shape_method(scope, axis) } // Expand function of [coordinate](TensorMap::coordinate). @@ -401,9 +401,9 @@ mod metadata { scope: &mut Scope, expand: ExpandElementTyped>, index: ExpandElementTyped, - dim: ExpandElementTyped, + axis: ExpandElementTyped, ) -> ExpandElementTyped { - expand.__expand_coordinate_method(scope, index, dim) + expand.__expand_coordinate_method(scope, index, axis) } // Expand function of [len](TensorMap::len). @@ -449,13 +449,13 @@ mod metadata { pub fn __expand_stride_method( self, scope: &mut Scope, - dim: ExpandElementTyped, + axis: ExpandElementTyped, ) -> ExpandElementTyped { - let dim: ExpandElement = dim.into(); + let axis: ExpandElement = axis.into(); let out = scope.create_local(Type::new(usize::as_type(scope))); scope.register(Instruction::new( Metadata::Stride { - dim: *dim, + axis: *axis, var: self.expand.into(), }, out.clone().into(), @@ -467,13 +467,13 @@ mod metadata { pub fn __expand_shape_method( self, scope: &mut Scope, - dim: ExpandElementTyped, + axis: ExpandElementTyped, ) -> ExpandElementTyped { - let dim: ExpandElement = dim.into(); + let axis: ExpandElement = axis.into(); let out = scope.create_local(Type::new(usize::as_type(scope))); scope.register(Instruction::new( Metadata::Shape { - dim: *dim, + axis: *axis, var: self.expand.into(), }, out.clone().into(), @@ -486,27 +486,27 @@ mod metadata { self, scope: &mut Scope, index: ExpandElementTyped, - dim: ExpandElementTyped, + axis: ExpandElementTyped, ) -> ExpandElementTyped { let index: ExpandElement = index.into(); - let stride = self.clone().__expand_stride_method(scope, dim.clone()); - let shape = self.clone().__expand_shape_method(scope, dim.clone()); + let stride = self.clone().__expand_stride_method(scope, axis.clone()); + let shape = 
self.clone().__expand_shape_method(scope, axis.clone()); - // Compute `num_strides = index / stride`. - let num_strides = scope.create_local(Type::new(usize::as_type(scope))); + // Compute `stride_count = index / stride`. + let stride_count = scope.create_local(Type::new(usize::as_type(scope))); scope.register(Instruction::new( Arithmetic::Div(BinaryOperator { lhs: *index, rhs: stride.expand.into(), }), - num_strides.clone().into(), + stride_count.clone().into(), )); - // Compute `coordinate = num_strides % shape `. + // Compute `coordinate = stride_count % shape `. let coordinate = scope.create_local(Type::new(usize::as_type(scope))); scope.register(Instruction::new( Arithmetic::Modulo(BinaryOperator { - lhs: *num_strides, + lhs: *stride_count, rhs: shape.expand.into(), }), coordinate.clone().into(), diff --git a/crates/cubecl-core/src/frontend/operation/base.rs b/crates/cubecl-core/src/frontend/operation/base.rs index 51cddc881..2498604b5 100644 --- a/crates/cubecl-core/src/frontend/operation/base.rs +++ b/crates/cubecl-core/src/frontend/operation/base.rs @@ -26,7 +26,7 @@ where let item_lhs = lhs.ty; let item_rhs = rhs.ty; - let line_size = find_vectorization(item_lhs, item_rhs); + let line_size = find_line_size(item_lhs, item_rhs); let item = item_lhs.line(line_size); @@ -90,7 +90,7 @@ where let vec = if let Some(line_size) = line_size { line_size } else { - find_vectorization(item_lhs, item_rhs) + find_line_size(item_lhs, item_rhs) }; let item = item_lhs.line(vec); @@ -152,7 +152,7 @@ where let item_lhs = lhs.ty; let item_rhs = rhs.ty; - let line_size = find_vectorization(item_lhs, item_rhs); + let line_size = find_line_size(item_lhs, item_rhs); let out_item = Type::scalar(ElemType::Bool).line(line_size); @@ -254,7 +254,7 @@ where out } -pub(crate) fn find_vectorization(lhs: Type, rhs: Type) -> LineSize { +pub(crate) fn find_line_size(lhs: Type, rhs: Type) -> LineSize { if matches!(lhs, Type::Scalar(_)) && matches!(rhs, Type::Scalar(_)) { 0 } else { diff --git 
a/crates/cubecl-core/src/frontend/operation/binary.rs b/crates/cubecl-core/src/frontend/operation/binary.rs index 583e13a65..a03bb1b16 100644 --- a/crates/cubecl-core/src/frontend/operation/binary.rs +++ b/crates/cubecl-core/src/frontend/operation/binary.rs @@ -45,7 +45,7 @@ pub mod sub { let item_lhs = lhs.expand.ty; let item_rhs = rhs.expand.ty; - let line_size = find_vectorization(item_lhs, item_rhs); + let line_size = find_line_size(item_lhs, item_rhs); let item = item_lhs.line(line_size); let value = (lhs_val - rhs_val).into(); @@ -288,8 +288,8 @@ macro_rules! impl_binary_func { } } -macro_rules! impl_binary_func_fixed_output_vectorization { - ($trait_name:ident, $method_name:ident, $operator:expr, $out_vectorization: expr, $($type:ty),*) => { +macro_rules! impl_binary_func_fixed_output_line_size { + ($trait_name:ident, $method_name:ident, $operator:expr, $out_line_size: expr, $($type:ty),*) => { paste::paste! { pub trait $trait_name: CubePrimitive + CubeType]> + Sized { fn $method_name(self, _rhs: Self) -> Self { @@ -313,7 +313,7 @@ macro_rules! impl_binary_func_fixed_output_vectorization { impl [<$trait_name Expand>] for ExpandElementTyped { fn [<__expand_ $method_name _method>](self, scope: &mut Scope, rhs: Self) -> Self { let lhs: ExpandElement = self.into(); - let item = lhs.ty.line($out_vectorization); + let item = lhs.ty.line($out_line_size); binary_expand_fixed_output(scope, lhs, rhs.into(), item, $operator).into() } } @@ -574,7 +574,7 @@ impl_binary_func!( usize, isize ); -impl_binary_func_fixed_output_vectorization!( +impl_binary_func_fixed_output_line_size!( Dot, dot, Arithmetic::Dot, diff --git a/crates/cubecl-core/src/frontend/operation/unary.rs b/crates/cubecl-core/src/frontend/operation/unary.rs index a17846ddc..b1bbe7593 100644 --- a/crates/cubecl-core/src/frontend/operation/unary.rs +++ b/crates/cubecl-core/src/frontend/operation/unary.rs @@ -72,8 +72,8 @@ impl Exp for f32 { } } -macro_rules! 
impl_unary_func_fixed_out_vectorization { - ($trait_name:ident, $method_name:ident, $operator:expr, $out_vectorization: expr, $($type:ty),*) => { +macro_rules! impl_unary_func_fixed_out_line_size { + ($trait_name:ident, $method_name:ident, $operator:expr, $out_line_size: expr, $($type:ty),*) => { paste::paste! { pub trait $trait_name: CubePrimitive + CubeType]> + Sized { #[allow(unused_variables)] @@ -94,7 +94,7 @@ macro_rules! impl_unary_func_fixed_out_vectorization { impl [<$trait_name Expand>] for ExpandElementTyped { fn [<__expand_ $method_name _method>](self, scope: &mut Scope) -> Self { let expand_element: ExpandElement = self.into(); - let item = expand_element.ty.line($out_vectorization); + let item = expand_element.ty.line($out_line_size); unary_expand_fixed_output(scope, expand_element, item, $operator).into() } } @@ -410,7 +410,7 @@ impl_unary_func!( f32, f64 ); -impl_unary_func_fixed_out_vectorization!( +impl_unary_func_fixed_out_line_size!( Magnitude, magnitude, Arithmetic::Magnitude, diff --git a/crates/cubecl-core/src/lib.rs b/crates/cubecl-core/src/lib.rs index cd3e8d650..1dcac8947 100644 --- a/crates/cubecl-core/src/lib.rs +++ b/crates/cubecl-core/src/lib.rs @@ -58,28 +58,33 @@ pub use id::*; /// assigned to one element. pub fn calculate_cube_count_elemwise( client: &ComputeClient, - num_elems: usize, + elem_count: usize, cube_dim: CubeDim, ) -> CubeCount { - let num_cubes = num_elems.div_ceil(cube_dim.num_elems() as usize); - CubeCountSelection::new(client, num_cubes as u32).cube_count() + let cube_count = elem_count.div_ceil(cube_dim.num_elems() as usize); + CubeCountSelection::new(client, cube_count as u32).cube_count() } +/// Deprecated alias for [`tensor_line_size`]. 
+#[deprecated( + since = "0.10.0-pre.1", + note = "use tensor_line_size or tensor_line_size_parallel" +)] pub fn tensor_vectorization_factor( factors: &[LineSize], shape: &[usize], strides: &[usize], - dim: usize, + axis: usize, ) -> LineSize { - tensor_line_size_parallel(factors.iter().cloned(), shape, strides, dim) + tensor_line_size_parallel(factors.iter().cloned(), shape, strides, axis) } pub fn tensor_line_size( factors: &[LineSize], shape: &[usize], strides: &[usize], - dim: usize, + axis: usize, ) -> LineSize { - tensor_line_size_parallel(factors.iter().cloned(), shape, strides, dim) + tensor_line_size_parallel(factors.iter().cloned(), shape, strides, axis) } #[derive(Debug, Clone)] @@ -89,16 +94,16 @@ pub enum LineSizeError { NoValidLineSize, } -/// Find the maximum line size usable for parallel vectorization along the given axis -/// from the supported line sizes or return 1 if vectorization is impossible. +/// Find the maximum line size usable for parallel line-size selection along the given axis +/// from the supported line sizes or return 1 if a line size above 1 is impossible. /// /// This function is designed to never return a line size above 1 by error, /// but doesn't guarantee to always return the actual maximum possible line size. /// That is, it may be overly strict. /// -/// Currently, this checks that the stride of the axis is 1, that it's shape is +/// Currently, this checks that the stride of the axis is 1, that its shape is /// divisible by a candidate line size and that the smallest stride that is not 1 -/// is divisible by the vectorization. +/// is divisible by the candidate line size. /// The last condition ensure that the current axis is contiguous within the next stride. 
pub fn tensor_line_size_parallel( supported_line_sizes: impl Iterator, @@ -135,8 +140,8 @@ pub fn try_tensor_line_size_parallel( .ok_or(LineSizeError::NoValidLineSize) } -/// Find the maximum line size usable for perpendicular vectorization along the given axis -/// from the supported line sizes or return 1 if vectorization is impossible. +/// Find the maximum line size usable for perpendicular line-size selection along the given axis +/// from the supported line sizes or return 1 if a line size above 1 is impossible. /// /// This function is designed to never return a line size above 1 by error, /// but doesn't guarantee to always return the actual maximum possible line size. diff --git a/crates/cubecl-core/src/runtime_tests/cluster.rs b/crates/cubecl-core/src/runtime_tests/cluster.rs index 3c041d083..681cca20a 100644 --- a/crates/cubecl-core/src/runtime_tests/cluster.rs +++ b/crates/cubecl-core/src/runtime_tests/cluster.rs @@ -33,14 +33,14 @@ pub fn test_cluster_meta(client: ComputeClient) { let cube_count_y = 2; let cube_count_z = 6; let cube_count = CubeCount::new_3d(cube_count_x, cube_count_y, cube_count_z); - let num_cubes = cube_count_x * cube_count_y * cube_count_z; + let total_cube_count = cube_count_x * cube_count_y * cube_count_z; - let handle = client.empty((num_cubes as usize * 4 + 4) * size_of::()); + let handle = client.empty((total_cube_count as usize * 4 + 4) * size_of::()); - let vectorization = 1; + let line_size = 1; cluster_meta_kernel::launch(&client, cube_count, CubeDim::new_single(), unsafe { - ArrayArg::from_raw_parts::(&handle, num_cubes as usize * 8, vectorization) + ArrayArg::from_raw_parts::(&handle, total_cube_count as usize * 8, line_size) }) .unwrap(); diff --git a/crates/cubecl-core/src/runtime_tests/launch.rs b/crates/cubecl-core/src/runtime_tests/launch.rs index 0469c15a5..f52beedbf 100644 --- a/crates/cubecl-core/src/runtime_tests/launch.rs +++ b/crates/cubecl-core/src/runtime_tests/launch.rs @@ -165,7 +165,7 @@ pub fn 
test_kernel_max_shared(client: ComputeClient) { pub fn test_shared_memory_error(client: ComputeClient) { // No real limit on CPU, so ignore - if client.properties().hardware.num_cpu_cores.is_some() { + if client.properties().hardware.cpu_core_count.is_some() { return; } diff --git a/crates/cubecl-cpp/src/shared/base.rs b/crates/cubecl-cpp/src/shared/base.rs index 7d1ae4b2b..2eee0af82 100644 --- a/crates/cubecl-cpp/src/shared/base.rs +++ b/crates/cubecl-cpp/src/shared/base.rs @@ -335,9 +335,9 @@ impl CppCompiler { } } - let num_meta = all_meta.len(); + let meta_count = all_meta.len(); - self.metadata = cubecl_core::Metadata::new(num_meta as u32, num_ext); + self.metadata = cubecl_core::Metadata::new(meta_count as u32, num_ext); } pub(crate) fn ext_meta_position(&self, var: gpu::Variable) -> u32 { @@ -969,23 +969,23 @@ impl CppCompiler { ) -> Instruction { let out = out.unwrap(); match metadata { - gpu::Metadata::Stride { dim, var } => { + gpu::Metadata::Stride { axis, var } => { let position = self.ext_meta_position(var); let offset = self.metadata.stride_offset_index(position); Instruction::ExtendedMetadata { info_offset: self.compile_variable(offset.into()), - dim: self.compile_variable(dim), + dim: self.compile_variable(axis), split_meta: self.compilation_options.supports_features.grid_constants, static_offset: self.metadata.static_len(), out: self.compile_variable(out), } } - gpu::Metadata::Shape { dim, var } => { + gpu::Metadata::Shape { axis, var } => { let position = self.ext_meta_position(var); let offset = self.metadata.shape_offset_index(position); Instruction::ExtendedMetadata { info_offset: self.compile_variable(offset.into()), - dim: self.compile_variable(dim), + dim: self.compile_variable(axis), split_meta: self.compilation_options.supports_features.grid_constants, static_offset: self.metadata.static_len(), out: self.compile_variable(out), diff --git a/crates/cubecl-cpu/src/compiler/visitor/args_manager.rs 
b/crates/cubecl-cpu/src/compiler/visitor/args_manager.rs index 2d97fa28a..9fbd18637 100644 --- a/crates/cubecl-cpu/src/compiler/visitor/args_manager.rs +++ b/crates/cubecl-cpu/src/compiler/visitor/args_manager.rs @@ -62,9 +62,9 @@ impl<'a, 'b> ArgsManagerBuilder<'a, 'b> { } } - let num_meta = all_meta.len(); + let meta_count = all_meta.len(); - let metadata = Metadata::new(num_meta as u32, num_ext); + let metadata = Metadata::new(meta_count as u32, num_ext); let scalars = kernel.scalars.clone(); let mut args = Self { diff --git a/crates/cubecl-cpu/src/compiler/visitor/operation/metadata.rs b/crates/cubecl-cpu/src/compiler/visitor/operation/metadata.rs index 2ff47cf29..a4c0f921e 100644 --- a/crates/cubecl-cpu/src/compiler/visitor/operation/metadata.rs +++ b/crates/cubecl-cpu/src/compiler/visitor/operation/metadata.rs @@ -23,7 +23,7 @@ impl<'a> Visitor<'a> { self.insert_variable(out, result); } - fn append_extended_metadata(&mut self, offset: u32, dim: Variable, out: Variable) { + fn append_extended_metadata(&mut self, offset: u32, axis: Variable, out: Variable) { let metadata_memref = self.args_manager.metadata_memref.unwrap(); let offset = self .block @@ -45,8 +45,9 @@ impl<'a> Visitor<'a> { self.location, )); - let dim = self.get_index(dim, dim.ty, true); - let offset = self.append_operation_with_result(arith::addi(first_rank, dim, self.location)); + let axis = self.get_index(axis, axis.ty, true); + let offset = + self.append_operation_with_result(arith::addi(first_rank, axis, self.location)); let result = self.append_operation_with_result(memref::load( metadata_memref, &[offset], @@ -72,15 +73,15 @@ impl<'a> Visitor<'a> { let offset = self.args_manager.metadata.rank_index(position); self.append_metadata(offset, out); } - Metadata::Shape { dim, var } => { + Metadata::Shape { axis, var } => { let position = self.args_manager.ext_meta_position(*var); let offset = self.args_manager.metadata.shape_offset_index(position); - self.append_extended_metadata(offset, *dim, 
out); + self.append_extended_metadata(offset, *axis, out); } - Metadata::Stride { dim, var } => { + Metadata::Stride { axis, var } => { let position = self.args_manager.ext_meta_position(*var); let offset = self.args_manager.metadata.stride_offset_index(position); - self.append_extended_metadata(offset, *dim, out); + self.append_extended_metadata(offset, *axis, out); } } } diff --git a/crates/cubecl-cpu/src/compiler/visitor/variables.rs b/crates/cubecl-cpu/src/compiler/visitor/variables.rs index 262f72622..0bfc00f6e 100644 --- a/crates/cubecl-cpu/src/compiler/visitor/variables.rs +++ b/crates/cubecl-cpu/src/compiler/visitor/variables.rs @@ -121,13 +121,13 @@ impl<'a> Visitor<'a> { lhs: Variable, rhs: Variable, ) -> (Value<'a, 'a>, Value<'a, 'a>) { - let vectorization_factor = std::cmp::max(lhs.line_size(), rhs.line_size()); + let line_size = std::cmp::max(lhs.line_size(), rhs.line_size()); let (mut lhs_value, mut rhs_value) = (self.get_variable(lhs), self.get_variable(rhs)); if lhs_value.r#type().is_vector() || rhs_value.r#type().is_vector() { if !lhs_value.r#type().is_vector() { let vector_type = Type::vector( - &[vectorization_factor as u64], + &[line_size as u64], lhs.storage_type().to_type(self.context), ); lhs_value = self.append_operation_with_result(vector::splat( @@ -139,7 +139,7 @@ impl<'a> Visitor<'a> { } if !rhs_value.r#type().is_vector() { let vector_type = Type::vector( - &[vectorization_factor as u64], + &[line_size as u64], rhs.storage_type().to_type(self.context), ); rhs_value = self.append_operation_with_result(vector::splat( diff --git a/crates/cubecl-cpu/src/runtime.rs b/crates/cubecl-cpu/src/runtime.rs index 651117bfc..5b2416ba8 100644 --- a/crates/cubecl-cpu/src/runtime.rs +++ b/crates/cubecl-cpu/src/runtime.rs @@ -52,11 +52,11 @@ impl DeviceState for CpuServer { max_bindings: u32::MAX, max_shared_memory_size, max_cube_count, - num_cpu_cores: Some(available_parallelism as u32), + cpu_core_count: Some(available_parallelism as u32), 
max_units_per_cube: u32::MAX, max_cube_dim, - num_streaming_multiprocessors: None, - num_tensor_cores: None, + streaming_multiprocessor_count: None, + tensor_core_count: None, min_tensor_cores_dim: None, }; diff --git a/crates/cubecl-cuda/src/runtime.rs b/crates/cubecl-cuda/src/runtime.rs index c4023e105..a14a22076 100644 --- a/crates/cubecl-cuda/src/runtime.rs +++ b/crates/cubecl-cuda/src/runtime.rs @@ -127,10 +127,10 @@ impl DeviceState for CudaServer { let grid_dim_z = get_attribute(device_ptr, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z).unwrap(); let max_cube_count = (grid_dim_x as u32, grid_dim_y as u32, grid_dim_z as u32); - let num_streaming_multiprocessors = Some( + let streaming_multiprocessor_count = Some( get_attribute(device_ptr, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT).unwrap() as u32, ); - let num_tensor_cores = tensor_cores_per_sm(arch_version); + let tensor_core_count = tensor_cores_per_sm(arch_version); comp_opts.warp_size = warp_size; @@ -143,14 +143,14 @@ impl DeviceState for CudaServer { max_cube_count, max_units_per_cube: max_threads, max_cube_dim, - num_streaming_multiprocessors, - num_tensor_cores, + streaming_multiprocessor_count, + tensor_core_count, min_tensor_cores_dim: if supported_wmma_combinations.is_empty() { None } else { Some(8) }, - num_cpu_cores: None, + cpu_core_count: None, } }; diff --git a/crates/cubecl-hip/src/runtime.rs b/crates/cubecl-hip/src/runtime.rs index 044765e15..623248e88 100644 --- a/crates/cubecl-hip/src/runtime.rs +++ b/crates/cubecl-hip/src/runtime.rs @@ -131,14 +131,14 @@ impl DeviceState for HipServer { max_cube_count, max_units_per_cube: prop_max_threads, max_cube_dim, - num_streaming_multiprocessors: None, - num_tensor_cores: None, + streaming_multiprocessor_count: None, + tensor_core_count: None, min_tensor_cores_dim: if supported_wmma_combinations.is_empty() { None } else { Some(16) }, - num_cpu_cores: None, + cpu_core_count: None, }; let mut device_props = DeviceProperties::new( diff --git 
a/crates/cubecl-ir/src/cmma.rs b/crates/cubecl-ir/src/cmma.rs index d77f6f867..b3bf4aeb9 100644 --- a/crates/cubecl-ir/src/cmma.rs +++ b/crates/cubecl-ir/src/cmma.rs @@ -24,6 +24,15 @@ pub enum MatrixLayout { Undefined, } +impl MatrixLayout { + /// Alias for `ColMajor`. In column-major (increasing) order, strides increase with axis index. + #[allow(non_upper_case_globals)] + pub const IncreasingOrder: Self = Self::ColMajor; + /// Alias for `RowMajor`. In row-major (decreasing) order, strides decrease with axis index. + #[allow(non_upper_case_globals)] + pub const DecreasingOrder: Self = Self::RowMajor; +} + #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(new, Debug, Clone, Copy, TypeHash, PartialEq, Eq, Hash, PartialOrd, Ord)] #[allow(missing_docs)] diff --git a/crates/cubecl-ir/src/metadata.rs b/crates/cubecl-ir/src/metadata.rs index 581ef910a..255d24895 100644 --- a/crates/cubecl-ir/src/metadata.rs +++ b/crates/cubecl-ir/src/metadata.rs @@ -12,10 +12,10 @@ use crate::{OperationReflect, Variable}; pub enum Metadata { /// The rank of an array. Rank { var: Variable }, - /// The stride of an array at the given dimension. - Stride { dim: Variable, var: Variable }, - /// The shape of an array at the given dimension. - Shape { dim: Variable, var: Variable }, + /// The stride of an array at the given axis. + Stride { axis: Variable, var: Variable }, + /// The shape of an array at the given axis. + Shape { axis: Variable, var: Variable }, /// The length of an array. Length { var: Variable }, /// The length of an array's underlying buffer. 
@@ -26,8 +26,8 @@ impl Display for Metadata { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { Metadata::Rank { var } => write!(f, "rank({var})"), - Metadata::Stride { dim, var } => write!(f, "{var}.strides[{dim}]"), - Metadata::Shape { dim, var } => write!(f, "{var}.shape[{dim}]"), + Metadata::Stride { axis, var } => write!(f, "{var}.strides[{axis}]"), + Metadata::Shape { axis, var } => write!(f, "{var}.shape[{axis}]"), Metadata::Length { var } => write!(f, "{var}.len()"), Metadata::BufferLength { var } => write!(f, "buffer_len({var})"), } diff --git a/crates/cubecl-ir/src/properties.rs b/crates/cubecl-ir/src/properties.rs index 235bad710..edac2bd5d 100644 --- a/crates/cubecl-ir/src/properties.rs +++ b/crates/cubecl-ir/src/properties.rs @@ -40,11 +40,11 @@ pub struct HardwareProperties { /// Maximum `CubeDim` in x, y, and z dimensions pub max_cube_dim: (u32, u32, u32), /// Number of streaming multiprocessors (SM), if available - pub num_streaming_multiprocessors: Option<u32>, + pub streaming_multiprocessor_count: Option<u32>, /// Number of available parallel cpu units, if the runtime is CPU. - pub num_cpu_cores: Option<u32>, + pub cpu_core_count: Option<u32>, /// Number of tensor cores per SM, if any - pub num_tensor_cores: Option<u32>, + pub tensor_core_count: Option<u32>, /// The minimum tiling dimension for a single axis in tensor cores. /// /// For a backend that only supports 16x16x16, the value would be 16. 
diff --git a/crates/cubecl-opt/src/instructions.rs b/crates/cubecl-opt/src/instructions.rs index 1cfae9355..a6cc679d4 100644 --- a/crates/cubecl-opt/src/instructions.rs +++ b/crates/cubecl-opt/src/instructions.rs @@ -257,12 +257,12 @@ impl Optimizer { Metadata::Rank { var } => { visit_read(self, var); } - Metadata::Stride { dim, var } => { - visit_read(self, dim); + Metadata::Stride { axis, var } => { + visit_read(self, axis); visit_read(self, var); } - Metadata::Shape { dim, var } => { - visit_read(self, dim); + Metadata::Shape { axis, var } => { + visit_read(self, axis); visit_read(self, var); } Metadata::Length { var } => { diff --git a/crates/cubecl-runtime/src/server.rs b/crates/cubecl-runtime/src/server.rs index e7fd84e1f..1d96cf1eb 100644 --- a/crates/cubecl-runtime/src/server.rs +++ b/crates/cubecl-runtime/src/server.rs @@ -927,7 +927,7 @@ impl CubeDim { let plane_count = Self::calculate_plane_count_per_cube( working_units as u32, plane_size, - properties.hardware.num_cpu_cores, + properties.hardware.cpu_core_count, ); // Make sure it respects the max units per cube (especially on wasm) @@ -939,9 +939,9 @@ impl CubeDim { fn calculate_plane_count_per_cube( working_units: u32, plane_dim: u32, - num_cpu_cores: Option<u32>, + cpu_core_count: Option<u32>, ) -> u32 { - match num_cpu_cores { + match cpu_core_count { Some(num_cores) => core::cmp::min(num_cores, working_units), None => { let plane_count_max = core::cmp::max(1, working_units / plane_dim); diff --git a/crates/cubecl-runtime/tests/dummy/server.rs b/crates/cubecl-runtime/tests/dummy/server.rs index eb0695c92..f083d38c6 100644 --- a/crates/cubecl-runtime/tests/dummy/server.rs +++ b/crates/cubecl-runtime/tests/dummy/server.rs @@ -270,10 +270,10 @@ impl DummyServer { max_cube_count: (u16::MAX as u32, u16::MAX as u32, u16::MAX as u32), max_units_per_cube: 1024, max_cube_dim: (1024, 1024, 64), - num_streaming_multiprocessors: None, - num_tensor_cores: None, + streaming_multiprocessor_count: None, + tensor_core_count: 
None, min_tensor_cores_dim: None, - num_cpu_cores: None, + cpu_core_count: None, }; let features = Features::default(); let timing_method = cubecl_common::profile::TimingMethod::System; diff --git a/crates/cubecl-spirv/src/compiler.rs b/crates/cubecl-spirv/src/compiler.rs index e32da9b7e..0c3bcad72 100644 --- a/crates/cubecl-spirv/src/compiler.rs +++ b/crates/cubecl-spirv/src/compiler.rs @@ -178,7 +178,7 @@ impl Compiler for SpirvCompiler { .collect(); all_meta.sort_by_key(|(id, _)| *id); - let num_meta = all_meta.len(); + let meta_count = all_meta.len(); for (_, has_extended_meta) in all_meta.iter() { ext_meta_pos.push(num_ext); @@ -190,7 +190,7 @@ impl Compiler for SpirvCompiler { self.cube_dim = value.cube_dim; self.mode = mode; self.addr_type = addr_type; - self.metadata = Metadata::new(num_meta as u32, num_ext); + self.metadata = Metadata::new(meta_count as u32, num_ext); self.compilation_options = compilation_options.clone(); self.ext_meta_pos = ext_meta_pos; diff --git a/crates/cubecl-spirv/src/metadata.rs b/crates/cubecl-spirv/src/metadata.rs index 9717757fe..c30b42ca2 100644 --- a/crates/cubecl-spirv/src/metadata.rs +++ b/crates/cubecl-spirv/src/metadata.rs @@ -30,9 +30,9 @@ impl SpirvCompiler { let out = self.compile_variable(out); self.buffer_length(&var, Some(&out), uniform); } - Metadata::Stride { dim, var } => { + Metadata::Stride { axis, var } => { let var = self.compile_variable(var); - let dim = self.compile_variable(dim); + let axis = self.compile_variable(axis); let out = self.compile_variable(out); let ty_id = out.item().id(self); @@ -43,16 +43,16 @@ impl SpirvCompiler { let offs_offset = self.metadata.stride_offset_index(pos); let offset = self.load_const_metadata(offs_offset, None, out.item()); - let dim_id = self.read_as(&dim, &out.item()); + let axis_id = self.read_as(&axis, &out.item()); - let index = self.i_add(ty_id, None, offset, dim_id).unwrap(); + let index = self.i_add(ty_id, None, offset, axis_id).unwrap(); 
self.mark_uniformity(index, uniform); let index = Variable::Raw(index, out.item()); self.load_dyn_metadata(&index, &out, out.item()); } - Metadata::Shape { dim, var } => { + Metadata::Shape { axis, var } => { let var = self.compile_variable(var); - let dim = self.compile_variable(dim); + let axis = self.compile_variable(axis); let out = self.compile_variable(out); let ty_id = out.item().id(self); @@ -63,9 +63,9 @@ impl SpirvCompiler { let offs_offset = self.metadata.shape_offset_index(pos); let offset = self.load_const_metadata(offs_offset, None, out.item()); - let dim_id = self.read_as(&dim, &out.item()); + let axis_id = self.read_as(&axis, &out.item()); - let index = self.i_add(ty_id, None, offset, dim_id).unwrap(); + let index = self.i_add(ty_id, None, offset, axis_id).unwrap(); let index = Variable::Id(index); self.load_dyn_metadata(&index, &out, out.item()); } diff --git a/crates/cubecl-std/src/tensor/contiguous/base.rs b/crates/cubecl-std/src/tensor/contiguous/base.rs index 7646178d9..8ce4c175a 100644 --- a/crates/cubecl-std/src/tensor/contiguous/base.rs +++ b/crates/cubecl-std/src/tensor/contiguous/base.rs @@ -15,7 +15,7 @@ use cubecl_core::{ tensor_line_size_parallel, }; -pub const NUM_SM_APPROX: u32 = 50; +pub const SM_COUNT_APPROX: u32 = 50; /// Returns the offset of the tensor corresponding to the layout tensor. 
#[cube] @@ -23,15 +23,15 @@ pub fn index_offset_with_layout( tensor: &Tensor>, layout: &Tensor>, offset_layout: usize, - dim_start: usize, - dim_end: usize, + axis_start: usize, + axis_end: usize, #[comptime] unroll: bool, ) -> usize { let offset_ref = offset_layout * tensor.line_size(); let mut offset = 0; #[unroll(unroll)] - for i in dim_start..dim_end { + for i in axis_start..axis_end { let ogwl = offset_ref / layout.stride(i); offset += ogwl % tensor.shape(i) * tensor.stride(i); } @@ -55,10 +55,10 @@ pub fn index_offset_contiguous( #[unroll(unroll)] for i in 0..rank { - let dim = rank - i - 1; - let shape = tensor.shape(dim); + let axis = rank - i - 1; + let shape = tensor.shape(axis); let ogwl = remainder % shape; - offset += ogwl * tensor.stride(dim); + offset += ogwl * tensor.stride(axis); remainder /= shape; } @@ -81,10 +81,10 @@ pub fn index_offset_contiguous_fastdivmod( #[unroll] for i in 0..rank { - let dim = rank - i - 1; + let axis = rank - i - 1; - let (rem, ogwl) = shape[dim].div_mod(remainder); - offset += ogwl * stride[dim]; + let (rem, ogwl) = shape[axis].div_mod(remainder); + offset += ogwl * stride[axis]; remainder = rem; } @@ -160,7 +160,7 @@ fn index_packed( tensor: &Tensor, pos: usize, in_shape: &Sequence>, - #[comptime] packed_dim: usize, + #[comptime] packed_axis: usize, #[comptime] packing: usize, #[comptime] rank: usize, ) -> N { @@ -179,14 +179,14 @@ fn index_packed( #[unroll] for i in 0..rank { - let dim = rank - i - 1; - let (rem, mut local_pos) = in_shape[dim].div_mod(remainder); + let axis = rank - i - 1; + let (rem, mut local_pos) = in_shape[axis].div_mod(remainder); remainder = rem; - if dim == packed_dim { + if axis == packed_axis { packing_offset = local_pos % packing; local_pos /= packing; } - offset += local_pos * tensor.stride(dim); + offset += local_pos * tensor.stride(axis); } let packed_val = tensor[offset]; let shift_in = packing_offset * bits_per_elem; @@ -204,7 +204,7 @@ fn copy_kernel_packed( output: &mut Tensor>, 
out_layout: LinearLayout, in_shape: Sequence>, - #[comptime] packed_dim: usize, + #[comptime] packed_axis: usize, #[comptime] packing: usize, #[comptime] rank: usize, #[comptime] elems_per_thread: usize, @@ -230,7 +230,7 @@ fn copy_kernel_packed( for k in 0..line_size { let offset_input = offset_input + offset + k; - reg[k] = index_packed(input, offset_input, &in_shape, packed_dim, packing, rank); + reg[k] = index_packed(input, offset_input, &in_shape, packed_axis, packing, rank); } registers[i] = reg; } @@ -254,7 +254,7 @@ fn copy_kernel_packed( pub fn into_contiguous_packed( client: &ComputeClient, input: &TensorHandleRef<'_, R>, - packed_dim: usize, + packed_axis: usize, shape: &[usize], packing: usize, dtype: StorageType, @@ -274,7 +274,7 @@ pub fn into_contiguous_packed( client, input, &output.as_ref(), - packed_dim, + packed_axis, shape, packing, dtype, @@ -290,9 +290,9 @@ pub fn copy_gpu_ref( output: &TensorHandleRef<'_, R>, dtype: StorageType, ) -> Result<(), LaunchError> { - let num_elems: usize = input.shape.iter().product(); + let elem_count: usize = input.shape.iter().product(); - // Vectorization is only enabled when the last dimension is contiguous. + // Line size is only enabled when the last dimension is contiguous. 
let in_rank = input.strides.len(); let out_rank = output.strides.len(); let line_size_in = tensor_line_size_parallel( @@ -309,38 +309,38 @@ pub fn copy_gpu_ref( ); let line_size = line_size_in.min(line_size_out); - let num_vecs = num_elems / line_size as usize; - let num_sm = client + let vec_count = elem_count / line_size as usize; + let sm_count = client .properties() .hardware - .num_streaming_multiprocessors - .unwrap_or(NUM_SM_APPROX); - let cube_dim = CubeDim::new(client, num_vecs); - let simul_vecs = num_sm * cube_dim.num_elems(); - let mut elems_per_unit = match num_vecs / simul_vecs as usize { + .streaming_multiprocessor_count + .unwrap_or(SM_COUNT_APPROX); + let cube_dim = CubeDim::new(client, vec_count); + let simul_vecs = sm_count * cube_dim.num_elems(); + let mut elems_per_unit = match vec_count / simul_vecs as usize { 0..2 => 1, 2..4 => 2, 4..8 => 4, 8.. => 8, }; - let mut num_elems_per_unit = line_size as usize * elems_per_unit; + let mut elems_per_unit_count = line_size as usize * elems_per_unit; - let last_dim = output.shape[out_rank - 1]; + let last_axis = output.shape[out_rank - 1]; - // If tensor is strided, elems_per_unit must be compatible with last dim - while !last_dim.is_multiple_of(num_elems_per_unit as usize) { + // If tensor is strided, elems_per_unit must be compatible with last axis + while !last_axis.is_multiple_of(elems_per_unit_count as usize) { elems_per_unit /= 2; - num_elems_per_unit /= 2; + elems_per_unit_count /= 2; } let out_vec = if line_size > 1 { line_size } else { - // Recompute because it needs to account for `num_elems_per_unit` + // Recompute because it needs to account for `elems_per_unit_count` client .io_optimized_line_sizes(&dtype) - .filter(|it| num_elems_per_unit.is_multiple_of(*it)) + .filter(|it| elems_per_unit_count.is_multiple_of(*it)) .max() .unwrap_or(1) }; @@ -353,7 +353,7 @@ pub fn copy_gpu_ref( let cube_count = calculate_cube_count_elemwise( client, - num_elems.div_ceil(num_elems_per_unit as usize), + 
elem_count.div_ceil(elems_per_unit_count as usize), cube_dim, ); @@ -381,47 +381,47 @@ pub fn into_contiguous_packed_ref( client: &ComputeClient, input: &TensorHandleRef<'_, R>, output: &TensorHandleRef<'_, R>, - packed_dim: usize, + packed_axis: usize, shape: &[usize], packing: usize, dtype: StorageType, ) -> Result<(), LaunchError> { - let num_elems: usize = input.shape.iter().product(); + let elem_count: usize = input.shape.iter().product(); - // Vectorization is only enabled when the last dimension is contiguous. + // Line size is only enabled when the last dimension is contiguous. let in_rank = input.strides.len(); let out_rank = output.strides.len(); - let in_packed_dim = in_rank - packed_dim - 1; + let in_packed_axis = in_rank - packed_axis - 1; let line_size = tensor_line_size_parallel( client.io_optimized_line_sizes(&dtype), output.shape, output.strides, out_rank - 1, ); - let num_vecs = num_elems / line_size as usize; - let num_sm = client + let vec_count = elem_count / line_size as usize; + let sm_count = client .properties() .hardware - .num_streaming_multiprocessors - .unwrap_or(NUM_SM_APPROX); + .streaming_multiprocessor_count + .unwrap_or(SM_COUNT_APPROX); - let cube_dim = CubeDim::new(client, num_vecs); - let simul_vecs = num_sm * cube_dim.num_elems(); - let mut elems_per_unit = match num_vecs / simul_vecs as usize { + let cube_dim = CubeDim::new(client, vec_count); + let simul_vecs = sm_count * cube_dim.num_elems(); + let mut elems_per_unit = match vec_count / simul_vecs as usize { 0..2 => 1, 2..4 => 2, 4..8 => 4, 8.. 
=> 8, }; - let mut num_elems_per_unit = line_size as usize * elems_per_unit; + let mut elems_per_unit_count = line_size as usize * elems_per_unit; - let last_dim = output.shape[out_rank - 1]; + let last_axis = output.shape[out_rank - 1]; - // If tensor is strided, elems_per_unit must be compatible with last dim - while !last_dim.is_multiple_of(num_elems_per_unit as usize) { + // If tensor is strided, elems_per_unit must be compatible with last axis + while !last_axis.is_multiple_of(elems_per_unit_count as usize) { elems_per_unit /= 2; - num_elems_per_unit /= 2; + elems_per_unit_count /= 2; } let out_layout = LinearLayoutArgs::from_handle(client, output, line_size); @@ -431,7 +431,7 @@ pub fn into_contiguous_packed_ref( .max(output.required_address_type()); let cube_count = calculate_cube_count_elemwise( client, - num_elems.div_ceil(num_elems_per_unit as usize), + elem_count.div_ceil(elems_per_unit_count as usize), cube_dim, ); @@ -449,7 +449,7 @@ pub fn into_contiguous_packed_ref( output.as_tensor_arg(line_size), out_layout, in_shape, - in_packed_dim, + in_packed_axis, packing, in_rank, elems_per_unit, diff --git a/crates/cubecl-std/src/tensor/contiguous/launch.rs b/crates/cubecl-std/src/tensor/contiguous/launch.rs index 23bf55756..bd77eeac0 100644 --- a/crates/cubecl-std/src/tensor/contiguous/launch.rs +++ b/crates/cubecl-std/src/tensor/contiguous/launch.rs @@ -9,9 +9,9 @@ pub fn into_contiguous_ref( input: &TensorHandleRef<'_, R>, dtype: StorageType, ) -> Result, LaunchError> { - let num_elems: usize = input.shape.iter().product(); + let elem_count: usize = input.shape.iter().product(); - let handle = client.empty(num_elems * dtype.size()); + let handle = client.empty(elem_count * dtype.size()); let output = TensorHandle::new_contiguous(input.shape.to_vec(), handle, dtype); copy_into(client, input, &output.as_ref(), dtype)?; @@ -48,7 +48,7 @@ pub fn copy_into( // It's normally faster on all devices, but since it doesn't parallelize on an axis, it // might be 
worst on GPU. Should tune at some point. - let is_cpu = client.properties().hardware.num_cpu_cores.is_some(); + let is_cpu = client.properties().hardware.cpu_core_count.is_some(); if input.strides[rank - 1] != 1 && is_cpu { launch_copy_perpendicular_ref(client, input, output, dtype)?; } else { diff --git a/crates/cubecl-std/src/tensor/contiguous/perpendicular.rs b/crates/cubecl-std/src/tensor/contiguous/perpendicular.rs index 20cbb17a3..6179696bd 100644 --- a/crates/cubecl-std/src/tensor/contiguous/perpendicular.rs +++ b/crates/cubecl-std/src/tensor/contiguous/perpendicular.rs @@ -7,7 +7,7 @@ use cubecl_core::{ use std::cmp::min; /// Kernel for converting a non-contiguous tensor into a contiguous one when -/// the vectorization axis is perpendicular to the last dimension. +/// the line-size axis is perpendicular to the last dimension. /// /// This kernel handles the case where memory is laid out such that the unit-stride /// is not on the last dimension, requiring a "gather-and-transpose" pattern @@ -16,13 +16,13 @@ use std::cmp::min; fn copy_perpendicular( input: &Tensor>, output: &mut Tensor>, - axis_vectorized: usize, + axis_lined: usize, #[define(N)] _elem: StorageType, ) { let line_size = input.line_size(); let last_axis = input.rank() - 1; - // Calculate how many vectorized lines fit into the last dimension's shape. + // Calculate how many line-sized chunks fit into the last axis shape. let num_batch = output.shape(last_axis) / line_size; // Local registers to perform a small in-register transpose. @@ -34,9 +34,9 @@ fn copy_perpendicular( } let channel_input_stride_elem = input.stride(last_axis); - let channel_output_stride_elem = output.stride(axis_vectorized); + let channel_output_stride_elem = output.stride(axis_lined); - // Strides adjusted for vectorization (line_size). + // Strides adjusted for line size. 
let channel_input_stride = channel_input_stride_elem / line_size; let channel_output_stride = channel_output_stride_elem / line_size; @@ -96,8 +96,8 @@ fn copy_perpendicular( /// Launches the perpendicular contiguous kernel. /// /// This is used when the input tensor's memory layout is such that the last dimension -/// is not the one with a stride of 1 (the vectorized dimension). It optimizes -/// the copy by using hardware vectorization (Lines) and an in-register transpose. +/// is not the one with a stride of 1 (the line-size axis). It optimizes +/// the copy by using hardware line sizes (`Line`) and an in-register transpose. pub fn launch_into_contiguous_perpendicular( client: &ComputeClient, input: &TensorHandleRef<'_, R>, @@ -117,8 +117,8 @@ pub fn launch_into_contiguous_perpendicular( /// Launches the perpendicular contiguous kernel. /// /// This is used when the input tensor's memory layout is such that the last dimension -/// is not the one with a stride of 1 (the vectorized dimension). It optimizes -/// the copy by using hardware vectorization (Lines) and an in-register transpose. +/// is not the one with a stride of 1 (the line-size axis). It optimizes +/// the copy by using hardware line sizes (`Line`) and an in-register transpose. 
pub fn launch_copy_perpendicular_ref( client: &ComputeClient, input: &TensorHandleRef<'_, R>, @@ -149,8 +149,8 @@ pub fn launch_copy_perpendicular_ref( ); let line_size = min(line_size_perpendicular, line_size_parallel); - let num_elems = output.shape.iter().product::(); - let working_units = num_elems / (line_size as usize * output.shape[rank - 1]); + let elem_count = output.shape.iter().product::(); + let working_units = elem_count / (line_size as usize * output.shape[rank - 1]); let cube_dim = CubeDim::new(client, working_units); let cube_count = calculate_cube_count_elemwise(client, working_units, cube_dim); let address_type = input diff --git a/crates/cubecl-std/src/tensor/identity.rs b/crates/cubecl-std/src/tensor/identity.rs index 26a843e7c..3e18ad2dc 100644 --- a/crates/cubecl-std/src/tensor/identity.rs +++ b/crates/cubecl-std/src/tensor/identity.rs @@ -54,7 +54,7 @@ pub fn launch_ref( "input should be a square matrix" ); - let vectorization_factor = tensor_line_size_parallel( + let line_size = tensor_line_size_parallel( R::supported_line_sizes().iter().cloned(), output.shape, output.strides, @@ -62,7 +62,7 @@ pub fn launch_ref( ); let cube_dim = CubeDim::new_2d(16, 16); - let lines_x = output.shape[1] as u32 / vectorization_factor as u32; + let lines_x = output.shape[1] as u32 / line_size as u32; let cube_count_x = lines_x.div_ceil(cube_dim.x); let cube_count_y = (output.shape[0] as u32).div_ceil(cube_dim.y); let cube_count = CubeCount::new_2d(cube_count_x, cube_count_y); @@ -77,7 +77,7 @@ pub fn launch_ref( output.handle, output.strides, output.shape, - vectorization_factor, + line_size, dtype.size(), ), ScalarArg::new(output.strides[0] + 1), diff --git a/crates/cubecl-std/src/tensor/virtual.rs b/crates/cubecl-std/src/tensor/virtual.rs index 9aa488511..cf1bbb3bc 100644 --- a/crates/cubecl-std/src/tensor/virtual.rs +++ b/crates/cubecl-std/src/tensor/virtual.rs @@ -329,9 +329,9 @@ impl VirtualTensor { #[cube] impl VirtualTensor { - pub fn coordinate(&self, 
index: usize, dim: usize) -> usize { - let num_strides = index / self.stride(dim); - num_strides % self.shape(dim) + pub fn coordinate(&self, index: usize, axis: usize) -> usize { + let stride_count = index / self.stride(axis); + stride_count % self.shape(axis) } } diff --git a/crates/cubecl-wgpu/src/compiler/wgsl/base.rs b/crates/cubecl-wgpu/src/compiler/wgsl/base.rs index a855d6c98..c179e178c 100644 --- a/crates/cubecl-wgpu/src/compiler/wgsl/base.rs +++ b/crates/cubecl-wgpu/src/compiler/wgsl/base.rs @@ -251,10 +251,10 @@ impl Item { } pub fn size(&self) -> usize { - self.elem().size() * self.vectorization_factor() + self.elem().size() * self.line_size() } - pub fn vectorization_factor(&self) -> usize { + pub fn line_size(&self) -> usize { match self { Item::Vec4(_) => 4, Item::Vec3(_) => 3, diff --git a/crates/cubecl-wgpu/src/compiler/wgsl/compiler.rs b/crates/cubecl-wgpu/src/compiler/wgsl/compiler.rs index 6127419ab..e7eef4585 100644 --- a/crates/cubecl-wgpu/src/compiler/wgsl/compiler.rs +++ b/crates/cubecl-wgpu/src/compiler/wgsl/compiler.rs @@ -104,7 +104,7 @@ impl WgslCompiler { self.strategy = mode; - let num_meta = value.buffers.len(); + let meta_count = value.buffers.len(); self.ext_meta_pos = Vec::new(); let mut num_ext = 0; @@ -116,7 +116,7 @@ impl WgslCompiler { } } - self.metadata = Metadata::new(num_meta as u32, num_ext); + self.metadata = Metadata::new(meta_count as u32, num_ext); let address_type = self.compile_storage_type(address_type); let instructions = self.compile_scope(&mut value.body); @@ -685,21 +685,21 @@ impl WgslCompiler { info_offset: self.compile_variable(offset.into()), } } - cube::Metadata::Stride { dim, var } => { + cube::Metadata::Stride { axis, var } => { let position = self.ext_meta_pos(&var); let offset = self.metadata.stride_offset_index(position); wgsl::Instruction::ExtendedMeta { info_offset: self.compile_variable(offset.into()), - dim: self.compile_variable(dim), + dim: self.compile_variable(axis), out: 
self.compile_variable(out), } } - cube::Metadata::Shape { dim, var } => { + cube::Metadata::Shape { axis, var } => { let position = self.ext_meta_pos(&var); let offset = self.metadata.shape_offset_index(position); wgsl::Instruction::ExtendedMeta { info_offset: self.compile_variable(offset.into()), - dim: self.compile_variable(dim), + dim: self.compile_variable(axis), out: self.compile_variable(out), } } diff --git a/crates/cubecl-wgpu/src/compiler/wgsl/extension.rs b/crates/cubecl-wgpu/src/compiler/wgsl/extension.rs index 370514706..022a6f2f5 100644 --- a/crates/cubecl-wgpu/src/compiler/wgsl/extension.rs +++ b/crates/cubecl-wgpu/src/compiler/wgsl/extension.rs @@ -150,7 +150,7 @@ pub fn call_safe_tanh( } fn should_use_scalar_powf(rhs: &Variable) -> bool { - rhs.is_always_scalar() || rhs.item().vectorization_factor() == 1 + rhs.is_always_scalar() || rhs.item().line_size() == 1 } pub fn call_is_nan( @@ -174,7 +174,7 @@ pub fn call_is_inf( } fn construct_vectorized_name(base_name: &str, item: Item) -> String { - let vec_factor = item.vectorization_factor(); + let vec_factor = item.line_size(); let elem = item.elem(); format!("{base_name}_{vec_factor}_{elem}") } @@ -196,7 +196,7 @@ fn construct_vector( output: Item, ) -> core::fmt::Result { let in_item = inputs[0].item; - let vec_factor = output.vectorization_factor(); + let vec_factor = output.line_size(); let function_name = construct_vectorized_name(base_name, in_item); let primitive_name = construct_primitive_name(primitive_name, *in_item.elem()); write!(f, "fn {function_name}(")?; diff --git a/crates/cubecl-wgpu/src/compiler/wgsl/instructions.rs b/crates/cubecl-wgpu/src/compiler/wgsl/instructions.rs index 83077f095..171979f86 100644 --- a/crates/cubecl-wgpu/src/compiler/wgsl/instructions.rs +++ b/crates/cubecl-wgpu/src/compiler/wgsl/instructions.rs @@ -519,7 +519,7 @@ impl Display for Instruction { } } Instruction::And { lhs, rhs, out } => { - let line_size = out.item().vectorization_factor(); + let line_size = 
out.item().line_size(); if out.is_atomic() { assert_eq!(lhs, out, "Can't use regular and on atomic"); writeln!(f, "atomicAnd({out}, {rhs});") @@ -539,7 +539,7 @@ impl Display for Instruction { } } Instruction::Or { lhs, rhs, out } => { - let line_size = out.item().vectorization_factor(); + let line_size = out.item().line_size(); if out.is_atomic() { assert_eq!(lhs, out, "Can't use regular or on atomic"); writeln!(f, "atomicOr({out}, {rhs});") @@ -749,8 +749,8 @@ impl Display for Instruction { Instruction::GreaterEqual { lhs, rhs, out } => comparison(lhs, rhs, out, ">=", f), Instruction::NotEqual { lhs, rhs, out } => comparison(lhs, rhs, out, "!=", f), Instruction::Assign { input, out } => { - let vec_left = out.item().vectorization_factor(); - let vec_right = input.item().vectorization_factor(); + let vec_left = out.item().line_size(); + let vec_right = input.item().line_size(); if out.elem().is_atomic() { if !input.is_atomic() { @@ -1087,7 +1087,7 @@ for (var {i}: {i_ty} = {start}; {i} {cmp} {end}; {increment}) {{ writeln!(f, "{out} = length({input});") } Instruction::Normalize { input, out } => { - if input.item().vectorization_factor() == 1 { + if input.item().line_size() == 1 { // We need a check for vectorization factor 1 here, for compatibility with cuda. // You can almost use sign here, however that does not correctly handle the case for x == 0.0. // Therefore we use normalize with vec2, as there is no way to use a NaN literal in wgsl. @@ -1101,7 +1101,7 @@ for (var {i}: {i_ty} = {start}; {i} {cmp} {end}; {increment}) {{ } Instruction::Dot { lhs, rhs, out } => { let out = out.fmt_left(); - if lhs.item().vectorization_factor() == 1 { + if lhs.item().line_size() == 1 { writeln!(f, "{out} = {lhs} * {rhs};") } else { writeln!(f, "{out} = dot({lhs}, {rhs});") @@ -1176,8 +1176,8 @@ fn index( len: Option<&Variable>, ) -> core::fmt::Result { let is_scalar = match lhs { - Variable::LocalMut { item, .. 
} => item.vectorization_factor() == 1, - Variable::LocalConst { item, .. } => item.vectorization_factor() == 1, + Variable::LocalMut { item, .. } => item.line_size() == 1, + Variable::LocalConst { item, .. } => item.line_size() == 1, Variable::Constant(..) => true, _ => false, }; @@ -1288,14 +1288,14 @@ fn index_assign( let item_out = out.item(); let lhs = IndexOffset::new(lhs, &offset, 0); - let vectorization_factor = item_out.vectorization_factor(); - if vectorization_factor > item_rhs.vectorization_factor() { + let line_size = item_out.line_size(); + if line_size > item_rhs.line_size() { let casting_type = Item::Scalar(*item_out.elem()); - write!(f, "{out}[{lhs}] = vec{vectorization_factor}(")?; - for i in 0..vectorization_factor { + write!(f, "{out}[{lhs}] = vec{line_size}(")?; + for i in 0..line_size { f.write_str(&rhs.index(i).fmt_cast(casting_type))?; - if i < vectorization_factor - 1 { + if i < line_size - 1 { f.write_str(",")?; } } diff --git a/crates/cubecl-wgpu/src/runtime.rs b/crates/cubecl-wgpu/src/runtime.rs index a5bd96707..5bb09d336 100644 --- a/crates/cubecl-wgpu/src/runtime.rs +++ b/crates/cubecl-wgpu/src/runtime.rs @@ -244,10 +244,10 @@ pub(crate) fn create_server(setup: WgpuSetup, options: RuntimeOptions) -> WgpuSe adapter_limits.max_compute_workgroup_size_y, adapter_limits.max_compute_workgroup_size_z, ), - num_streaming_multiprocessors: None, - num_tensor_cores: None, + streaming_multiprocessor_count: None, + tensor_core_count: None, min_tensor_cores_dim: None, - num_cpu_cores: None, // TODO: Check if device is CPU. + cpu_core_count: None, // TODO: Check if device is CPU. }; let mut compilation_options = Default::default();