diff --git a/alioth-cli/src/boot.rs b/alioth-cli/src/boot.rs index f34f6acf..8204bb8b 100644 --- a/alioth-cli/src/boot.rs +++ b/alioth-cli/src/boot.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use std::ffi::CString; use std::path::{Path, PathBuf}; -use alioth::board::BoardConfig; +use alioth::board::{BoardConfig, CpuConfig}; #[cfg(target_arch = "x86_64")] use alioth::device::fw_cfg::FwCfgItemParam; use alioth::errors::{DebugTrace, trace_error}; @@ -184,10 +184,15 @@ pub struct BootArgs { #[arg(short, long, value_name = "PATH")] initramfs: Option>, - /// Number of VCPUs assigned to the guest. + /// DEPRECATED: Use --cpu instead. #[arg(long, default_value_t = 1)] num_cpu: u16, + #[arg(short('p'), long, help( + help_text::("Configure the VCPUs of the guest.") + ))] + cpu: Option>, + /// DEPRECATED: Use --memory instead. #[arg(long, default_value = "1G")] mem_size: String, @@ -379,9 +384,18 @@ pub fn boot(args: BootArgs) -> Result<(), Error> { ..Default::default() } }; + let cpu_config = if let Some(s) = args.cpu { + serde_aco::from_args(&s, &objects).context(error::ParseArg { arg: s })? + } else { + eprintln!("Please update the cmd line to --cpu count={}", args.num_cpu); + CpuConfig { + count: args.num_cpu, + ..Default::default() + } + }; let board_config = BoardConfig { mem: mem_config, - num_cpu: args.num_cpu, + cpu: cpu_config, coco, }; let vm = Machine::new(hypervisor, board_config).context(error::CreateVm)?; diff --git a/alioth/src/arch/aarch64/reg.rs b/alioth/src/arch/aarch64/reg.rs index 27a21e4d..9c259fb8 100644 --- a/alioth/src/arch/aarch64/reg.rs +++ b/alioth/src/arch/aarch64/reg.rs @@ -174,10 +174,10 @@ bitfield! { #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] pub struct MpidrEl1(u64); impl Debug; - pub aff3, set_aff3: 39, 32; + pub u8, aff3, set_aff3: 39, 32; pub u, set_u: 30; pub mt, set_mt: 24; - pub aff2, set_aff2: 23, 16; - pub aff1, set_aff1: 15, 8; - pub aff0, set_aff0: 7, 0; + pub u8, aff2, set_aff2: 23, 16; + pub u8, aff1, set_aff1: 15, 8; + pub u8, aff0, set_aff0: 7, 0; } diff --git a/alioth/src/board/board.rs b/alioth/src/board/board.rs index 6e1a427f..34a47ebe 100644 --- a/alioth/src/board/board.rs +++ b/alioth/src/board/board.rs @@ -28,6 +28,8 @@ use std::thread::JoinHandle; use libc::{MAP_PRIVATE, MAP_SHARED}; use parking_lot::{Condvar, Mutex, RwLock, RwLockReadGuard}; +use serde::Deserialize; +use serde_aco::Help; use snafu::{ResultExt, Snafu}; #[cfg(target_arch = "x86_64")] @@ -68,6 +70,8 @@ pub enum Error { Memory { source: Box }, #[snafu(display("Failed to load payload"), context(false))] Loader { source: Box }, + #[snafu(display("Invalid CPU topology"))] + InvalidCpuTopology, #[snafu(display("Failed to create VCPU-{index}"))] CreateVcpu { index: u16, @@ -97,6 +101,61 @@ pub enum Error { type Result = std::result::Result; +#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Default, Help)] +pub struct CpuTopology { + #[serde(default)] + /// Enable SMT (Hyperthreading). + pub smt: bool, + #[serde(default)] + /// Number of cores per socket. + pub cores: u16, + #[serde(default)] + /// Number of sockets. + pub sockets: u8, +} + +impl CpuTopology { + pub fn encode(&self, index: u16) -> (u8, u16, u8) { + let total_cores = self.cores * self.sockets as u16; + let thread_id = index / total_cores; + let core_id = index % total_cores % self.cores; + let socket_id = index % total_cores / self.cores; + (thread_id as u8, core_id, socket_id as u8) + } +} + +const fn default_cpu_count() -> u16 { + 1 +} + +#[derive(Debug, Deserialize, Default, Help)] +pub struct CpuConfig { + /// Number of VCPUs assigned to the guest. [default: 1] + #[serde(default = "default_cpu_count")] + pub count: u16, + /// Architecture specific CPU topology. + #[serde(default)] + pub topology: CpuTopology, +} + +impl CpuConfig { + pub fn fixup(&mut self) -> Result<()> { + if self.topology.sockets == 0 { + self.topology.sockets = 1; + } + let vcpus_per_core = 1 + self.topology.smt as u16; + if self.topology.cores == 0 { + self.topology.cores = self.count / self.topology.sockets as u16 / vcpus_per_core; + } + let vcpus_per_socket = self.topology.cores * vcpus_per_core; + let count = self.topology.sockets as u16 * vcpus_per_socket; + if count != self.count { + return error::InvalidCpuTopology.fail(); + } + Ok(()) + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum BoardState { Created, @@ -116,7 +175,7 @@ pub const PCIE_MMIO_64_SIZE: u64 = 1 << 40; pub struct BoardConfig { pub mem: MemConfig, - pub num_cpu: u16, + pub cpu: CpuConfig, pub coco: Option, } @@ -124,6 +183,10 @@ impl BoardConfig { pub fn pcie_mmio_64_start(&self) -> u64 { (self.mem.size.saturating_sub(RAM_32_SIZE) + MEM_64_START).next_power_of_two() } + + pub fn config_fixup(&mut self) -> Result<()> { + self.cpu.fixup() + } } type VcpuGuard<'a> = RwLockReadGuard<'a, Vec<(JoinHandle>, Sender<()>)>>; @@ -444,3 +507,7 @@ where Ok(pages) } } + +#[cfg(test)] +#[path = "board_test.rs"] +mod tests; diff --git a/alioth/src/board/board_aarch64.rs b/alioth/src/board/board_aarch64.rs index c3e5da3e..637e15f0 100644 --- a/alioth/src/board/board_aarch64.rs +++ b/alioth/src/board/board_aarch64.rs @@ -24,7 +24,7 @@ use crate::arch::layout::{ RAM_32_SIZE, RAM_32_START, }; use crate::arch::reg::MpidrEl1; -use crate::board::{Board, BoardConfig, PCIE_MMIO_64_SIZE, Result, VcpuGuard}; +use crate::board::{Board, BoardConfig, CpuTopology, PCIE_MMIO_64_SIZE, Result, VcpuGuard}; use crate::firmware::dt::{DeviceTree, Node, PropVal}; use crate::hv::{GicV2, GicV2m, GicV3, Hypervisor, Its, Vcpu, Vm}; use crate::loader::{Executable, InitState, Payload}; @@ -59,7 +59,7 @@ impl ArchBoard { where H: Hypervisor, { - let gic = match vm.create_gic_v3(GIC_DIST_START, GIC_V3_REDIST_START, config.num_cpu) { + let gic = match vm.create_gic_v3(GIC_DIST_START, GIC_V3_REDIST_START, config.cpu.count) { Ok(v3) => Gic::V3(v3), Err(e) => { log::error!("Cannot create GIC v3: {e:?}trying v2..."); @@ -91,22 +91,59 @@ impl ArchBoard { } } -fn encode_mpidr(index: u16) -> MpidrEl1 { +fn encode_mpidr(topology: &CpuTopology, index: u16) -> MpidrEl1 { + let (thread_id, core_id, socket_id) = topology.encode(index); let mut mpidr = MpidrEl1(0); - let index = index as u64; - mpidr.set_aff0(index & 0xf); - mpidr.set_aff1(index >> 4); - mpidr.set_aff2(index >> 12); - mpidr.set_aff3(index >> 20); + mpidr.set_aff0(thread_id); + mpidr.set_aff1(core_id as u8); + mpidr.set_aff2(socket_id); mpidr } +fn dt_cpu_node(topology: &CpuTopology, socket: u8, core: u16) -> Node { + let mut mpidr = MpidrEl1(0); + mpidr.set_aff1(core as u8); + mpidr.set_aff2(socket); + if topology.smt { + Node { + props: HashMap::new(), + nodes: vec![ + ( + "thread0".to_owned(), + Node { + props: HashMap::from([( + "cpu", + PropVal::PHandle(PHANDLE_CPU | mpidr.0 as u32), + )]), + nodes: Vec::new(), + }, + ), + ( + "thread1".to_owned(), + Node { + props: HashMap::from([( + "cpu", + PropVal::PHandle(PHANDLE_CPU | mpidr.0 as u32 | 1), + )]), + nodes: Vec::new(), + }, + ), + ], + } + } else { + Node { + nodes: Vec::new(), + props: HashMap::from([("cpu", PropVal::PHandle(PHANDLE_CPU | mpidr.0 as u32))]), + } + } +} + impl Board where V: Vm, { pub fn encode_cpu_identity(&self, index: u16) -> u64 { - encode_mpidr(index).0 + encode_mpidr(&self.config.cpu.topology, index).0 } pub fn setup_firmware(&self, _: &Path, _: &Payload) -> Result { @@ -222,7 +259,7 @@ where } pub fn create_cpu_nodes(&self, root: &mut Node) { - let mut cpu_nodes: Vec<_> = (0..(self.config.num_cpu)) + let mut cpu_nodes: Vec<_> = (0..(self.config.cpu.count)) .map(|index| { let mpidr = self.encode_cpu_identity(index); ( @@ -233,42 +270,44 @@ where ("compatible", PropVal::Str("arm,arm-v8")), ("enable-method", PropVal::Str("psci")), ("reg", PropVal::U64(mpidr)), - ("phandle", PropVal::PHandle(PHANDLE_CPU | index as u32)), + ("phandle", PropVal::PHandle(PHANDLE_CPU | mpidr as u32)), ]), nodes: Vec::new(), }, ) }) .collect(); - let cores = (0..(self.config.num_cpu)) - .map(|index| { - ( - format!("core{index}"), - Node { - props: HashMap::from([( - "cpu", - PropVal::PHandle(PHANDLE_CPU | index as u32), - )]), - nodes: Vec::new(), - }, - ) - }) - .collect(); + let cpu_map = Node { props: HashMap::new(), - nodes: vec![( - "socket0".to_owned(), - Node { - props: HashMap::new(), - nodes: vec![( - "cluster0".to_owned(), + nodes: (0..self.config.cpu.topology.sockets) + .map(|socket| { + ( + format!("socket{socket}",), Node { props: HashMap::new(), - nodes: cores, + nodes: vec![( + "cluster0".to_owned(), + Node { + props: HashMap::new(), + nodes: (0..self.config.cpu.topology.cores) + .map(|core| { + ( + format!("core{core}"), + dt_cpu_node( + &self.config.cpu.topology, + socket, + core, + ), + ) + }) + .collect(), + }, + )], }, - )], - }, - )], + ) + }) + .collect(), }; cpu_nodes.push(("cpu-map".to_owned(), cpu_map)); let cpus = Node { @@ -335,7 +374,7 @@ where let ppi = 1; let level_trigger = 4; let cpu_mask = match self.arch.gic { - Gic::V2(_) => (1 << self.config.num_cpu) - 1, + Gic::V2(_) => (1 << self.config.cpu.count) - 1, Gic::V3 { .. } => 0, }; for pin in irq_pins { @@ -422,7 +461,7 @@ where GIC_DIST_START, 64 << 10, GIC_V3_REDIST_START, - self.config.num_cpu as u64 * (128 << 10), + self.config.cpu.count as u64 * (128 << 10), ]), ), ("phandle", PropVal::U32(PHANDLE_GIC)), diff --git a/alioth/src/board/board_aarch64_test.rs b/alioth/src/board/board_aarch64_test.rs index f0651032..4e61a89e 100644 --- a/alioth/src/board/board_aarch64_test.rs +++ b/alioth/src/board/board_aarch64_test.rs @@ -15,12 +15,13 @@ use rstest::rstest; use crate::arch::reg::MpidrEl1; +use crate::board::CpuTopology; use crate::board::aarch64::encode_mpidr; #[rstest] -#[case(1, 1)] -#[case(8, 8)] -#[case(23, (1 << 8) | 7)] -fn test_encode_mpidr(#[case] index: u16, #[case] mpidr: u64) { - assert_eq!(encode_mpidr(index), MpidrEl1(mpidr)); +#[case(CpuTopology{smt: false, cores: 1, sockets: 1}, 1, 1)] +#[case(CpuTopology{smt: true, cores: 8, sockets: 1}, 8, 1)] +#[case(CpuTopology{smt: true, cores: 8, sockets: 4}, 45, (1 << 16) | (5 << 8) | 1)] +fn test_encode_mpidr(#[case] topology: CpuTopology, #[case] index: u16, #[case] mpidr: u64) { + assert_eq!(encode_mpidr(&topology, index), MpidrEl1(mpidr)); } diff --git a/alioth/src/board/board_test.rs b/alioth/src/board/board_test.rs new file mode 100644 index 00000000..6aaac5a4 --- /dev/null +++ b/alioth/src/board/board_test.rs @@ -0,0 +1,47 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use assert_matches::assert_matches; + +use crate::board::{CpuConfig, CpuTopology, Error}; + +#[test] +fn test_cpu_topology_fixup() { + let mut empty = CpuConfig { + count: 2, + topology: CpuTopology::default(), + }; + empty.fixup().unwrap(); + assert_matches!( + empty, + CpuConfig { + count: 2, + topology: CpuTopology { + smt: false, + cores: 2, + sockets: 1 + } + } + ); + + let mut invalid = CpuConfig { + count: 2, + topology: CpuTopology { + smt: true, + cores: 2, + sockets: 1, + }, + }; + assert_matches!(invalid.fixup(), Err(Error::InvalidCpuTopology { .. })) +} diff --git a/alioth/src/board/board_x86_64.rs b/alioth/src/board/board_x86_64.rs index 16749e2d..76c6e474 100644 --- a/alioth/src/board/board_x86_64.rs +++ b/alioth/src/board/board_x86_64.rs @@ -33,7 +33,7 @@ use crate::arch::layout::{ use crate::arch::msr::{IA32_MISC_ENABLE, MiscEnable}; use crate::arch::reg::{Reg, SegAccess, SegReg, SegRegVal}; use crate::arch::sev::SnpPageType; -use crate::board::{Board, BoardConfig, PCIE_MMIO_64_SIZE, Result, VcpuGuard, error}; +use crate::board::{Board, BoardConfig, CpuTopology, PCIE_MMIO_64_SIZE, Result, VcpuGuard, error}; use crate::firmware::acpi::bindings::{ AcpiTableFadt, AcpiTableHeader, AcpiTableRsdp, AcpiTableXsdt3, }; @@ -53,12 +53,63 @@ pub struct ArchBoard { _phantom: PhantomData, } +fn add_topology(cpuids: &mut HashMap, func: u32, levels: &[(u8, u16)]) { + let edx = 0; // patched later in init_vcpu() + for (index, (level, count)) in levels.iter().chain(&[(0, 0)]).enumerate() { + let eax = count.next_power_of_two().trailing_zeros(); + let ebx = *count as u32; + let ecx = ((*level as u32) << 8) | (index as u32); + cpuids.insert( + CpuidIn { + func, + index: Some(index as u32), + }, + CpuidResult { eax, ebx, ecx, edx }, + ); + } +} + impl ArchBoard { pub fn new(hv: &H, _vm: &V, config: &BoardConfig) -> Result where H: Hypervisor, { let mut cpuids = hv.get_supported_cpuids()?; + + let threads_per_core = 1 + config.cpu.topology.smt as u16; + let threads_per_socket = config.cpu.topology.cores * threads_per_core; + + add_topology( + &mut cpuids, + 0xb, + &[(1, threads_per_core), (2, threads_per_socket)], + ); + let leaf0 = CpuidIn { + func: 0, + index: None, + }; + if let Some(func0) = cpuids.get(&leaf0) { + let vendor = [func0.ebx, func0.edx, func0.ecx]; + match vendor.as_bytes() { + b"GenuineIntel" => add_topology( + &mut cpuids, + 0x1f, + &[(1, threads_per_core), (2, threads_per_socket)], + ), + b"AuthenticAMD" => add_topology( + &mut cpuids, + 0x8000_0026, + &[ + (1, threads_per_core), + (2, threads_per_socket), + (3, threads_per_socket), + (4, threads_per_socket), + ], + ), + _ => {} + } + } + for (in_, out) in &mut cpuids { if in_.func == 0x1 { out.ecx |= (1 << 24) | (1 << 31); @@ -81,11 +132,18 @@ impl ArchBoard { } } } - let highest = unsafe { __cpuid(0x8000_0000) }.eax; + let leaf_8000_0000 = unsafe { __cpuid(0x8000_0000) }; + cpuids.insert( + CpuidIn { + func: 0x8000_0000, + index: None, + }, + leaf_8000_0000, + ); // 0x8000_0002 to 0x8000_0004: processor name // 0x8000_0005: L1 cache/LTB // 0x8000_0006: L2 cache/TLB and L3 cache - for func in 0x8000_0002..=std::cmp::min(highest, 0x8000_0006) { + for func in 0x8000_0002..=0x8000_0006 { let host_cpuid = unsafe { __cpuid(func) }; cpuids.insert(CpuidIn { func, index: None }, host_cpuid); } @@ -97,12 +155,24 @@ impl ArchBoard { } } +fn encode_x2apic_id(topology: &CpuTopology, index: u16) -> u32 { + let (thread_id, core_id, socket_id) = topology.encode(index); + + let thread_width = topology.smt as u32; + let cores_per_socket = topology.cores as u32; + let core_width = cores_per_socket.next_power_of_two().trailing_zeros(); + + (socket_id as u32) << (core_width + thread_width) + | (core_id as u32) << thread_width + | (thread_id as u32) +} + impl Board where V: Vm, { pub fn encode_cpu_identity(&self, index: u16) -> u64 { - index as u64 + encode_x2apic_id(&self.config.cpu.topology, index) as u64 } fn fill_snp_cpuid(&self, entries: &mut [SnpCpuidFunc]) { @@ -271,7 +341,7 @@ where if in_.func == 0x1 { out.ebx &= 0x00ff_ffff; out.ebx |= apic_id << 24; - } else if in_.func == 0xb || in_.func == 0x1f { + } else if in_.func == 0xb || in_.func == 0x1f || in_.func == 0x80000026 { out.edx = apic_id; } } @@ -405,7 +475,7 @@ where let offset_madt = offset_fadt + size_of_val(&fadt); debug_assert_eq!(offset_madt % 4, 0); - let apic_ids: Vec = (0..self.config.num_cpu) + let apic_ids: Vec = (0..self.config.cpu.count) .map(|index| self.encode_cpu_identity(index) as u32) .collect(); let (madt, madt_ioapic, madt_apics) = create_madt(&apic_ids); @@ -587,3 +657,7 @@ pub struct SnpCpuidInfo { pub _reserved2: u64, pub entries: [SnpCpuidFunc; 64], } + +#[cfg(test)] +#[path = "board_x86_64_test.rs"] +mod tests; diff --git a/alioth/src/board/board_x86_64_test.rs b/alioth/src/board/board_x86_64_test.rs new file mode 100644 index 00000000..1f5af7d9 --- /dev/null +++ b/alioth/src/board/board_x86_64_test.rs @@ -0,0 +1,32 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use rstest::rstest; + +use crate::board::CpuTopology; +use crate::board::x86_64::encode_x2apic_id; + +#[rstest] +#[case(CpuTopology{smt: false, cores: 1, sockets: 1}, 0, 0)] +#[case(CpuTopology{smt: true, cores: 2, sockets: 1}, 0, 0)] +#[case(CpuTopology{smt: true, cores: 2, sockets: 1}, 1, 2)] +#[case(CpuTopology{smt: true, cores: 2, sockets: 1}, 2, 1)] +#[case(CpuTopology{smt: true, cores: 2, sockets: 1}, 3, 3)] +#[case(CpuTopology{smt: true, cores: 6, sockets: 2}, 4, 8)] +#[case(CpuTopology{smt: true, cores: 6, sockets: 2}, 11, 26)] +#[case(CpuTopology{smt: true, cores: 6, sockets: 2}, 14, 5)] +#[case(CpuTopology{smt: true, cores: 6, sockets: 2}, 23, 27)] +fn test_encode_x2apic(#[case] topology: CpuTopology, #[case] index: u16, #[case] x2apic: u32) { + assert_eq!(encode_x2apic_id(&topology, index), x2apic) +} diff --git a/alioth/src/vm.rs b/alioth/src/vm.rs index 6f65e94d..cd0bc34b 100644 --- a/alioth/src/vm.rs +++ b/alioth/src/vm.rs @@ -123,7 +123,9 @@ impl Machine where H: Hypervisor + 'static, { - pub fn new(hv: H, config: BoardConfig) -> Result { + pub fn new(hv: H, mut config: BoardConfig) -> Result { + config.config_fixup()?; + let vm_config = VmConfig { coco: config.coco.clone(), }; @@ -137,7 +139,7 @@ where let (event_tx, event_rx) = mpsc::channel(); let mut vcpus = board.vcpus.write(); - for index in 0..board.config.num_cpu { + for index in 0..board.config.cpu.count { let (boot_tx, boot_rx) = mpsc::channel(); let event_tx = event_tx.clone(); let board = board.clone();