diff --git a/build_sdk.py b/build_sdk.py index ec81a5527..b1e8ff180 100644 --- a/build_sdk.py +++ b/build_sdk.py @@ -588,6 +588,7 @@ def main() -> None: for arch in KernelArch: arch_str = arch.name.lower() parser.add_argument(f"--toolchain-prefix-{arch_str}", default=arch.c_toolchain(), help=f"C toolchain prefix when compiling for {arch_str}, e.g {arch_str}-none-elf") + parser.add_argument("--experimental-domain-support", action="store_true", help="Enable experimental support for seL4's domain scheduler") args = parser.parse_args() @@ -618,6 +619,11 @@ def main() -> None: else: selected_configs = SUPPORTED_CONFIGS + if args.experimental_domain_support: + for config in selected_configs: + config.kernel_options["KernelNumDomains"] = 256 + config.kernel_options["KernelDomainSchedule"] = Path("domain_schedule.c") + sel4_dir = args.sel4.expanduser() if not sel4_dir.exists(): raise Exception(f"sel4_dir: {sel4_dir} does not exist") diff --git a/docs/manual.md b/docs/manual.md index 434fed1e6..aefa527bd 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -102,6 +102,7 @@ This document attempts to clearly describe all of these terms, however as the co * [notification](#notification) * [interrupt](#irq) * [fault](#fault) +* [domain scheduling](#domain) ## System {#system} @@ -173,6 +174,10 @@ Runnable PDs of the same priority are scheduled in a round-robin manner. The **passive** determines whether the PD is passive. A passive PD will have its scheduling context revoked after initialisation and then bound instead to the PD's notification object. This means the PD will be scheduled on receiving a notification, whereby it will run on the notification's scheduling context. When the PD receives a *protected procedure* by another PD or a *fault* caused by a child PD, the passive PD will run on the scheduling context of the callee. +#### Domain scheduling (experimental) + +If the SDK is built with experimental domain support, the PD can be assigned to a scheduling **domain** in the system description. If a PD is assigned to a domain, then the PD will only be allowed to execute when that domain is active. Which domain is active at any given point in time is determined by the [domain schedule](#domain). + ## Virtual Machine {#vm} A *virtual machine* (VM) is a runtime abstraction for running guest operating systems in Microkit. It is similar @@ -311,6 +316,10 @@ protection domain. The same applies for a virtual machine. This means that whenever a fault is caused by a child, it will be delivered to the parent PD instead of the system fault handler via the `fault` entry point. It is then up to the parent to decide how the fault is handled. +## Domain scheduling (experimental) {#domain} + +Microkit can be built with experimental support for a method of temporally isolating different groups of PDs called domain scheduling. On a Microkit system, only one domain is active at a time, and the kernel alternates between domains according to a round-robin schedule. A domain schedule consists of an ordered list of domains, each with an associated length of time to run. The kernel will then activate a domain for the specified length of time; after that time has passed, it will deactivate that domain and activate the next domain for its length of time, and so on, proceeding through the list until it wraps back to the first domain. PDs are assigned to domains, such that when a certain domain is active, only PDs belonging to that domain will be scheduled to run. + # SDK {#sdk} Microkit is distributed as a software development kit (SDK). @@ -640,6 +649,7 @@ Within the `system` root element the following child elements are supported: * `protection_domain` * `memory_region` * `channel` +* `domain_schedule` (if SDK is built with domain scheduling support) ## `protection_domain` @@ -655,6 +665,7 @@ It supports the following attributes: * `stack_size`: (optional) Number of bytes that will be used for the PD's stack. Must be be between 4KiB and 16MiB and be 4K page-aligned. Defaults to 4KiB. * `smc`: (optional, only on ARM) Allow the PD to give an SMC call for the kernel to perform. Only available when the kernel has been configured with `KernelAllowSMCCalls`. Defaults to false. +* `domain`: (optional, experimental) Specifies the name of the scheduling domain the PD belongs to. Additionally, it supports the following child elements: @@ -749,6 +760,19 @@ The `end` element has the following attributes: The `id` is passed to the PD in the `notified` and `protected` entry points. The `id` should be passed to the `microkit_notify` and `microkit_ppcall` functions. +## `domain_schedule` (experimental) + +The `domain_schedule` element has has a list of up to 256 `domain` child elements. Each child specifies a particular timeslice in the domain schedule and the order of the child elements specifies the order in which the timeslices will be scheduled. A domain may be named more than once in the schedule, in which case the domain will have multiple timeslices in the schedule. + +The `domain` element has the following attributes: + +* `name`: Name of the domain. +* `length`: Length of time the domain will run each time it is active, in milliseconds. + +The `name` attribute of each `domain` element can be referenced in the `domain` attribute of a `protection_domain` element. + +The `domain_schedule` element is only valid if the SDK is built with the `--experimental-domain-support` flag. + # Board Support Packages {#bsps} This chapter describes the board support packages that are available in the SDK. diff --git a/domain_schedule.c b/domain_schedule.c new file mode 100644 index 000000000..332e6eb51 --- /dev/null +++ b/domain_schedule.c @@ -0,0 +1,16 @@ +/* + * Copyright 2017, Data61, CSIRO (ABN 41 687 119 230) + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +/* + * This domain schedule is intended to be filled in via ELF patching + */ + +#include +#include +#include + +dschedule_t ksDomSchedule[256]; +word_t ksDomScheduleLength; diff --git a/tool/microkit/src/main.rs b/tool/microkit/src/main.rs index b27d4561f..ee5346440 100644 --- a/tool/microkit/src/main.rs +++ b/tool/microkit/src/main.rs @@ -72,6 +72,7 @@ const INIT_VSPACE_CAP_ADDRESS: u64 = 3; const IRQ_CONTROL_CAP_ADDRESS: u64 = 4; // Singleton const INIT_ASID_POOL_CAP_ADDRESS: u64 = 6; const SMC_CAP_ADDRESS: u64 = 15; +const DOMAIN_CAP_ADDRESS: u64 = 11; // const ASID_CONTROL_CAP_ADDRESS: u64 = 5; // Singleton // const IO_PORT_CONTROL_CAP_ADDRESS: u64 = 7; // Null on this platform @@ -809,6 +810,7 @@ fn build_system( cap_address_names.insert(INIT_ASID_POOL_CAP_ADDRESS, "ASID Pool: init".to_string()); cap_address_names.insert(IRQ_CONTROL_CAP_ADDRESS, "IRQ Control".to_string()); cap_address_names.insert(SMC_CAP_IDX, "SMC".to_string()); + cap_address_names.insert(DOMAIN_CAP_ADDRESS, "Domain Cap".to_string()); let system_cnode_bits = system_cnode_size.ilog2() as u64; @@ -2584,6 +2586,35 @@ fn build_system( system_invocations.push(tcb_cap_copy_invocation); } + for (pd_idx, pd) in system.protection_domains.iter().enumerate() { + if let Some(domain_id) = pd.domain_id { + system_invocations.push(Invocation::new( + config, + InvocationArgs::DomainSetSet { + domain_set: DOMAIN_CAP_ADDRESS, + domain: domain_id as u8, + tcb: pd_tcb_objs[pd_idx].cap_addr, + }, + )); + if let Some(pd_vm) = &pd.virtual_machine { + for (vm_idx, vm) in virtual_machines.iter().enumerate() { + if pd_vm.name == vm.name { + for vcpu_idx in 0..vm.vcpus.len() { + system_invocations.push(Invocation::new( + config, + InvocationArgs::DomainSetSet { + domain_set: DOMAIN_CAP_ADDRESS, + domain: domain_id as u8, + tcb: vcpu_tcb_objs[vm_idx + vcpu_idx].cap_addr, + }, + )); + } + } + } + } + } + } + // Set VSpace and CSpace let mut pd_set_space_invocation = Invocation::new( config, @@ -3276,6 +3307,7 @@ fn main() -> Result<(), String> { invocations_labels, device_regions: kernel_platform_config.devices, normal_regions: kernel_platform_config.memory, + domain_scheduler: json_str_as_u64(&kernel_config_json, "NUM_DOMAINS")? != 1, }; if let Arch::Aarch64 = kernel_config.arch { @@ -3305,9 +3337,26 @@ fn main() -> Result<(), String> { system_invocation_count_symbol_name: "system_invocation_count", }; - let kernel_elf = ElfFile::from_path(&kernel_elf_path)?; + let mut kernel_elf = ElfFile::from_path(&kernel_elf_path)?; let mut monitor_elf = ElfFile::from_path(&monitor_elf_path)?; + if let Some(domain_schedule) = &system.domain_schedule { + let schedule = &domain_schedule.schedule; + kernel_elf.write_symbol( + "ksDomScheduleLength", + &(schedule.len() as u64).to_le_bytes(), + )?; + + let mut out = Vec::with_capacity(schedule.len() * 16); + + for timeslice in schedule.iter() { + out.extend(timeslice.id.to_le_bytes()); + out.extend(timeslice.length.to_le_bytes()); + } + + kernel_elf.write_symbol("ksDomSchedule", &out)?; + } + if monitor_elf.segments.iter().filter(|s| s.loadable).count() > 1 { eprintln!( "Monitor ({}) has {} segments, it must only have one", diff --git a/tool/microkit/src/sdf.rs b/tool/microkit/src/sdf.rs index cde2ea57a..83b0c9599 100644 --- a/tool/microkit/src/sdf.rs +++ b/tool/microkit/src/sdf.rs @@ -20,6 +20,7 @@ use crate::sel4::{Config, IrqTrigger, PageSize}; use crate::util::str_to_bool; use crate::MAX_PDS; use std::path::{Path, PathBuf}; +use std::collections::HashMap; /// Events that come through entry points (e.g notified or protected) are given an /// identifier that is used as the badge at runtime. @@ -42,6 +43,9 @@ const PD_DEFAULT_STACK_SIZE: u64 = 0x1000; const PD_MIN_STACK_SIZE: u64 = 0x1000; const PD_MAX_STACK_SIZE: u64 = 1024 * 1024 * 16; +/// The maximum length of domain schedule supported by the kernel (in milliseconds) +const DOMAIN_SCHEDULE_MAX_LENGTH: usize = 256; + /// The purpose of this function is to parse an integer that could /// either be in decimal or hex format, unlike the normal parsing /// functionality that the Rust standard library provides. @@ -180,6 +184,8 @@ pub struct ProtectionDomain { pub parent: Option, /// Location in the parsed SDF file text_pos: roxmltree::TextPos, + /// Index into the domain schedule vector if the system is using domain scheduling + pub domain_id: Option, } #[derive(Debug, PartialEq, Eq, Hash)] @@ -224,6 +230,18 @@ impl ExecutionContext for VirtualMachine { } } +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct DomainTimeslice { + pub id: u64, + pub length: u64, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct DomainSchedule { + pub domain_ids: HashMap, + pub schedule: Vec, +} + impl SysMapPerms { fn from_str(s: &str) -> Result { let mut perms = 0; @@ -331,6 +349,7 @@ impl ProtectionDomain { xml_sdf: &XmlSystemDescription, node: &roxmltree::Node, is_child: bool, + domain_schedule: &Option, ) -> Result { let mut attrs = vec![ "name", @@ -342,6 +361,7 @@ impl ProtectionDomain { // The SMC field is only available in certain configurations // but we do the error-checking further down. "smc", + "domain", ]; if is_child { attrs.push("id"); @@ -456,6 +476,27 @@ impl ProtectionDomain { )); } + let mut domain_id = None; + match (domain_schedule, checked_lookup(xml_sdf, node, "domain")) { + (Some(domain_schedule), Ok(domain_name)) => { + domain_id = domain_schedule.domain_ids.get(domain_name); + if domain_id.is_none() { + return Err(format!("Protection domain {} specifies a domain {} that is not in the domain schedule", name, domain_name)); + } + } + (Some(_), _) => { + return Err(format!("System specifies a domain schedule but protection domain {} does not specify a domain", name)) + } + (_, Ok(domain)) => { + if config.domain_scheduler { + return Err(format!("Protection domain {} specifies a domain {} but system does not specify a domain schedule", name, domain)); + } else { + return Err("Assigning PDs to domains is only supported if SDK is built with --experimental-domain-support".to_string()); + } + } + (_, _) => {} + } + let mut maps = Vec::new(); let mut irqs = Vec::new(); let mut setvars: Vec = Vec::new(); @@ -602,9 +643,13 @@ impl ProtectionDomain { kind: SysSetVarKind::Paddr { region }, }) } - "protection_domain" => { - child_pds.push(ProtectionDomain::from_xml(config, xml_sdf, &child, true)?) - } + "protection_domain" => child_pds.push(ProtectionDomain::from_xml( + config, + xml_sdf, + &child, + true, + domain_schedule, + )?), "virtual_machine" => { if virtual_machine.is_some() { return Err(value_error( @@ -656,6 +701,7 @@ impl ProtectionDomain { has_children, parent: None, text_pos: xml_sdf.doc.text_pos_at(node.range().start), + domain_id: domain_id.copied(), }) } } @@ -940,6 +986,74 @@ impl Channel { } } +impl DomainSchedule { + fn from_xml( + xml_sdf: &XmlSystemDescription, + node: &roxmltree::Node, + ) -> Result { + let pos = xml_sdf.doc.text_pos_at(node.range().start); + + check_attributes(xml_sdf, node, &[])?; + + let mut next_domain_id = 0; + let mut domain_ids = HashMap::new(); + let mut schedule = Vec::new(); + for child in node.children() { + if !child.is_element() { + continue; + } + + let child_name = child.tag_name().name(); + if child_name != "domain" { + return Err(format!( + "Error: invalid XML element '{}': {}", + child_name, + loc_string(xml_sdf, pos) + )); + } + + if schedule.len() == DOMAIN_SCHEDULE_MAX_LENGTH { + return Err(format!( + "Error: length of domain schedule exceeds maximum of {}: {}", + DOMAIN_SCHEDULE_MAX_LENGTH, + loc_string(xml_sdf, pos) + )); + } + + check_attributes(xml_sdf, &child, &["name", "length"])?; + let name = checked_lookup(xml_sdf, &child, "name")?; + let length = checked_lookup(xml_sdf, &child, "length")?.parse::(); + if length.is_err() { + return Err(format!( + "Error: invalid domain timeslice length '{}': {}", + name, + loc_string(xml_sdf, pos) + )); + } + + let id = match domain_ids.get(name) { + Some(&id) => id, + None => { + let id = next_domain_id; + next_domain_id += 1; + domain_ids.insert(name.to_string(), id); + id + } + }; + + schedule.push(DomainTimeslice { + id, + length: length.unwrap(), + }); + } + + Ok(DomainSchedule { + domain_ids, + schedule, + }) + } +} + struct XmlSystemDescription<'a> { filename: &'a str, doc: &'a roxmltree::Document<'a>, @@ -947,6 +1061,7 @@ struct XmlSystemDescription<'a> { #[derive(Debug)] pub struct SystemDescription { + pub domain_schedule: Option, pub protection_domains: Vec, pub memory_regions: Vec, pub channels: Vec, @@ -1175,6 +1290,7 @@ pub fn parse(filename: &str, xml: &str, config: &Config) -> Result Result Result { - root_pds.push(ProtectionDomain::from_xml(config, &xml_sdf, &child, false)?) - } + "protection_domain" => root_pds.push(ProtectionDomain::from_xml( + config, + &xml_sdf, + &child, + false, + &domain_schedule, + )?), "channel" => channel_nodes.push(child), "memory_region" => mrs.push(SysMemoryRegion::from_xml(config, &xml_sdf, &child)?), "virtual_machine" => { @@ -1211,7 +1341,13 @@ pub fn parse(filename: &str, xml: &str, config: &Config) -> Result { + if !config.domain_scheduler { + let pos = xml_sdf.doc.text_pos_at(child.range().start); + return Err(format!("Domain schedule is only supported if SDK is built with --experimental-domain-support: {}", loc_string(&xml_sdf, pos))); + } + }, _ => { let pos = xml_sdf.doc.text_pos_at(child.range().start); return Err(format!( @@ -1446,6 +1582,7 @@ pub fn parse(filename: &str, xml: &str, config: &Config) -> Result, pub normal_regions: Vec, + pub domain_scheduler: bool, } impl Config { @@ -1062,6 +1063,15 @@ impl Invocation { arg_strs.push(Invocation::fmt_field_cap("tcb", tcb, cap_lookup)); (vcpu, &cap_lookup[&vcpu]) } + InvocationArgs::DomainSetSet { + domain_set, + domain, + tcb, + } => { + arg_strs.push(Invocation::fmt_field("domain", domain as u64)); + arg_strs.push(Invocation::fmt_field_cap("tcb", tcb, cap_lookup)); + (domain_set, cap_lookup.get(&domain_set).unwrap().as_str()) + } }; _ = writeln!( f, @@ -1097,6 +1107,7 @@ impl Invocation { InvocationLabel::CNodeCopy | InvocationLabel::CNodeMint => "CNode", InvocationLabel::SchedControlConfigureFlags => "SchedControl", InvocationLabel::ARMVCPUSetTCB => "VCPU", + InvocationLabel::DomainSetSet => "DomainSet", _ => panic!( "Internal error: unexpected label when getting object type '{:?}'", self.label @@ -1125,6 +1136,7 @@ impl Invocation { InvocationLabel::CNodeMint => "Mint", InvocationLabel::SchedControlConfigureFlags => "ConfigureFlags", InvocationLabel::ARMVCPUSetTCB => "VCPUSetTcb", + InvocationLabel::DomainSetSet => "Set", _ => panic!( "Internal error: unexpected label when getting method name '{:?}'", self.label @@ -1166,6 +1178,7 @@ impl InvocationArgs { InvocationLabel::SchedControlConfigureFlags } InvocationArgs::ArmVcpuSetTcb { .. } => InvocationLabel::ARMVCPUSetTCB, + InvocationArgs::DomainSetSet { .. } => InvocationLabel::DomainSetSet, } } @@ -1317,6 +1330,11 @@ impl InvocationArgs { vec![sched_context], ), InvocationArgs::ArmVcpuSetTcb { vcpu, tcb } => (vcpu, vec![], vec![tcb]), + InvocationArgs::DomainSetSet { + domain_set, + domain, + tcb, + } => (domain_set, vec![domain as u64], vec![tcb]), } } } @@ -1430,4 +1448,9 @@ pub enum InvocationArgs { vcpu: u64, tcb: u64, }, + DomainSetSet { + domain_set: u64, + domain: u8, + tcb: u64, + }, } diff --git a/tool/microkit/tests/test.rs b/tool/microkit/tests/test.rs index 24be4fb17..762071701 100644 --- a/tool/microkit/tests/test.rs +++ b/tool/microkit/tests/test.rs @@ -26,6 +26,7 @@ const DEFAULT_KERNEL_CONFIG: sel4::Config = sel4::Config { invocations_labels: json!(null), device_regions: vec![], normal_regions: vec![], + domain_scheduler: false, }; fn check_error(test_name: &str, expected_err: &str) {