From 00ea6e83b0f104be386dbcad2b3adf43de3a5eb9 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sat, 9 Dec 2023 11:29:32 +0100 Subject: [PATCH 001/114] feat: added Generic PVH boot --- sys/arch/amd64/amd64/genassym.cf | 1 + sys/arch/amd64/amd64/locore.S | 78 +++++++++++++++++++++++++++--- sys/arch/amd64/amd64/machdep.c | 2 +- sys/arch/x86/acpi/acpi_machdep.c | 2 +- sys/arch/x86/include/cpu.h | 1 + sys/arch/x86/x86/consinit.c | 3 +- sys/arch/x86/x86/identcpu.c | 2 + sys/arch/x86/x86/pmap.c | 2 +- sys/arch/x86/x86/x86_autoconf.c | 2 +- sys/arch/x86/x86/x86_machdep.c | 2 +- sys/arch/xen/x86/pvh_consinit.c | 4 ++ sys/arch/xen/x86/xen_mainbus.c | 7 ++- sys/arch/xen/xen/hypervisor.c | 56 +++++++++++++-------- sys/arch/xen/xen/shutdown_xenbus.c | 5 +- 14 files changed, 129 insertions(+), 38 deletions(-) diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf index b5c7330a22963..3512148a6acbe 100644 --- a/sys/arch/amd64/amd64/genassym.cf +++ b/sys/arch/amd64/amd64/genassym.cf @@ -373,6 +373,7 @@ define BST_TYPE offsetof(struct bus_space_tag, bst_type) define VM_GUEST_XENPV VM_GUEST_XENPV define VM_GUEST_XENPVH VM_GUEST_XENPVH +define VM_GUEST_GENPVH VM_GUEST_GENPVH ifdef XEN define CPU_INFO_VCPU offsetof(struct cpu_info, ci_vcpu) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 089c7388e18c0..fef2e34ddcc79 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -282,7 +282,7 @@ #include #define ELFNOTE(name, type, desctype, descdata...) 
\ -.pushsection .note.name ; \ +.pushsection .note.name, "a", @note ; \ .align 4 ; \ .long 2f - 1f /* namesz */ ; \ .long 4f - 3f /* descsz */ ; \ @@ -973,7 +973,6 @@ longmode_hi: movl %eax,_C_LABEL(cpuid_level) movl $VM_GUEST_XENPV, _C_LABEL(vm_guest) - /* * Initialize cpu_info_primary.ci_self := &cpu_info_primary, * and initialize some MSRs with @@ -1058,16 +1057,78 @@ ENTRY(start_xen32) rep stosb +/* + * https://github.com/Xilinx/xen/blob/master/xen/include/public/arch-x86/hvm/start_info.h + */ + + /* copy start_info from its current location to end of kernel */ + movl $RELOC(_end), %edi + movl $PAGE_SIZE, %ecx + movl %ebx, %esi + rep movsb /* copy %esi to %edi byte per byte %ecx times */ + + movl $RELOC(_end), %ebx /* now %ebx holds what's expected */ + + /* map memmap entries at the end of start_info */ + movl %ebx, %edx /* after kernel */ + addl $40, %edx /* memmap physical address */ + movl (%edx), %esi /* address of memmap entries */ + addl $8, %edx /* memmap_entries */ + movl (%edx), %ecx /* counter */ + movl $23, %eax /* struct hvm_memmap_table_entry is 24 bytes long */ + movl %ecx, %edx +mul_loop: /* 24 bytes * memmap_entries */ + addl %edx, %ecx + subl $1, %eax + jnz mul_loop + + pushl %ecx /* save count as offset */ + + movl %ebx, %edi /* to the beginning */ + addl $64, %edi /* destination end of struct */ + + rep movsb + + /* point hvm_memmap_table_entry to the end of ebx */ + movl %ebx, %edx + addl $40, %edx /* memmap_paddr */ + movl %ebx, %eax + addl $64, %eax /* aligned end of hvm_start_info */ + movl %eax, (%edx) /* update memmap_paddr */ + + /* copy command line */ + movl %ebx, %edx + addl $24, %edx /* cmdline address */ + movl (%edx), %esi + movl %ebx, %edi + addl $64, %edi /* end of hvm_start_info */ + popl %ecx + addl %ecx, %edi /* add size of memmap table */ + pushl %edi /* save new cmdline position */ +cmdcpy: + movb (%esi), %al + movb %al, (%edi) + addl $1, %esi + addl $1, %edi + cmpl $0, (%esi) /* NULL terminated */ + jne cmdcpy + movb $0, 
(%edi) + + popl (%edx) /* point to new location */ + /* * save addr of the hvm_start_info structure. This is also the end * of the symbol table */ - movl %ebx, RELOC(hvm_start_paddr) + movl %ebx, RELOC(hvm_start_paddr) /* copy start_info addr to hvm_start_paddr */ + /* below is the build of esym based on the false asumption ebx == end of esym */ movl %ebx, %eax - addl $KERNBASE_LO,%eax - movl $RELOC(esym),%ebp - movl %eax,(%ebp) - movl $KERNBASE_HI,4(%ebp) + addl $KERNBASE_LO,%eax /* 0x80000000 + ebx */ + /* for further mapping */ + movl $RELOC(esym),%ebp /* address of esym in ebp */ + movl %eax,(%ebp) /* esym points to %ebx assuming it's the end of esym but it's not */ + movl $KERNBASE_HI,4(%ebp) /* ffffffff80000000 + ebx, make it 64 bits */ + /* get a page for HYPERVISOR_shared_info */ addl $PAGE_SIZE, %ebx addl $PGOFSET,%ebx @@ -1085,7 +1146,8 @@ ENTRY(start_xen32) movl $KERNBASE_HI,4(%ebp) /* announce ourself */ - movl $VM_GUEST_XENPVH, RELOC(vm_guest) + /* movl $VM_GUEST_XENPVH, RELOC(vm_guest) */ + movl $VM_GUEST_GENPVH, RELOC(vm_guest) jmp .Lbiosbasemem_finished END(start_xen32) .code64 diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c index 375d7a41e7fe4..d932e04c46e28 100644 --- a/sys/arch/amd64/amd64/machdep.c +++ b/sys/arch/amd64/amd64/machdep.c @@ -1710,7 +1710,7 @@ init_x86_64(paddr_t first_avail) #endif #ifdef XEN - if (vm_guest == VM_GUEST_XENPVH) + if (vm_guest == VM_GUEST_XENPVH || vm_guest == VM_GUEST_GENPVH) xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL); #endif init_pte(); diff --git a/sys/arch/x86/acpi/acpi_machdep.c b/sys/arch/x86/acpi/acpi_machdep.c index 9dbfe49ca5f01..b75767fe615e0 100644 --- a/sys/arch/x86/acpi/acpi_machdep.c +++ b/sys/arch/x86/acpi/acpi_machdep.c @@ -158,7 +158,7 @@ acpi_md_OsGetRootPointer(void) } #else #ifdef XEN - if (vm_guest == VM_GUEST_XENPVH) { + if (vm_guest == VM_GUEST_XENPVH || vm_guest == VM_GUEST_GENPVH) { PhysicalAddress = hvm_start_info->rsdp_paddr; if (PhysicalAddress) return 
PhysicalAddress; diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h index 0dae029202a56..c82da8311574a 100644 --- a/sys/arch/x86/include/cpu.h +++ b/sys/arch/x86/include/cpu.h @@ -516,6 +516,7 @@ typedef enum vm_guest { VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_VIRTUALBOX, + VM_GUEST_GENPVH, VM_LAST } vm_guest_t; extern vm_guest_t vm_guest; diff --git a/sys/arch/x86/x86/consinit.c b/sys/arch/x86/x86/consinit.c index 7a65f1bfa096a..147c1788ffe8d 100644 --- a/sys/arch/x86/x86/consinit.c +++ b/sys/arch/x86/x86/consinit.c @@ -171,8 +171,7 @@ consinit(void) #if (NCOM > 0) int rv; #endif - -#ifdef XENPVHVM +#if defined(XENPVHVM) && !defined(GENPVH) if (vm_guest == VM_GUEST_XENPVH) { if (xen_pvh_consinit() != 0) return; diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c index ec2fa0620b751..65ce7afe7c589 100644 --- a/sys/arch/x86/x86/identcpu.c +++ b/sys/arch/x86/x86/identcpu.c @@ -1044,6 +1044,7 @@ static const struct vm_name_guest vm_bios_vendors[] = { { "BHYVE", VM_GUEST_VM }, /* bhyve */ { "Seabios", VM_GUEST_VM }, /* KVM */ { "innotek GmbH", VM_GUEST_VIRTUALBOX }, /* Oracle VirtualBox */ + { "Generic PVH", VM_GUEST_GENPVH}, }; static const struct vm_name_guest vm_system_products[] = { @@ -1065,6 +1066,7 @@ identify_hypervisor(void) switch (vm_guest) { case VM_GUEST_XENPV: case VM_GUEST_XENPVH: + case VM_GUEST_GENPVH: /* guest type already known, no bios info */ return; default: diff --git a/sys/arch/x86/x86/pmap.c b/sys/arch/x86/x86/pmap.c index 9caddac9e79c6..1b911c79e331d 100644 --- a/sys/arch/x86/x86/pmap.c +++ b/sys/arch/x86/x86/pmap.c @@ -1383,7 +1383,7 @@ pmap_bootstrap(vaddr_t kva_start) pentium_idt_vaddr = pmap_bootstrap_valloc(1); #endif -#if defined(XENPVHVM) +#if defined(XENPVHVM) && !defined(GENPVH) /* XXX: move to hypervisor.c with appropriate API adjustments */ extern paddr_t HYPERVISOR_shared_info_pa; extern volatile struct xencons_interface *xencons_interface; /* XXX */ diff --git a/sys/arch/x86/x86/x86_autoconf.c 
b/sys/arch/x86/x86/x86_autoconf.c index fd8688fe9ccfb..ba4b698d0676a 100644 --- a/sys/arch/x86/x86/x86_autoconf.c +++ b/sys/arch/x86/x86/x86_autoconf.c @@ -540,7 +540,7 @@ void cpu_bootconf(void) { #ifdef XEN - if (vm_guest == VM_GUEST_XENPVH) { + if (vm_guest == VM_GUEST_XENPVH || vm_guest == VM_GUEST_GENPVH) { xen_bootconf(); return; } diff --git a/sys/arch/x86/x86/x86_machdep.c b/sys/arch/x86/x86/x86_machdep.c index fabd76b115522..8a2364b6eccc7 100644 --- a/sys/arch/x86/x86/x86_machdep.c +++ b/sys/arch/x86/x86/x86_machdep.c @@ -911,7 +911,7 @@ init_x86_clusters(void) * the boot program). */ #ifdef XEN - if (vm_guest == VM_GUEST_XENPVH) { + if (vm_guest == VM_GUEST_XENPVH || vm_guest == VM_GUEST_GENPVH) { x86_add_xen_clusters(); } #endif /* XEN */ diff --git a/sys/arch/xen/x86/pvh_consinit.c b/sys/arch/xen/x86/pvh_consinit.c index 17a5ca2bf07dd..0562a20ce2231 100644 --- a/sys/arch/xen/x86/pvh_consinit.c +++ b/sys/arch/xen/x86/pvh_consinit.c @@ -46,6 +46,9 @@ __KERNEL_RCSID(0, "$NetBSD: pvh_consinit.c,v 1.6 2023/10/17 13:27:58 bouyer Exp int xen_pvh_consinit(void) { +#ifdef GENPVH + return 0; +#else /* * hugly hack because we're called multiple times at different * boot stage. 
@@ -110,4 +113,5 @@ xen_pvh_consinit(void) xenconscn_attach(); #endif return 1; +#endif /* GENPVH */ } diff --git a/sys/arch/xen/x86/xen_mainbus.c b/sys/arch/xen/x86/xen_mainbus.c index 97c030f21f9dc..0dabb28a9cab4 100644 --- a/sys/arch/xen/x86/xen_mainbus.c +++ b/sys/arch/xen/x86/xen_mainbus.c @@ -100,7 +100,9 @@ extern bool acpi_present; extern bool mpacpi_active; void xen_mainbus_attach(device_t, device_t, void *); +#ifndef GENPVH static int xen_mainbus_print(void *, const char *); +#endif union xen_mainbus_attach_args { const char *mba_busname; /* first elem of all */ @@ -119,6 +121,7 @@ union xen_mainbus_attach_args { void xen_mainbus_attach(device_t parent, device_t self, void *aux) { +#ifndef GENPVH union xen_mainbus_attach_args mba; switch(vm_guest) { @@ -148,8 +151,9 @@ xen_mainbus_attach(device_t parent, device_t self, void *aux) aprint_error_dev(self, "couldn't establish power handler\n"); } +#endif } - +#ifndef GENPVH static int xen_mainbus_print(void *aux, const char *pnp) { @@ -159,3 +163,4 @@ xen_mainbus_print(void *aux, const char *pnp) aprint_normal("%s at %s", mba->mba_busname, pnp); return UNCONF; } +#endif diff --git a/sys/arch/xen/xen/hypervisor.c b/sys/arch/xen/xen/hypervisor.c index 47b7dc3ec0a54..0d3e2b815150e 100644 --- a/sys/arch/xen/xen/hypervisor.c +++ b/sys/arch/xen/xen/hypervisor.c @@ -127,7 +127,9 @@ void hypervisor_attach(device_t, device_t, void *); CFATTACH_DECL_NEW(hypervisor, 0, hypervisor_match, hypervisor_attach, NULL, NULL); +#ifndef GENPVH static int hypervisor_print(void *, const char *); +#endif union hypervisor_attach_cookie { const char *hac_device; /* first elem of all */ @@ -166,7 +168,7 @@ struct x86_isa_chipset x86_isa_chipset; #endif #endif -#if defined(XENPVHVM) || defined(XENPVH) +#if defined(XENPVHVM) || defined(XENPVH) || defined(GENPVH) #include #include #include @@ -194,9 +196,11 @@ static int xen_hvm_vec = 0; int xen_version; +#ifndef GENPVH /* power management, for save/restore */ static bool 
hypervisor_suspend(device_t, const pmf_qual_t *); static bool hypervisor_resume(device_t, const pmf_qual_t *); +#endif /* from FreeBSD */ #define XEN_MAGIC_IOPORT 0x10 @@ -241,27 +245,31 @@ void init_xen_early(void) { const char *cmd_line; - if (vm_guest != VM_GUEST_XENPVH) + if (vm_guest != VM_GUEST_XENPVH && vm_guest != VM_GUEST_GENPVH) return; - xen_init_hypercall_page(); + hvm_start_info = (void *)((uintptr_t)hvm_start_paddr + KERNBASE); - HYPERVISOR_shared_info = (void *)((uintptr_t)HYPERVISOR_shared_info_pa + KERNBASE); - struct xen_add_to_physmap xmap = { - .domid = DOMID_SELF, - .space = XENMAPSPACE_shared_info, - .idx = 0, /* Important - XEN checks for this */ - .gpfn = atop(HYPERVISOR_shared_info_pa) - }; + if (vm_guest == VM_GUEST_XENPVH) { + xen_init_hypercall_page(); + + HYPERVISOR_shared_info = (void *)((uintptr_t)HYPERVISOR_shared_info_pa + KERNBASE); + struct xen_add_to_physmap xmap = { + .domid = DOMID_SELF, + .space = XENMAPSPACE_shared_info, + .idx = 0, /* Important - XEN checks for this */ + .gpfn = atop(HYPERVISOR_shared_info_pa) + }; - int err; + int err; - if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap)) < 0) { - printk( - "Xen HVM: Unable to register HYPERVISOR_shared_info %d\n", err); + if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap)) < 0) { + printk( + "Xen HVM: Unable to register HYPERVISOR_shared_info %d\n", err); + } + delay_func = x86_delay = xen_delay; + x86_initclock_func = xen_initclocks; } - delay_func = x86_delay = xen_delay; - x86_initclock_func = xen_initclocks; if (hvm_start_info->cmdline_paddr != 0) { cmd_line = (void *)((uintptr_t)hvm_start_info->cmdline_paddr + KERNBASE); @@ -296,6 +304,7 @@ xen_check_hypervisordev(void) static int xen_hvm_init_late(void) { +#ifndef GENPVH struct idt_vec *iv = &(cpu_info_primary.ci_idtvec); if (HYPERVISOR_xen_version(XENVER_version, NULL) < 0) { @@ -357,6 +366,7 @@ xen_hvm_init_late(void) idt_vec_set(iv, xen_hvm_vec, &IDTVEC(hypervisor_pvhvm_callback)); 
events_default_setup(); +#endif /* GENPVH */ return 1; } @@ -409,10 +419,11 @@ xen_hvm_init(void) if (xen_hvm_init_late() == 0) return 0; +#ifndef GENPVH struct xen_hvm_param xen_hvm_param; xen_hvm_param.domid = DOMID_SELF; xen_hvm_param.index = HVM_PARAM_CONSOLE_PFN; - + if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) { aprint_debug( "Xen HVM: Unable to obtain xencons page address\n"); @@ -438,6 +449,7 @@ xen_hvm_init(void) xen_start_info.console.domU.evtchn = xen_hvm_param.value; } +#endif /* * PR port-amd64/55543 @@ -445,11 +457,12 @@ xen_hvm_init(void) * fully functional here. This version also doesn't support * HVM_PARAM_CONSOLE_PFN. */ +#ifndef GENPVH if (xencons_interface != 0) { delay_func = x86_delay = xen_delay; x86_initclock_func = xen_initclocks; } - +#endif vm_guest = VM_GUEST_XENPVHVM; /* Be more specific */ return 1; } @@ -566,7 +579,7 @@ hypervisor_vcpu_print(void *aux, const char *parent) void hypervisor_attach(device_t parent, device_t self, void *aux) { - +#ifndef GENPVH #if NPCI >0 #ifdef PCI_BUS_FIXUP int pci_maxbus = 0; @@ -772,9 +785,9 @@ hypervisor_attach(device_t parent, device_t self, void *aux) if (!pmf_device_register(self, hypervisor_suspend, hypervisor_resume)) aprint_error_dev(self, "couldn't establish power handler\n"); - +#endif /* GENPVH */ } - +#ifndef GENPVH static bool hypervisor_suspend(device_t dev, const pmf_qual_t *qual) { @@ -806,6 +819,7 @@ hypervisor_print(void *aux, const char *parent) aprint_normal("%s at %s", hac->hac_device, parent); return (UNCONF); } +#endif /* GENPVH */ #define DIR_MODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) diff --git a/sys/arch/xen/xen/shutdown_xenbus.c b/sys/arch/xen/xen/shutdown_xenbus.c index 264a72a0d9a94..217182f527292 100644 --- a/sys/arch/xen/xen/shutdown_xenbus.c +++ b/sys/arch/xen/xen/shutdown_xenbus.c @@ -68,6 +68,7 @@ __KERNEL_RCSID(0, "$NetBSD: shutdown_xenbus.c,v 1.9 2020/05/13 22:13:49 jdolecek #define SHUTDOWN_PATH "control" #define SHUTDOWN_NAME "shutdown" 
+#ifndef GENPVH static struct sysmon_pswitch xenbus_power = { .smpsw_type = PSWITCH_TYPE_POWER, .smpsw_name = "xenbus", @@ -85,7 +86,6 @@ static void xenbus_shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) { - struct xenbus_transaction *xbt; int error; char reqstr[32]; @@ -140,10 +140,12 @@ static struct xenbus_watch xenbus_shutdown_watch = { .node = __UNCONST(SHUTDOWN_PATH "/" SHUTDOWN_NAME), /* XXX */ .xbw_callback = xenbus_shutdown_handler, }; +#endif /* GENPVH */ void shutdown_xenbus_setup(void) { +#ifndef GENPVH xen_suspend_allow = false; if (sysmon_pswitch_register(&xenbus_power) != 0 || @@ -155,4 +157,5 @@ shutdown_xenbus_setup(void) if (register_xenbus_watch(&xenbus_shutdown_watch)) { aprint_error("%s: unable to watch control/shutdown\n", __func__); } +#endif } From 9c2f2baa02b5e7f759fd022082897d60729e94f7 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sat, 9 Dec 2023 20:49:51 +0100 Subject: [PATCH 002/114] fix: beginning asm cleanup using genassym.cf instead of plain values --- sys/arch/amd64/amd64/genassym.cf | 5 +++++ sys/arch/amd64/amd64/locore.S | 33 +++++++++++++------------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf index 3512148a6acbe..12b609bcc78cc 100644 --- a/sys/arch/amd64/amd64/genassym.cf +++ b/sys/arch/amd64/amd64/genassym.cf @@ -382,6 +382,11 @@ define SIR_XENIPL_VM SIR_XENIPL_VM define SIR_XENIPL_SCHED SIR_XENIPL_SCHED define SIR_XENIPL_HIGH SIR_XENIPL_HIGH define EVTCHN_UPCALL_MASK offsetof(struct vcpu_info, evtchn_upcall_mask) +define MMAP_PADDR offsetof(struct hvm_start_info, memmap_paddr) +define MMAP_ENTRIES offsetof(struct hvm_start_info, memmap_entries) +define CMDLINE_PADDR offsetof(struct hvm_start_info, cmdline_paddr) +define MMAP_ENT_SZ sizeof(struct hvm_memmap_table_entry) +define MMAP_SI_SZ sizeof(struct hvm_start_info) ifdef XENPV define XEN_PT_BASE offsetof(struct start_info, pt_base) 
define XEN_NR_PT_FRAMES offsetof(struct start_info, nr_pt_frames) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index fef2e34ddcc79..ea3af08a233e5 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1058,7 +1058,7 @@ ENTRY(start_xen32) stosb /* - * https://github.com/Xilinx/xen/blob/master/xen/include/public/arch-x86/hvm/start_info.h + * sys/external/mit/xen-include-public/dist/xen/include/public/arch-x86/hvm/start_info.h */ /* copy start_info from its current location to end of kernel */ @@ -1069,39 +1069,32 @@ ENTRY(start_xen32) movl $RELOC(_end), %ebx /* now %ebx holds what's expected */ - /* map memmap entries at the end of start_info */ - movl %ebx, %edx /* after kernel */ - addl $40, %edx /* memmap physical address */ - movl (%edx), %esi /* address of memmap entries */ - addl $8, %edx /* memmap_entries */ - movl (%edx), %ecx /* counter */ - movl $23, %eax /* struct hvm_memmap_table_entry is 24 bytes long */ - movl %ecx, %edx -mul_loop: /* 24 bytes * memmap_entries */ - addl %edx, %ecx - subl $1, %eax - jnz mul_loop + /* copy memmap entries at the end of start_info */ + imul $MMAP_ENT_SZ, (MMAP_ENTRIES)(%ebx), %ecx /* sizeof hvm_memmap_table_entry * entries */ pushl %ecx /* save count as offset */ - movl %ebx, %edi /* to the beginning */ - addl $64, %edi /* destination end of struct */ + movl %ebx, %edx + addl $MMAP_PADDR, %edx /* address of mmap entries */ + movl (%edx), %esi + movl %ebx, %edi /* to the end of start_info */ + addl $MMAP_SI_SZ, %edi /* ebx + sizeof(start_info) */ - rep movsb + rep movsb /* esi (mmap entries addr) to end of start_info */ /* point hvm_memmap_table_entry to the end of ebx */ movl %ebx, %edx - addl $40, %edx /* memmap_paddr */ + addl $MMAP_PADDR, %edx movl %ebx, %eax - addl $64, %eax /* aligned end of hvm_start_info */ + addl $MMAP_SI_SZ, %eax /* aligned end of hvm_start_info */ movl %eax, (%edx) /* update memmap_paddr */ /* copy command line */ movl %ebx, %edx - addl 
$24, %edx /* cmdline address */ + addl $CMDLINE_PADDR, %edx /* cmdline address */ movl (%edx), %esi movl %ebx, %edi - addl $64, %edi /* end of hvm_start_info */ + addl $MMAP_SI_SZ, %edi /* end of hvm_start_info */ popl %ecx addl %ecx, %edi /* add size of memmap table */ pushl %edi /* save new cmdline position */ From de1ee90ebaf9950d91d36be073384581a1b9ec86 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 10 Dec 2023 10:38:39 +0100 Subject: [PATCH 003/114] fix: assembly cleanup --- sys/arch/amd64/amd64/locore.S | 74 +++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index ea3af08a233e5..1bb8d5ef7f947 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1034,33 +1034,11 @@ END(start) # if !defined(XENPV) /* entry point for Xen PVH */ .code32 -ENTRY(start_xen32) - /* Xen doesn't start us with a valid gdt */ - movl $RELOC(gdtdesc32), %eax - lgdt (%eax) - jmp $GSEL(GCODE_SEL, SEL_KPL), $RELOC(.Lreload_cs) - -.Lreload_cs: - movw $GSEL(GDATA_SEL, SEL_KPL), %ax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - - /* we need a valid stack */ - movl $RELOC(tmpstk),%esp - - /* clear BSS */ - xorl %eax,%eax - movl $RELOC(__bss_start),%edi - movl $RELOC(_end),%ecx - subl %edi,%ecx - rep - stosb - +#ifdef GENPVH +ENTRY(copy_start_info) /* * sys/external/mit/xen-include-public/dist/xen/include/public/arch-x86/hvm/start_info.h */ - /* copy start_info from its current location to end of kernel */ movl $RELOC(_end), %edi movl $PAGE_SIZE, %ecx @@ -1095,20 +1073,44 @@ ENTRY(start_xen32) movl (%edx), %esi movl %ebx, %edi addl $MMAP_SI_SZ, %edi /* end of hvm_start_info */ - popl %ecx + popl %ecx /* previously saved end of data offset */ addl %ecx, %edi /* add size of memmap table */ pushl %edi /* save new cmdline position */ -cmdcpy: - movb (%esi), %al - movb %al, (%edi) - addl $1, %esi - addl $1, %edi - cmpl $0, (%esi) /* NULL 
terminated */ - jne cmdcpy - movb $0, (%edi) + + repnz movsb popl (%edx) /* point to new location */ + ret +END(copy_start_info) +#endif /* GENPVH */ + +ENTRY(start_xen32) + /* Xen doesn't start us with a valid gdt */ + movl $RELOC(gdtdesc32), %eax + lgdt (%eax) + jmp $GSEL(GCODE_SEL, SEL_KPL), $RELOC(.Lreload_cs) + +.Lreload_cs: + movw $GSEL(GDATA_SEL, SEL_KPL), %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + + /* we need a valid stack */ + movl $RELOC(tmpstk),%esp + + /* clear BSS */ + xorl %eax,%eax + movl $RELOC(__bss_start),%edi + movl $RELOC(_end),%ecx + subl %edi,%ecx + rep + stosb + +#ifdef GENPVH + call copy_start_info +#endif /* * save addr of the hvm_start_info structure. This is also the end * of the symbol table @@ -1139,8 +1141,11 @@ cmdcpy: movl $KERNBASE_HI,4(%ebp) /* announce ourself */ - /* movl $VM_GUEST_XENPVH, RELOC(vm_guest) */ +#ifdef GENPVH movl $VM_GUEST_GENPVH, RELOC(vm_guest) +#else + movl $VM_GUEST_XENPVH, RELOC(vm_guest) +#endif jmp .Lbiosbasemem_finished END(start_xen32) .code64 @@ -1154,6 +1159,7 @@ ENTRY(hypercall_page) /* Returns -1, on HYPERVISOR_xen_version() */ retq .align HYPERCALL_PAGE_OFFSET, 0x90 END(hypercall_page) + #endif /* XEN */ /* From f5d2a1bbb94546575e4d2f6efca548f44b3fc7c5 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 10 Dec 2023 11:41:34 +0100 Subject: [PATCH 004/114] fix: ifdef'ed pretty much all Xen related code --- sys/arch/amd64/amd64/locore.S | 14 +++++- sys/arch/x86/x86/cpu.c | 6 +-- sys/arch/x86/x86/mainbus.c | 4 +- sys/arch/xen/include/hypervisor.h | 2 + sys/arch/xen/xen/hypervisor.c | 81 +++++++++++++++--------------- sys/arch/xen/xen/shutdown_xenbus.c | 6 +-- 6 files changed, 62 insertions(+), 51 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 1bb8d5ef7f947..83de717a0a521 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1047,8 +1047,11 @@ ENTRY(copy_start_info) movl $RELOC(_end), %ebx /* now %ebx holds 
what's expected */ - /* copy memmap entries at the end of start_info */ - imul $MMAP_ENT_SZ, (MMAP_ENTRIES)(%ebx), %ecx /* sizeof hvm_memmap_table_entry * entries */ + /* + * copy memmap entries at the end of start_info + * sizeof hvm_memmap_table_entry * entries + */ + imul $MMAP_ENT_SZ, (MMAP_ENTRIES)(%ebx), %ecx pushl %ecx /* save count as offset */ @@ -1109,6 +1112,13 @@ ENTRY(start_xen32) stosb #ifdef GENPVH + /* + * Xen PVH expects start_info to be located at esym, after __kernel_end. + * Neither qemu nor firecracker do this, as the ABI doesn't expect this + * behavior https://xenbits.xen.org/docs/unstable/misc/pvh.html + * In order not to revamp all kernel memory layout, simply copy + * start_info where it is expected. + */ call copy_start_info #endif /* diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index 3d2feee61ec5c..721a2c1fe1c6c 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -483,7 +483,7 @@ cpu_attach(device_t parent, device_t self, void *aux) cpu_identify(ci); x86_errata(); x86_cpu_idle_init(); -#ifdef XENPVHVM +#if defined(XENPVHVM) && !defined(GENPVH) xen_hvm_init_cpu(ci); #endif break; @@ -493,7 +493,7 @@ cpu_attach(device_t parent, device_t self, void *aux) cpu_identify(ci); x86_errata(); x86_cpu_idle_init(); -#ifdef XENPVHVM +#if defined(XENPVHVM) && !defined(GENPVH) xen_hvm_init_cpu(ci); #endif break; @@ -1034,7 +1034,7 @@ cpu_hatch(void *v) * above. 
*/ cpu_init(ci); -#ifdef XENPVHVM +#if defined(XENPVHVM) && !defined(GENPVH) xen_hvm_init_cpu(ci); #endif (*x86_initclock_func)(); diff --git a/sys/arch/x86/x86/mainbus.c b/sys/arch/x86/x86/mainbus.c index a74fbbe31b931..ac2c56758af9e 100644 --- a/sys/arch/x86/x86/mainbus.c +++ b/sys/arch/x86/x86/mainbus.c @@ -218,7 +218,7 @@ mainbus_attach(device_t parent, device_t self, void *aux) aprint_naive("\n"); aprint_normal("\n"); -#if defined(XENPVHVM) +#if defined(XENPVHVM) && !defined(GENPVH) xen_hvm_init(); /* before attaching CPUs */ #endif @@ -230,7 +230,7 @@ mainbus_attach(device_t parent, device_t self, void *aux) #if defined(XENPV) } #endif /* XENPV */ -#if defined(XEN) +#if defined(XEN) && !defined(GENPVH) /* * before isa/pci probe, so that PV devices are not probed again * as emulated diff --git a/sys/arch/xen/include/hypervisor.h b/sys/arch/xen/include/hypervisor.h index fdc66e18e4bb3..012641c58d65d 100644 --- a/sys/arch/xen/include/hypervisor.h +++ b/sys/arch/xen/include/hypervisor.h @@ -62,9 +62,11 @@ struct cpu_info; +#ifndef GENPVH int xen_hvm_init(void); int xen_hvm_init_cpu(struct cpu_info *); void xen_mainbus_attach(device_t, device_t, void *); +#endif struct hypervisor_attach_args { const char *haa_busname; diff --git a/sys/arch/xen/xen/hypervisor.c b/sys/arch/xen/xen/hypervisor.c index 0d3e2b815150e..e57f63ffb0cc9 100644 --- a/sys/arch/xen/xen/hypervisor.c +++ b/sys/arch/xen/xen/hypervisor.c @@ -190,9 +190,10 @@ volatile shared_info_t *HYPERVISOR_shared_info __read_mostly; paddr_t HYPERVISOR_shared_info_pa; union start_info_union start_info_union __aligned(PAGE_SIZE); struct hvm_start_info *hvm_start_info; - +#ifndef GENPVH static int xen_hvm_vec = 0; #endif +#endif int xen_version; @@ -216,6 +217,7 @@ enum { bool xenhvm_use_percpu_callback = 0; +#ifndef GENPVH static void xen_init_hypercall_page(void) { @@ -237,6 +239,7 @@ xen_init_hypercall_page(void) /* XXX: vtophys(&hypercall_page) */ wrmsr(descs[1], (uintptr_t)&hypercall_page - KERNBASE); } 
+#endif uint32_t hvm_start_paddr; @@ -250,26 +253,6 @@ init_xen_early(void) hvm_start_info = (void *)((uintptr_t)hvm_start_paddr + KERNBASE); - if (vm_guest == VM_GUEST_XENPVH) { - xen_init_hypercall_page(); - - HYPERVISOR_shared_info = (void *)((uintptr_t)HYPERVISOR_shared_info_pa + KERNBASE); - struct xen_add_to_physmap xmap = { - .domid = DOMID_SELF, - .space = XENMAPSPACE_shared_info, - .idx = 0, /* Important - XEN checks for this */ - .gpfn = atop(HYPERVISOR_shared_info_pa) - }; - - int err; - - if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap)) < 0) { - printk( - "Xen HVM: Unable to register HYPERVISOR_shared_info %d\n", err); - } - delay_func = x86_delay = xen_delay; - x86_initclock_func = xen_initclocks; - } if (hvm_start_info->cmdline_paddr != 0) { cmd_line = (void *)((uintptr_t)hvm_start_info->cmdline_paddr + KERNBASE); @@ -279,9 +262,33 @@ init_xen_early(void) xen_start_info.cmd_line[0] = '\0'; } xen_start_info.flags = hvm_start_info->flags; -} + if (vm_guest == VM_GUEST_GENPVH) + return; + +#ifndef GENPVH + xen_init_hypercall_page(); +#endif + + HYPERVISOR_shared_info = (void *)((uintptr_t)HYPERVISOR_shared_info_pa + KERNBASE); + struct xen_add_to_physmap xmap = { + .domid = DOMID_SELF, + .space = XENMAPSPACE_shared_info, + .idx = 0, /* Important - XEN checks for this */ + .gpfn = atop(HYPERVISOR_shared_info_pa) + }; + + int err; + + if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap)) < 0) { + printk( + "Xen HVM: Unable to register HYPERVISOR_shared_info %d\n", err); + } + delay_func = x86_delay = xen_delay; + x86_initclock_func = xen_initclocks; +} +#ifndef GENPVH static bool xen_check_hypervisordev(void) { @@ -304,7 +311,6 @@ xen_check_hypervisordev(void) static int xen_hvm_init_late(void) { -#ifndef GENPVH struct idt_vec *iv = &(cpu_info_primary.ci_idtvec); if (HYPERVISOR_xen_version(XENVER_version, NULL) < 0) { @@ -366,7 +372,6 @@ xen_hvm_init_late(void) idt_vec_set(iv, xen_hvm_vec, &IDTVEC(hypervisor_pvhvm_callback)); 
events_default_setup(); -#endif /* GENPVH */ return 1; } @@ -419,7 +424,6 @@ xen_hvm_init(void) if (xen_hvm_init_late() == 0) return 0; -#ifndef GENPVH struct xen_hvm_param xen_hvm_param; xen_hvm_param.domid = DOMID_SELF; xen_hvm_param.index = HVM_PARAM_CONSOLE_PFN; @@ -449,7 +453,6 @@ xen_hvm_init(void) xen_start_info.console.domU.evtchn = xen_hvm_param.value; } -#endif /* * PR port-amd64/55543 @@ -457,12 +460,10 @@ xen_hvm_init(void) * fully functional here. This version also doesn't support * HVM_PARAM_CONSOLE_PFN. */ -#ifndef GENPVH if (xencons_interface != 0) { delay_func = x86_delay = xen_delay; x86_initclock_func = xen_initclocks; } -#endif vm_guest = VM_GUEST_XENPVHVM; /* Be more specific */ return 1; } @@ -540,7 +541,7 @@ xen_hvm_init_cpu(struct cpu_info *ci) again = 1; return 1; } - +#endif /* GENPVH */ #endif /* XENPVHVM */ /* @@ -564,22 +565,13 @@ hypervisor_match(device_t parent, cfdata_t match, void *aux) return 1; } -#if defined(MULTIPROCESSOR) && defined(XENPV) -static int -hypervisor_vcpu_print(void *aux, const char *parent) -{ - /* Unconfigured cpus are ignored quietly. */ - return (QUIET); -} -#endif /* MULTIPROCESSOR && XENPV */ - /* * Attach the hypervisor. */ void hypervisor_attach(device_t parent, device_t self, void *aux) { -#ifndef GENPVH +#ifndef GENPVH /* we don't need Xen hypervisor in generic PVH mode */ #if NPCI >0 #ifdef PCI_BUS_FIXUP int pci_maxbus = 0; @@ -788,6 +780,15 @@ hypervisor_attach(device_t parent, device_t self, void *aux) #endif /* GENPVH */ } #ifndef GENPVH +#if defined(MULTIPROCESSOR) && defined(XENPV) +static int +hypervisor_vcpu_print(void *aux, const char *parent) +{ + /* Unconfigured cpus are ignored quietly. 
*/ + return (QUIET); +} +#endif /* MULTIPROCESSOR && XENPV */ + static bool hypervisor_suspend(device_t dev, const pmf_qual_t *qual) { @@ -819,7 +820,6 @@ hypervisor_print(void *aux, const char *parent) aprint_normal("%s at %s", hac->hac_device, parent); return (UNCONF); } -#endif /* GENPVH */ #define DIR_MODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) @@ -883,3 +883,4 @@ xen_map_vcpu(struct cpu_info *ci) ci->ci_vcpuid, ret); } } +#endif /* GENPVH */ diff --git a/sys/arch/xen/xen/shutdown_xenbus.c b/sys/arch/xen/xen/shutdown_xenbus.c index 217182f527292..23889c11bf061 100644 --- a/sys/arch/xen/xen/shutdown_xenbus.c +++ b/sys/arch/xen/xen/shutdown_xenbus.c @@ -58,6 +58,7 @@ #include __KERNEL_RCSID(0, "$NetBSD: shutdown_xenbus.c,v 1.9 2020/05/13 22:13:49 jdolecek Exp $"); +#ifndef GENPVH #include #include @@ -68,7 +69,6 @@ __KERNEL_RCSID(0, "$NetBSD: shutdown_xenbus.c,v 1.9 2020/05/13 22:13:49 jdolecek #define SHUTDOWN_PATH "control" #define SHUTDOWN_NAME "shutdown" -#ifndef GENPVH static struct sysmon_pswitch xenbus_power = { .smpsw_type = PSWITCH_TYPE_POWER, .smpsw_name = "xenbus", @@ -140,12 +140,10 @@ static struct xenbus_watch xenbus_shutdown_watch = { .node = __UNCONST(SHUTDOWN_PATH "/" SHUTDOWN_NAME), /* XXX */ .xbw_callback = xenbus_shutdown_handler, }; -#endif /* GENPVH */ void shutdown_xenbus_setup(void) { -#ifndef GENPVH xen_suspend_allow = false; if (sysmon_pswitch_register(&xenbus_power) != 0 || @@ -157,5 +155,5 @@ shutdown_xenbus_setup(void) if (register_xenbus_watch(&xenbus_shutdown_watch)) { aprint_error("%s: unable to watch control/shutdown\n", __func__); } -#endif } +#endif /* GENPVH */ From 1b4c8bce7235c3bb61f88f36c04a503fc6b940e1 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 10 Dec 2023 11:45:50 +0100 Subject: [PATCH 005/114] fix: ditched pvh_consinit and xen_mainbus --- sys/arch/xen/x86/pvh_consinit.c | 6 ++---- sys/arch/xen/x86/xen_mainbus.c | 6 +----- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git 
a/sys/arch/xen/x86/pvh_consinit.c b/sys/arch/xen/x86/pvh_consinit.c index 0562a20ce2231..95463179f8a85 100644 --- a/sys/arch/xen/x86/pvh_consinit.c +++ b/sys/arch/xen/x86/pvh_consinit.c @@ -43,12 +43,10 @@ __KERNEL_RCSID(0, "$NetBSD: pvh_consinit.c,v 1.6 2023/10/17 13:27:58 bouyer Exp #include "xen_def_cons.h" +#ifndef GENPVH int xen_pvh_consinit(void) { -#ifdef GENPVH - return 0; -#else /* * hugly hack because we're called multiple times at different * boot stage. @@ -113,5 +111,5 @@ xen_pvh_consinit(void) xenconscn_attach(); #endif return 1; -#endif /* GENPVH */ } +#endif /* GENPVH */ diff --git a/sys/arch/xen/x86/xen_mainbus.c b/sys/arch/xen/x86/xen_mainbus.c index 0dabb28a9cab4..bf98b8df9c0af 100644 --- a/sys/arch/xen/x86/xen_mainbus.c +++ b/sys/arch/xen/x86/xen_mainbus.c @@ -35,6 +35,7 @@ #include __KERNEL_RCSID(0, "$NetBSD: xen_mainbus.c,v 1.10 2021/08/07 16:19:08 thorpej Exp $"); +#ifndef GENPVH #include #include #include @@ -100,9 +101,7 @@ extern bool acpi_present; extern bool mpacpi_active; void xen_mainbus_attach(device_t, device_t, void *); -#ifndef GENPVH static int xen_mainbus_print(void *, const char *); -#endif union xen_mainbus_attach_args { const char *mba_busname; /* first elem of all */ @@ -121,7 +120,6 @@ union xen_mainbus_attach_args { void xen_mainbus_attach(device_t parent, device_t self, void *aux) { -#ifndef GENPVH union xen_mainbus_attach_args mba; switch(vm_guest) { @@ -151,9 +149,7 @@ xen_mainbus_attach(device_t parent, device_t self, void *aux) aprint_error_dev(self, "couldn't establish power handler\n"); } -#endif } -#ifndef GENPVH static int xen_mainbus_print(void *aux, const char *pnp) { From 3ba2088cf67debd010c8a607e633098a0cbce64d Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 10 Dec 2023 18:56:28 +0100 Subject: [PATCH 006/114] fix: no more dependency on hypervisor --- sys/arch/amd64/amd64/locore.S | 2 ++ sys/arch/xen/conf/files.xen | 4 +++- sys/arch/xen/xen/hypervisor.c | 16 ++++++---------- 3 files changed, 11 
insertions(+), 11 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 83de717a0a521..ccdec75922e90 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1134,6 +1134,7 @@ ENTRY(start_xen32) movl %eax,(%ebp) /* esym points to %ebx assuming it's the end of esym but it's not */ movl $KERNBASE_HI,4(%ebp) /* ffffffff80000000 + ebx, make it 64 bits */ +#ifndef GENPVH /* get a page for HYPERVISOR_shared_info */ addl $PAGE_SIZE, %ebx addl $PGOFSET,%ebx @@ -1141,6 +1142,7 @@ ENTRY(start_xen32) movl $RELOC(HYPERVISOR_shared_info_pa),%ebp movl %ebx,(%ebp) movl $0,4(%ebp) +#endif /* XXX assume hvm_start_info+dependant structure fits in a single page */ addl $PAGE_SIZE, %ebx addl $PGOFSET,%ebx diff --git a/sys/arch/xen/conf/files.xen b/sys/arch/xen/conf/files.xen index 3c30954627b54..6f8b5dfe5ff8b 100644 --- a/sys/arch/xen/conf/files.xen +++ b/sys/arch/xen/conf/files.xen @@ -13,6 +13,8 @@ file arch/xen/xen/xen_clock.c xen file arch/xen/x86/xen_bus_dma.c xen file arch/xen/xen/genfb_xen.c xen & genfb +file arch/xen/xen/hypervisor.c xen needs-flag + file arch/xen/x86/pvh_consinit.c xenpvhvm define hypervisorbus {} @@ -21,7 +23,7 @@ define xendevbus {} # Xen hypervisor device hypervisor { [apid = -1]}: isabus, pcibus, sysmon_power, xendevbus, acpibus attach hypervisor at hypervisorbus -file arch/xen/xen/hypervisor.c hypervisor needs-flag +#file arch/xen/xen/hypervisor.c hypervisor needs-flag file arch/xen/xen/shutdown_xenbus.c hypervisor # Xenbus diff --git a/sys/arch/xen/xen/hypervisor.c b/sys/arch/xen/xen/hypervisor.c index e57f63ffb0cc9..5754c16f5e3eb 100644 --- a/sys/arch/xen/xen/hypervisor.c +++ b/sys/arch/xen/xen/hypervisor.c @@ -121,15 +121,14 @@ __KERNEL_RCSID(0, "$NetBSD: hypervisor.c,v 1.96 2022/06/23 14:32:16 bouyer Exp $ #include #endif +#ifndef GENPVH int hypervisor_match(device_t, cfdata_t, void *); void hypervisor_attach(device_t, device_t, void *); CFATTACH_DECL_NEW(hypervisor, 0, 
hypervisor_match, hypervisor_attach, NULL, NULL); -#ifndef GENPVH static int hypervisor_print(void *, const char *); -#endif union hypervisor_attach_cookie { const char *hac_device; /* first elem of all */ @@ -156,6 +155,7 @@ union hypervisor_attach_cookie { #endif /* NPCI */ struct vcpu_attach_args hac_vcaa; }; +#endif /* * This is set when the ISA bus is attached. If it's not set by the @@ -190,18 +190,15 @@ volatile shared_info_t *HYPERVISOR_shared_info __read_mostly; paddr_t HYPERVISOR_shared_info_pa; union start_info_union start_info_union __aligned(PAGE_SIZE); struct hvm_start_info *hvm_start_info; -#ifndef GENPVH -static int xen_hvm_vec = 0; -#endif #endif int xen_version; #ifndef GENPVH +static int xen_hvm_vec = 0; /* power management, for save/restore */ static bool hypervisor_suspend(device_t, const pmf_qual_t *); static bool hypervisor_resume(device_t, const pmf_qual_t *); -#endif /* from FreeBSD */ #define XEN_MAGIC_IOPORT 0x10 @@ -211,6 +208,7 @@ enum { XMI_UNPLUG_NICS = 0x02, XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 }; +#endif #ifdef XENPVHVM @@ -268,7 +266,6 @@ init_xen_early(void) #ifndef GENPVH xen_init_hypercall_page(); -#endif HYPERVISOR_shared_info = (void *)((uintptr_t)HYPERVISOR_shared_info_pa + KERNBASE); struct xen_add_to_physmap xmap = { @@ -286,6 +283,7 @@ init_xen_early(void) } delay_func = x86_delay = xen_delay; x86_initclock_func = xen_initclocks; +#endif } #ifndef GENPVH @@ -544,6 +542,7 @@ xen_hvm_init_cpu(struct cpu_info *ci) #endif /* GENPVH */ #endif /* XENPVHVM */ +#ifndef GENPVH /* we don't need Xen hypervisor in generic PVH mode */ /* * Probe for the hypervisor; always succeeds. 
*/ @@ -571,7 +570,6 @@ hypervisor_match(device_t parent, cfdata_t match, void *aux) void hypervisor_attach(device_t parent, device_t self, void *aux) { -#ifndef GENPVH /* we don't need Xen hypervisor in generic PVH mode */ #if NPCI >0 #ifdef PCI_BUS_FIXUP int pci_maxbus = 0; @@ -777,9 +775,7 @@ hypervisor_attach(device_t parent, device_t self, void *aux) if (!pmf_device_register(self, hypervisor_suspend, hypervisor_resume)) aprint_error_dev(self, "couldn't establish power handler\n"); -#endif /* GENPVH */ } -#ifndef GENPVH #if defined(MULTIPROCESSOR) && defined(XENPV) static int hypervisor_vcpu_print(void *aux, const char *parent) From 68ca718fc144224dcddab8f8eaa118f2d10260a0 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 10 Dec 2023 20:12:31 +0100 Subject: [PATCH 007/114] fix: cleaner inclusion --- sys/arch/xen/conf/files.xen | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sys/arch/xen/conf/files.xen b/sys/arch/xen/conf/files.xen index 6f8b5dfe5ff8b..0343aa5167a0a 100644 --- a/sys/arch/xen/conf/files.xen +++ b/sys/arch/xen/conf/files.xen @@ -13,8 +13,6 @@ file arch/xen/xen/xen_clock.c xen file arch/xen/x86/xen_bus_dma.c xen file arch/xen/xen/genfb_xen.c xen & genfb -file arch/xen/xen/hypervisor.c xen needs-flag - file arch/xen/x86/pvh_consinit.c xenpvhvm define hypervisorbus {} @@ -23,7 +21,7 @@ define xendevbus {} # Xen hypervisor device hypervisor { [apid = -1]}: isabus, pcibus, sysmon_power, xendevbus, acpibus attach hypervisor at hypervisorbus -#file arch/xen/xen/hypervisor.c hypervisor needs-flag +file arch/xen/xen/hypervisor.c xen | hypervisor needs-flag file arch/xen/xen/shutdown_xenbus.c hypervisor # Xenbus From ccbb86e9269a7eca960fbfeb0d9f1ee02dc5e059 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 10 Dec 2023 20:17:05 +0100 Subject: [PATCH 008/114] first commit --- sys/arch/amd64/conf/MICROVM | 1226 +++++++++++++++++++++++++++++++++++ 1 file changed, 1226 insertions(+) create mode 100644 
sys/arch/amd64/conf/MICROVM diff --git a/sys/arch/amd64/conf/MICROVM b/sys/arch/amd64/conf/MICROVM new file mode 100644 index 0000000000000..2fc54355b3d2f --- /dev/null +++ b/sys/arch/amd64/conf/MICROVM @@ -0,0 +1,1226 @@ +# MICROVM + +machine amd64 x86 xen +include "conf/std" # MI standard options +include "arch/xen/conf/std.xenversion" + +options CPU_IN_CKSUM +options EXEC_ELF64 # exec ELF binaries +options EXEC_SCRIPT # exec #! scripts +options MTRR +options MULTIPROCESSOR + +options CHILD_MAX=1024 # 160 is too few +options OPEN_MAX=1024 # 128 is too few + +#options SELFRELOC + +options KGDB # remote debugger +options KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x3f8,KGDB_DEVRATE=9600 +makeoptions DEBUG="-g" # compile full symbol table + +options CONSDEVNAME="\"com\"" +#options CONS_OVERRIDE + +mainbus0 at root +cpu* at mainbus? +ioapic* at mainbus? apid ? + +options INCLUDE_CONFIG_FILE # embed config file in kernel binary +maxusers 8 # estimated number of users + +options INSECURE # disable kernel security levels - X needs this + +options RTC_OFFSET=0 # hardware clock is this many mins. west of GMT +#options NTP # NTP phase/frequency locked loop + +#options KTRACE # system call tracing via ktrace(1) + +#options CPU_UCODE # cpu ucode loading support + +# Note: SysV IPC parameters could be changed dynamically, see sysctl(8). 
+#options SYSVMSG # System V-like message queues +#options SYSVSEM # System V-like semaphores +#options SYSVSHM # System V-like memory sharing + +#options MODULAR # new style module(7) framework +#options MODULAR_DEFAULT_AUTOLOAD +#options USERCONF # userconf(4) support +options PIPE_SOCKETPAIR # smaller, but slower pipe(2) +#options SYSCTL_INCLUDE_DESCR # Include sysctl descriptions in kernel + +# CPU-related options +#options USER_LDT # User-settable LDT, used by Wine +#options SVS # Separate Virtual Space +#options PCPU_IDT # Per CPU IDTs + +# GCC Spectre variant 2 mitigation +#makeoptions SPECTRE_V2_GCC_MITIGATION=1 +#options SPECTRE_V2_GCC_MITIGATION + +# CPU features +#acpicpu* at cpu? # ACPI CPU (including frequency scaling) +#coretemp* at cpu? # Intel on-die thermal sensor +#est0 at cpu0 # Intel Enhanced SpeedStep (non-ACPI) +#hyperv0 at cpu0 # Microsoft Hyper-V +#odcm0 at cpu0 # On-demand clock modulation +#powernow0 at cpu0 # AMD PowerNow! and Cool'n'Quiet (non-ACPI) +#vmt0 at cpu0 # VMware Tools + +#Xen PV support for PVH and HVM guests +options XENPVHVM +options XEN +# Generic PVH support (qemu, firecracker...) +options GENPVH +#hypervisor* at mainbus? # Xen hypervisor +#xenbus* at hypervisor? # Xen virtual bus +#xencons* at hypervisor? # Xen virtual console +#xennet* at xenbus? # Xen virtual network interface +#xbd* at xenbus? # Xen virtual block device +# experimental: PVH dom0 support +#options DOM0OPS +#pseudo-device xenevt +#pseudo-device xvif +#pseudo-device xbdback + + +# Alternate buffer queue strategies for better responsiveness under high +# disk I/O load. 
+#options BUFQ_READPRIO +#options BUFQ_PRIOCSCAN + +# Diagnostic/debugging support options +#options DIAGNOSTIC # inexpensive kernel consistency checks + # XXX to be commented out on release branch +#options DEBUG # expensive debugging checks/support +#options LOCKDEBUG # expensive locking checks/support + +# +# Because gcc omits the frame pointer for any -O level, the line below +# is needed to make backtraces in DDB work. +# +makeoptions COPTS="-O2 -fno-omit-frame-pointer" +#options DDB # in-kernel debugger +#options DDB_COMMANDONENTER="bt" # execute command when ddb is entered +#options DDB_ONPANIC=1 # see also sysctl(7): `ddb.onpanic' +#options DDB_HISTORY_SIZE=512 # enable history editing in DDB +#options KGDB # remote debugger +#options KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x3f8,KGDB_DEVRATE=9600 +#makeoptions DEBUG="-g" # compile full symbol table for CTF +#options DDB_COMMANDONENTER="trace;show registers" +#options SYSCALL_STATS # per syscall counts +#options SYSCALL_TIMES # per syscall times +#options SYSCALL_TIMES_HASCOUNTER # use 'broken' rdtsc (soekris) +#options KDTRACE_HOOKS # kernel DTrace hooks + +# Kernel Undefined Behavior Sanitizer (kUBSan). +#options KUBSAN # mandatory +#options UBSAN_ALWAYS_FATAL # optional: panic on all kUBSan reports + +# Kernel Address Sanitizer (kASan). You need to disable SVS to use it. +# The quarantine is optional and can help KASAN find more use-after-frees. +# Use KASAN_PANIC if you want panics instead of warnings. +#makeoptions KASAN=1 # mandatory +#options KASAN # mandatory +#no options SVS # mandatory +#options POOL_QUARANTINE # optional +#options KASAN_PANIC # optional + +# Kernel Concurrency Sanitizer (kCSan). +#makeoptions KCSAN=1 # mandatory +#options KCSAN # mandatory +#options KCSAN_PANIC # optional + +# Kernel Memory Sanitizer (kMSan). You need to disable SVS and kernel modules +# to use it. POOL_NOCACHE is optional and can help KMSAN find uninitialized +# memory in pool caches. 
Note that KMSAN requires at least 4GB of RAM. +#makeoptions KMSAN=1 # mandatory +#options KMSAN # mandatory +#no options SVS # mandatory +#no options MODULAR # mandatory +#no options MODULAR_DEFAULT_AUTOLOAD # mandatory +#options POOL_NOCACHE # optional +#options KMSAN_PANIC # optional + +# Kernel Code Coverage Driver. +#makeoptions KCOV=1 +#options KCOV + +# Fault Injection Driver. +#options FAULT + +# Heartbeat checks +#options HEARTBEAT +#options HEARTBEAT_MAX_PERIOD_DEFAULT=15 + +# Compatibility options +# x86_64 never shipped with a.out binaries; the two options below are +# only relevant to 32-bit i386 binaries +#options EXEC_AOUT # required by binaries from before 1.5 +#options COMPAT_NOMID # NetBSD 0.8, 386BSD, and BSDI + +# NetBSD backward compatibility. Support goes from COMPAT_15 up until +# the latest release. Note that really old compat (< COMPAT_16) is only +# useful for 32-bit i386 binaries. +#include "conf/compat_netbsd15.config" + +#options COMPAT_386BSD_MBRPART # recognize old partition ID + +#options COMPAT_NETBSD32 +#options EXEC_ELF32 + +# Wedge support +#options DKWEDGE_AUTODISCOVER # Automatically add dk(4) instances +#options DKWEDGE_METHOD_GPT # Supports GPT partitions as wedges +#options DKWEDGE_METHOD_BSDLABEL # Support disklabel entries as wedges +#options DKWEDGE_METHOD_MBR # Support MBR partitions as wedges +#options DKWEDGE_METHOD_APPLE # Support Apple partitions as wedges +#options DKWEDGE_METHOD_RDB # Support RDB partitions as wedges + +# File systems +#include "conf/filesystems.config" +file-system FFS +file-system EXT2FS +file-system KERNFS + +# File system options +# ffs +#options FFS_EI # FFS Endian Independent support +options FFS_NO_SNAPSHOT # No FFS snapshot support +#options QUOTA # legacy UFS quotas +#options QUOTA2 # new, in-filesystem UFS quotas +#options UFS_ACL # UFS Access Control Lists +#options UFS_DIRHASH # UFS Large Directory Hashing +#options UFS_EXTATTR # Extended attribute support for UFS1 +options WAPBL # File 
system journaling support +# lfs +#options LFS_DIRHASH # LFS version of UFS_DIRHASH +# ext2fs +#options EXT2FS_SYSTEM_FLAGS # makes ext2fs file flags (append and + # immutable) behave as system flags. +# other +#options DISKLABEL_EI # disklabel Endian Independent support +#options NFSSERVER # Network File System server + +# Networking options +#options GATEWAY # packet forwarding +options INET # IP + ICMP + TCP + UDP +options INET6 # IPV6 +#options IPSEC # IP security +#options IPSEC_DEBUG # debug for IP security +#options MPLS # MultiProtocol Label Switching (needs mpls) +#options MROUTING # IP multicast routing +#options PIM # Protocol Independent Multicast +#options NETATALK # AppleTalk networking protocols +#options CAN # Controller Area Network protocol +#options PPP_BSDCOMP # BSD-Compress compression support for PPP +#options PPP_DEFLATE # Deflate compression support for PPP +#options PPP_FILTER # Active filter support for PPP (requires bpf) +#options TCP_DEBUG # Record last TCP_NDEBUG packets with SO_DEBUG +#options TCP_SIGNATURE # Enable RFC-2385 TCP md5 signatures + +#options ALTQ # Manipulate network interfaces' output queues +#options ALTQ_BLUE # Stochastic Fair Blue +#options ALTQ_CBQ # Class-Based Queueing +#options ALTQ_CDNR # Diffserv Traffic Conditioner +#options ALTQ_FIFOQ # First-In First-Out Queue +#options ALTQ_FLOWVALVE # RED/flow-valve (red-penalty-box) +#options ALTQ_HFSC # Hierarchical Fair Service Curve +#options ALTQ_LOCALQ # Local queueing discipline +#options ALTQ_PRIQ # Priority Queueing +#options ALTQ_RED # Random Early Detection +#options ALTQ_RIO # RED with IN/OUT +#options ALTQ_WFQ # Weighted Fair Queueing + +# These options enable verbose messages for several subsystems. +# Warning, these may compile large string tables into the kernel! 
+#options ACPIVERBOSE # verbose ACPI configuration messages +#options MIIVERBOSE # verbose PHY autoconfig messages +#options PCIVERBOSE # verbose PCI device autoconfig messages +#options PCI_CONFIG_DUMP # verbosely dump PCI config space +#options PCMCIAVERBOSE # verbose PCMCIA configuration messages +#options SCSIVERBOSE # human readable SCSI error messages +#options USBVERBOSE # verbose USB device autoconfig messages +#options HDAUDIOVERBOSE # human readable HDAUDIO device names + +#options NFS_BOOT_DHCP,NFS_BOOT_BOOTPARAM + +# +# wscons options +# +# builtin terminal emulations +#options WSEMUL_VT100 # VT100 / VT220 emulation +#options WSEMUL_SUN # sun terminal emulation +#options WSEMUL_DEFAULT="\"vt100\"" # NB: default is "sun" if enabled +# different kernel output - see dev/wscons/wsdisplayvar.h +#options WSDISPLAY_CUSTOM_OUTPUT # color customization from wsconsctl(8) +#options WS_KERNEL_FG=WSCOL_GREEN +#options WS_KERNEL_BG=WSCOL_BLACK +# customization of console border color +#options WSDISPLAY_CUSTOM_BORDER # custom border colors via wsconsctl(8) +# compatibility to other console drivers +#options WSDISPLAY_COMPAT_PCVT # emulate some ioctls +#options WSDISPLAY_COMPAT_SYSCONS # emulate some ioctls +#options WSDISPLAY_COMPAT_USL # wsconscfg VT handling +#options WSDISPLAY_COMPAT_RAWKBD # can get raw scancodes +# don't attach pckbd as the console if no PS/2 keyboard is found +#options PCKBD_CNATTACH_MAY_FAIL +# see dev/pckbport/wskbdmap_mfii.c for implemented layouts +#options PCKBD_LAYOUT="(KB_DE | KB_NODEAD)" +# allocate a number of virtual screens at autoconfiguration time +#options WSDISPLAY_DEFAULTSCREENS=4 +# use a large software cursor that doesn't blink +#options PCDISPLAY_SOFTCURSOR +# modify the screen type of the console; defaults to "80x25" +#options VGA_CONSOLE_SCREENTYPE="\"80x24\"" +# work around a hardware bug that loaded fonts don't work; found on ATI cards +#options VGA_CONSOLE_ATI_BROKEN_FONTSEL +# console scrolling support. 
+#options WSDISPLAY_SCROLLSUPPORT +# enable VGA raster mode capable of displaying multilingual text on console +#options VGA_RASTERCONSOLE +# enable splash screen support; requires genfb or radeonfb +#options SPLASHSCREEN + +# Kernel root file system and dump configuration. +config netbsd root on ? type ? +#config netbsd root on sd0a type ffs +#config netbsd root on ? type nfs + +# +# Device configuration +# + +# IPMI support +#ipmi0 at mainbus? +#ipmi_acpi* at acpi? +#ipmi0 at ipmi_acpi? + +# ACPI will be used if present. If not it will fall back to MPBIOS +#acpi0 at mainbus0 +#options ACPI_SCANPCI # find PCI roots using ACPI +options MPBIOS # configure CPUs and APICs using MPBIOS +options MPBIOS_SCANPCI # MPBIOS configures PCI roots +#options PCI_INTR_FIXUP # fixup PCI interrupt routing via ACPI +#options PCI_BUS_FIXUP # fixup PCI bus numbering +#options PCI_ADDR_FIXUP # fixup PCI I/O addresses +#options ACPI_ACTIVATE_DEV # If set, activate inactive devices +#options VGA_POST # in-kernel support for VGA POST + +# ACPI devices +#acpiacad* at acpi? # ACPI AC Adapter +#acpibat* at acpi? # ACPI Battery +#acpibut* at acpi? # ACPI Button +#acpidalb* at acpi? # ACPI Direct Application Launch Button +#acpiec* at acpi? # ACPI Embedded Controller (late) +#acpiecdt* at acpi? # ACPI Embedded Controller (early) +#acpifan* at acpi? # ACPI Fan +#acpilid* at acpi? # ACPI Lid Switch +#acpipmtr* at acpi? # ACPI Power Meter (experimental) +#acpismbus* at acpi? # ACPI SMBus CMI (experimental) +#acpitz* at acpi? # ACPI Thermal Zone +#acpivga* at acpi? # ACPI Display Adapter +#acpiout* at acpivga? # ACPI Display Output Device +#acpiwdrt* at acpi? # ACPI Watchdog Resource Table +#acpiwmi* at acpi? # ACPI WMI Mapper + +# Mainboard devices +#aibs* at acpi? # ASUSTeK AI Booster hardware monitor +#asus* at acpi? # ASUS hotkeys +#attimer* at acpi? # AT Timer +#com0 at acpi? # Serial communications interface +#com1 at acpi? # Serial communications interface +#com* at acpi? 
# Serial communications interface +#fdc* at acpi? # Floppy disk controller +#fd* at fdc? drive ? # the drives themselves +#fujbp* at acpi? # Fujitsu Brightness & Pointer +#fujhk* at acpi? # Fujitsu Hotkeys +##hpacel* at acpi? # HP 3D DriveGuard accelerometer +##hpqlb* at acpi? # HP Quick Launch Buttons +#hpet* at acpihpetbus? # High Precision Event Timer (table) +#hpet* at acpinodebus? # High Precision Event Timer (device) +#joy* at acpi? # Joystick/Game port +#lpt0 at acpi? # Parallel port +#lpt1 at acpi? # Parallel port +#lpt* at acpi? # Parallel port +#mpu* at acpi? # Roland MPU-401 MIDI UART +#pckbc* at acpi? # PC keyboard controller +#pcppi* at acpi? # AT-style speaker sound +#qemufwcfg* at acpi? # QEMU Firmware Configuration device +#sdhc* at acpi? # SD Host Controller +#sony* at acpi? # Sony Notebook Controller +#spic* at acpi? # Sony Programmable I/O Controller +#wsmouse* at spic? # mouse +#thinkpad* at acpi? # IBM/Lenovo Thinkpad hotkeys +#tpm* at acpi? # ACPI TPM (Experimental) +#ug* at acpi? # Abit uGuru Hardware monitor +#valz* at acpi? # Toshiba Dynabook hotkeys +#wb* at acpi? # Winbond W83L518D SD/MMC reader +#sdmmc* at wb? # SD/MMC bus +#wmidell* at acpiwmibus? # Dell WMI mappings +#wmieeepc* at acpiwmibus? # Asus Eee PC WMI mappings +#wmihp* at acpiwmibus? # HP WMI mappings +#wmimsi* at acpiwmibus? # MSI WMI mappings + +# Basic Bus Support + +# PCI bus support +pci* at mainbus? bus ? +#pci* at pchb? bus ? +#pci* at ppb? bus ? + +# PCI bridges +#pchb* at pci? dev ? function ? # PCI-Host bridges +#options AGP_X86 +#pcib* at pci? dev ? function ? # PCI-ISA bridges +#ppb* at pci? dev ? function ? # PCI-PCI bridges +## XXX 'puc's aren't really bridges, but there's no better place for them here +#puc* at pci? dev ? function ? # PCI "universal" comm. cards +# +#amdpcib* at pci? dev ? function ? # AMD 8111 PCI-ISA w/ HPET +#hpet* at amdpcib? +# +#pwdog* at pci? dev ? function ? # QUANCOM PWDOG1 +# +#ichlpcib* at pci? dev ? function ? 
# Intel ICH PCI-LPC w/ timecounter, +# # watchdog, gpio, Speedstep and HPET +#fwhrng* at ichlpcib? # Intel 82802 FWH Random Number Generator +##hpet* at ichlpcib? +#tco* at tcoichbus? # TCO watch dog timer +# +#aapic* at pci? dev ? function ? # AMD 8131 IO apic +# +#agp* at pchb? + +# ISA bus support +isa0 at mainbus? +#isa0 at pcib? +#isa0 at amdpcib? +#isa0 at ichlpcib? + +# CardBus bridge support +#cbb* at pci? dev ? function ? +#cardslot* at cbb? +# +## CardBus bus support +#cardbus* at cardslot? +#pcmcia* at cardslot? +# +## Console Devices +# +## wscons +#pckbc0 at isa? # pc keyboard controller +#pckbd* at pckbc? # PC keyboard +#pms* at pckbc? # PS/2 mouse for wsmouse +##options PMS_DISABLE_POWERHOOK # Disable PS/2 reset on resume +#options PMS_SYNAPTICS_TOUCHPAD # Enable support for Synaptics Touchpads +#options PMS_ELANTECH_TOUCHPAD # Enable support for Elantech Touchpads +#options PMS_ALPS_TOUCHPAD # Enable support for Alps Touchpads +#vga* at pci? dev ? function ? +#genfb* at pci? dev ? function ? +#options VCONS_DRAW_INTR +#wsdisplay* at vga? console ? +#wsdisplay* at wsemuldisplaydev? +#wskbd* at pckbd? console ? +#wsmouse* at pms? mux 0 +#wsmouse* at wsmousedev? +# +#attimer0 at isa? +#pcppi0 at isa? +#sysbeep0 at pcppi? +# +## DRI legacy drivers +##i915drm* at drm? # Intel i915, i945 DRM driver +##mach64drm* at drm? # mach64 (3D Rage Pro, Rage) DRM driver +##mgadrm* at drm? # Matrox G[24]00, G[45]50 DRM driver +##r128drm* at drm? # ATI Rage 128 DRM driver +##radeondrm* at drm? # ATI Radeon DRM driver +##savagedrm* at drm? # S3 Savage DRM driver +##sisdrm* at drm? # SiS DRM driver +##tdfxdrm* at drm? # 3dfx (voodoo) DRM driver +# +## DRMKMS drivers +#i915drmkms* at pci? dev ? function ? +#intelfb* at intelfbbus? +# +#radeon* at pci? dev ? function ? +#radeondrmkmsfb* at radeonfbbus? +# +##amdgpu* at pci? dev ? function ? +##amdgpufb* at amdgpufbbus? +# +#nouveau* at pci? dev ? function ? +#nouveaufb* at nouveaufbbus? 
+# +## DRMUMS drivers -- make them loadable, but not statically linked in +#options DRM_LEGACY +##viadrmums* at drm? +# +##options DRM_MAX_RESOLUTION_HORIZONTAL=1920 # Limit DRM size in horizontal dimension +##options DRM_MAX_RESOLUTION_VERTICAL=1080 # Limit DRM size in vertical dimension +# +## Cryptographic Devices +# +## PCI cryptographic devices +#amdccp* at pci? dev ? function ? # AMD Cryptographic Coprocessor +#hifn* at pci? dev ? function ? # Hifn 7755/7811/795x +##qat* at pci? dev ? function ? # Intel QuickAssist +#ubsec* at pci? dev ? function ? # Broadcom 5501/5601/580x/582x +# +## Trusted Platform Module +#tpm* at isa? iomem 0xfed40000 irq 7 +# +## Serial Devices +# +## PCI serial interfaces +#com* at puc? port ? # 16x50s on "universal" comm boards +#cy* at pci? dev ? function ? # Cyclades Cyclom-Y serial boards +#cz* at pci? dev ? function ? # Cyclades-Z multi-port serial boards +# +## PCMCIA serial interfaces +#com* at pcmcia? function ? # Modems and serial cards +# +#pcmcom* at pcmcia? function ? # PCMCIA multi-port serial cards +#com* at pcmcom? slave ? # ...and the slave devices +# +## CardBus serial interfaces +#com* at cardbus? function ? # Modems and serial cards + +# ISA serial interfaces +#options COM_HAYESP # adds Hayes ESP serial board support +com0 at isa? port 0x3f8 irq 4 # Standard PC serial ports +#com1 at isa? port 0x2f8 irq 3 + +# Parallel Printer Interfaces + +# PCI parallel printer interfaces +#lpt* at puc? port ? # || ports on "universal" comm boards +# +## ISA parallel printer interfaces +#lpt0 at isa? port 0x378 irq 7 # standard PC parallel ports +#lpt1 at isa? port 0x278 +# +## Hardware monitors +# +#amdnb_misc* at pci? # AMD NB Misc Configuration +#amdtemp* at amdnb_misc? # AMD CPU Temperature sensors +# +#amdsmn* at pci? # AMD SMN Configuration +#amdzentemp* at amdsmnbus? # AMD Ryzen Family 17h CPU temp sensors +# +## Winbond LPC Super I/O +##wbsio* at isa? port 0x2e +##wbsio* at isa? 
port 0x4e +# +## IBM Hawk Integrated Systems Management Processor +##ibmhawk0 at iic? addr 0x37 +# +## LM7[89] and compatible hardware monitors +## Use flags to select temp sensor type (see lm(4) man page for details) +##lm0 at isa? port 0x290 flags 0x0 # other common ports: 0x280, 0x310 +##lm* at wbsio? +# +## SMSC LPC47B397 hardware monitor functions +##smsc0 at isa? port 0x02e +# +## SMSC LPC47M192 hardware monitor +##smscmon* at iic? addr 0x2c +##smscmon* at iic? addr 0x2d # (alternate address) +# +## AMD 768 and 8111 power/ACPI controllers +#amdpm* at pci? dev ? function ? # RNG and SMBus 1.0 interface +##iic* at amdpm? # sensors below are on this bus +# +## NVIDIA nForce2/3/4 SMBus controller +#nfsmbc* at pci? dev ? function ? +#nfsmb* at nfsmbc? +#iic* at nfsmb? +# +## Intel PIIX4 power management controllers +#piixpm* at pci? dev ? function ? # PIIX4 compatible PM controller +#iic* at piixpm? # SMBus on PIIX4 +# +## Intel ICH SMBus controller +#ichsmb* at pci? dev ? function ? +#iic* at ichsmb? +# +## Intel S1200,C2000 (non-pch) SMBus controller +#ismt* at pci? dev ? function ? +#iic* at ismt? +# +## DesignWare I2C controller as found in some Intel PCH and AMD FCH devices. +#dwiic* at acpi? # DesignWare I2C controller +#dwiic* at pci? # DesignWare I2C controller +#iic* at dwiic? +# +## Thermal monitor and fan controller +##dbcool* at iic? addr 0x2C # Unknown other motherboard(s) +##dbcool* at iic? addr 0x2D # Tyan S2881 +##dbcool* at iic? addr 0x2E # Tyan S2882-D +# +## IBM Thinkpad Active Protection System +##aps0 at isa? port 0x1600 +# +## Fintek Super I/O with hardware monitor +##finsio0 at isa? port 0x4e +# +## iTE IT87xxF Super I/O with watchdog and sensors support +##itesio0 at isa? port 0x2e +# +## Abit uGuru Hardware system monitor +##ug0 at isa? port 0xe0 +# +## Serial Presence Detect capable memory modules +##spdmem* at iic? addr 0x50 +##spdmem* at iic? addr 0x51 +##spdmem* at iic? addr 0x52 +##spdmem* at iic? addr 0x53 +##spdmem* at iic? 
addr 0x54 +##spdmem* at iic? addr 0x55 +##spdmem* at iic? addr 0x56 +##spdmem* at iic? addr 0x57 +##sdtemp* at iic? addr 0x18 +##sdtemp* at iic? addr 0x19 +##sdtemp* at iic? addr 0x1a +##sdtemp* at iic? addr 0x1b +##sdtemp* at iic? addr 0x1c +##sdtemp* at iic? addr 0x1d +##sdtemp* at iic? addr 0x1e +##sdtemp* at iic? addr 0x1f +# +## Intel GPIO +#igpio* at acpi? +# +## I2C HID devices +#ihidev* at iic? +# +## I2C Mice +#ims* at ihidev? reportid ? +#wsmouse* at ims? mux 0 +# +## I2O devices +#iop* at pci? dev ? function ? # I/O processor +#iopsp* at iop? tid ? # SCSI/FC-AL ports +#ld* at iop? tid ? # block devices +## XXX dpti.c wants a processor type that is not assigned for x86-64 +##dpti* at iop? tid 0 # DPT/Adaptec control interface +# +## GPIO devices +#gpio* at gpiobus? +# +## 1-Wire support +##gpioow* at gpio? offset ? mask ? # 1-wire bitbanging via gpio +#gpioow* at gpio? +#onewire* at gpioow? +# +## 1-Wire devices +#owtemp* at onewire? # Temperature sensors +# +## I2C support +##gpioiic* at gpio? +##iic* at gpioiic? +# +## Keylock support +##gpiolock* at gpio? +# +## Pulsing GPIO pins in software +##gpiopwm* at gpio? +# +## Soekris 6501 GPIO/LED driver (provides gpiobus, needs gpio) +##soekrisgpio0 at isa? port 0x680 +# +## Nuvoton NCT5104D SuperIO providing GPIO +#nct0 at isa? port ? +# +## SCSI Controllers and Devices +# +## PCI SCSI controllers +#adv* at pci? dev ? function ? # AdvanSys 1200[A,B], 9xx[U,UA] SCSI +#adw* at pci? dev ? function ? # AdvanSys 9x0UW[D], 3940U[2,3]W SCSI +#ahc* at pci? dev ? function ? # Adaptec [23]94x, aic78x0 SCSI +#ahd* at pci? dev ? function ? # Adaptec aic790x SCSI +#bha* at pci? dev ? function ? # BusLogic 9xx SCSI +#dpt* at pci? dev ? function ? # DPT SmartCache/SmartRAID +#iha* at pci? dev ? function ? # Initio INIC-940/950 SCSI +#isp* at pci? dev ? function ? # Qlogic ISP [12]0x0 SCSI/FibreChannel +#mfi* at pci? dev ? function ? # LSI MegaRAID SAS +#mfii* at pci? dev ? function ?
# LSI MegaRAID SAS (Fusion and newer) +#mly* at pci? dev ? function ? # Mylex AcceleRAID and eXtremeRAID +#mpt* at pci? dev ? function ? # LSILogic 9x9 and 53c1030 (Fusion-MPT) +#mpii* at pci? dev ? function ? # LSI Logic Fusion-MPT II +#njs* at pci? dev ? function ? # Workbit NinjaSCSI-32 +#pcscp* at pci? dev ? function ? # AMD 53c974 PCscsi-PCI SCSI +#siop* at pci? dev ? function ? # Symbios 53c8xx SCSI +#esiop* at pci? dev ? function ? # Symbios 53c875 and newer SCSI +##options SIOP_SYMLED # drive the act. LED in software +#trm* at pci? dev ? function ? # Tekram DC-395U/UW/F, DC-315/U SCSI +# +## PCMCIA SCSI controllers +#aic* at pcmcia? function ? # Adaptec APA-1460 SCSI +#esp* at pcmcia? function ? # Qlogic ESP406/FAS408 SCSI +#spc* at pcmcia? function ? # Fujitsu MB87030/MB89352 SCSI +# +## CardBus SCSI cards +#adv* at cardbus? function ? # AdvanSys 1200[A,B], 9xx[U,UA] SCSI +#ahc* at cardbus? function ? # Adaptec ADP-1480 +#njs* at cardbus? function ? # Workbit NinjaSCSI-32 +# +## SCSI bus support +#scsibus* at scsi? +# +## SCSI devices +#sd* at scsibus? target ? lun ? # SCSI disk drives +#st* at scsibus? target ? lun ? # SCSI tape drives +#cd* at scsibus? target ? lun ? # SCSI CD-ROM drives +#ch* at scsibus? target ? lun ? # SCSI autochangers +#ses* at scsibus? target ? lun ? # SCSI Enclosure Services devices +#ss* at scsibus? target ? lun ? # SCSI scanners +#uk* at scsibus? target ? lun ? # SCSI unknown +# +# +## RAID controllers and devices +#aac* at pci? dev ? function ? # Adaptec AAC family +#amr* at pci? dev ? function ? # AMI/LSI Logic MegaRAID +#arcmsr* at pci? dev ? function ? # Areca SATA RAID controllers +#cac* at pci? dev ? function ? # Compaq PCI array controllers +#ciss* at pci? dev ? function ? # HP Smart Array controllers +#icp* at pci? dev ? function ? # ICP-Vortex GDT & Intel RAID +#mlx* at pci? dev ? function ? # Mylex DAC960 & DEC SWXCR family +#twe* at pci? dev ? function ? # 3ware Escalade RAID controllers +#twa* at pci? dev ? 
function ? # 3ware Escalade 9xxx RAID controllers +# +#ld* at aac? unit ? +#ld* at amr? unit ? +#ld* at cac? unit ? +#ld* at icp? unit ? +#ld* at twe? unit ? +#ld* at twa? unit ? +#ld* at mlx? unit ? +# +#icpsp* at icp? unit ? # SCSI pass-through +# +## IDE and related devices +## PCI IDE controllers - see pciide(4) for supported hardware. +## The 0x0001 flag force the driver to use DMA, even if the driver doesn't know +## how to set up DMA modes for this chip. This may work, or may cause +## a machine hang with some controllers. +#pciide* at pci? dev ? function ? flags 0x0000 # GENERIC pciide driver +#acardide* at pci? dev ? function ? # Acard IDE controllers +#aceride* at pci? dev ? function ? # Acer Lab IDE controllers +#ahcisata* at pci? dev ? function ? # AHCI SATA controllers +#artsata* at pci? dev ? function ? # Intel i31244 SATA controller +#cmdide* at pci? dev ? function ? # CMD tech IDE controllers +#cypide* at pci? dev ? function ? # Cypress IDE controllers +#hptide* at pci? dev ? function ? # Triones/HighPoint IDE controllers +#iteide* at pci? dev ? function ? # IT Express IDE controllers +#ixpide* at pci? dev ? function ? # ATI IXP IDE controllers +#jmide* at pci? dev ? function ? # JMicron PCI-e PATA/SATA controllers +#ahcisata* at jmide? +#mvsata* at pci? dev ? function ? # Marvell Hercules-I/II +#optiide* at pci? dev ? function ? # Opti IDE controllers +#piixide* at pci? dev ? function ? # Intel IDE controllers +#pdcide* at pci? dev ? function ? # Promise IDE controllers +#pdcsata* at pci? dev ? function ? # Promise SATA150 controllers +#satalink* at pci? dev ? function ? # SiI SATALink controllers +#siisata* at pci? dev ? function ? # SiI SteelVine controllers +#siside* at pci? dev ? function ? # SiS IDE controllers +#slide* at pci? dev ? function ? # Symphony Labs IDE controllers +#svwsata* at pci? dev ? function ? # ServerWorks SATA controllers +#toshide* at pci? dev ? function ? # TOSHIBA PICCOLO controllers +#viaide* at pci? dev ? function ? 
# VIA/AMD/Nvidia IDE controllers +# +## PCMCIA IDE controllers +#wdc* at pcmcia? function ? +# +## CardBus IDE controllers +#njata* at cardbus? function ? flags 0x01 # Workbit NinjaATA-32 +#siisata* at cardbus? function ? # SiI SteelVine controllers +# +## ISA ST506, ESDI, and IDE controllers +## Use flags 0x01 if you want to try to use 32bits data I/O (the driver will +## fall back to 16bits I/O if 32bits I/O are not functional). +## Some controllers pass the initial 32bit test, but will fail later. +#wdc0 at isa? port 0x1f0 irq 14 flags 0x00 +#wdc1 at isa? port 0x170 irq 15 flags 0x00 +# +## ATA (IDE) bus support +#atabus* at ata? +#options ATADEBUG +# +## IDE drives +## Flags are used only with controllers that support DMA operations +## and mode settings (e.g. some pciide controllers) +## The lowest order four bits (rightmost digit) of the flags define the PIO +## mode to use, the next set of four bits the DMA mode and the third set the +## UltraDMA mode. For each set of four bits, the 3 lower bits define the mode +## to use, and the last bit must be 1 for this setting to be used. +## For DMA and UDMA, 0xf (1111) means 'disable'. +## 0x0fac means 'use PIO mode 4, DMA mode 2, disable UltraDMA'. +## (0xc=1100, 0xa=1010, 0xf=1111) +## 0x0000 means "use whatever the drive claims to support". +#wd* at atabus? drive ? flags 0x0000 +# +## ATAPI bus support +#atapibus* at atapi? +# +# +## ATA RAID configuration support, as found on some Promise controllers. +#pseudo-device ataraid +#ld* at ataraid? vendtype ? unit ? +# +## ATAPI devices +## flags have the same meaning as for IDE drives. +#cd* at atapibus? drive ? flags 0x0000 # ATAPI CD-ROM drives +#sd* at atapibus? drive ? flags 0x0000 # ATAPI disk drives +#st* at atapibus? drive ? flags 0x0000 # ATAPI tape drives +#uk* at atapibus? drive ? flags 0x0000 # ATAPI unknown +# +# +## NVM Express controllers and devices +#nvme* at pci? dev ? function ? +#ld* at nvme? nsid ? 
+# +# +## Miscellaneous mass storage devices +# +## ISA floppy +##fdc0 at isa? port 0x3f0 irq 6 drq 2 # standard PC floppy controllers +##fdc1 at isa? port 0x370 irq ? drq ? +# +## Network Interfaces +# +## PCI network interfaces +#age* at pci? dev ? function ? # Attansic/Atheros L1 Gigabit Ethernet +#alc* at pci? dev ? function ? # Attansic/Atheros L1C/L2C Ethernet +#ale* at pci? dev ? function ? # Attansic/Atheros L1E Ethernet +#an* at pci? dev ? function ? # Aironet PC4500/PC4800 (802.11) +#aq* at pci? dev ? function ? # Aquantia AQC 10 gigabit +#ath* at pci? dev ? function ? # Atheros 5210/5211/5212 802.11 +#athn* at pci? dev ? function ? # Atheros AR9k (802.11a/g/n) +#atw* at pci? dev ? function ? # ADMtek ADM8211 (802.11) +#bce* at pci? dev ? function ? # Broadcom 440x 10/100 Ethernet +#bge* at pci? dev ? function ? # Broadcom 570x gigabit Ethernet +#bnx* at pci? dev ? function ? # Broadcom NetXtremeII gigabit Ethernet +#bwi* at pci? dev ? function ? # Broadcom BCM43xx wireless +#bwfm* at pci? dev ? function ? # Broadcom FullMAC +#cas* at pci? dev ? function ? # Sun Cassini/Cassini+ Ethernet +#dge* at pci? dev ? function ? # Intel 82597 10GbE LR +#ena* at pci? dev ? function ? # Amazon.com Elastic Network Adapter +#ep* at pci? dev ? function ? # 3Com 3c59x +#epic* at pci? dev ? function ? # SMC EPIC/100 Ethernet +##eqos* at pci? dev ? function ? # DesignWare Ethernet QoS +#et* at pci? dev ? function ? # Agere/LSI ET1310/ET1301 Gigabit +#ex* at pci? dev ? function ? # 3Com 90x[BC] +#fxp* at pci? dev ? function ? # Intel EtherExpress PRO 10+/100B +#gem* at pci? dev ? function ? # Apple GMAC and Sun ERI gigabit enet +#gsip* at pci? dev ? function ? # NS83820 Gigabit Ethernet +#hme* at pci? dev ? function ? # Sun Microelectronics STP2002-STQ +#iavf* at pci? dev ? function ? # Intel Adaptive Virtual Function +#igc* at pci? dev ? function ? # Intel I225 2.5 gigabit +#ipw* at pci? dev ? function ? # Intel PRO/Wireless 2100 +#iwi* at pci? dev ? function ? 
# Intel PRO/Wireless 2200BG +#iwm* at pci? dev ? function ? # Intel Centrino 7260 +#iwn* at pci? dev ? function ? # Intel PRO/Wireless 4965AGN +#ixg* at pci? dev ? function ? # Intel 8259x 10 gigabit +#ixl* at pci? dev ? function ? # Intel Ethernet 700 Series +#ixv* at pci? dev ? function ? # Intel 8259x 10G virtual function +#jme* at pci? dev ? function ? # JMicron JMC2[56]0 ethernet +#kse* at pci? dev ? function ? # Micrel KSZ8841/8842 ethernet +#lii* at pci? dev ? function ? # Atheros L2 Fast-Ethernet +#malo* at pci? dev ? function ? # Marvell Libertas Wireless +#mcx* at pci? dev ? function ? # Mellanox 5th generation Ethernet +#mskc* at pci? dev ? function ? # Marvell Yukon 2 Gigabit Ethernet +#msk* at mskc? # Marvell Yukon 2 Gigabit Ethernet +#mtd* at pci? dev ? function ? # Myson MTD803 3-in-1 Ethernet +#ne* at pci? dev ? function ? # NE2000-compatible Ethernet +#nfe* at pci? dev ? function ? # NVIDIA nForce Ethernet +#ntwoc* at pci? dev ? function ? # Riscom/N2 PCI Sync Serial +#pcn* at pci? dev ? function ? # AMD PCnet-PCI Ethernet +#ral* at pci? dev ? function ? # Ralink Technology RT25x0 802.11a/b/g +#re* at pci? dev ? function ? # Realtek 8139C+/8169/8169S/8110S +#rge* at pci? dev ? function ? # Realtek 8125 +#rtk* at pci? dev ? function ? # Realtek 8129/8139 +#rtw* at pci? dev ? function ? # Realtek 8180L (802.11) +#rtwn* at pci? dev ? function ? # Realtek 8188CE/8192CE 802.11b/g/n +#sf* at pci? dev ? function ? # Adaptec AIC-6915 Ethernet +#sip* at pci? dev ? function ? # SiS 900/DP83815 Ethernet +#skc* at pci? dev ? function ? # SysKonnect SK9821 Gigabit Ethernet +#sk* at skc? # SysKonnect SK9821 Gigabit Ethernet +#ste* at pci? dev ? function ? # Sundance ST-201 Ethernet +#stge* at pci? dev ? function ? # Sundance/Tamarack TC9021 Gigabit +#ti* at pci? dev ? function ? # Alteon ACEnic gigabit Ethernet +#tl* at pci? dev ? function ? # ThunderLAN-based Ethernet +#tlp* at pci? dev ? function ? # DECchip 21x4x and clones +#txp* at pci? dev ? function ? 
# 3com 3cr990 +#vge* at pci? dev ? function ? # VIATech VT612X Gigabit Ethernet +#vmx* at pci? dev ? function ? # VMware VMXNET3 +#vr* at pci? dev ? function ? # VIA Rhine Fast Ethernet +#wi* at pci? dev ? function ? # Intersil Prism Mini-PCI (802.11b) +#wm* at pci? dev ? function ? # Intel 82543/82544 gigabit +#wpi* at pci? dev ? function ? # Intel PRO/Wireless 3945ABG +#xge* at pci? dev ? function ? # Neterion (S2io) Xframe-I 10GbE +# +## PCMCIA network interfaces +#an* at pcmcia? function ? # Aironet PC4500/PC4800 (802.11) +#awi* at pcmcia? function ? # BayStack 650/660 (802.11FH/DS) +#cnw* at pcmcia? function ? # Xircom/Netwave AirSurfer +#cs* at pcmcia? function ? # CS89xx Ethernet +#ep* at pcmcia? function ? # 3Com 3c589 and 3c562 Ethernet +#malo* at pcmcia? function ? # Marvell Libertas +#mbe* at pcmcia? function ? # MB8696x based Ethernet +#ne* at pcmcia? function ? # NE2000-compatible Ethernet +#ray* at pcmcia? function ? # Raytheon Raylink (802.11) +#sm* at pcmcia? function ? # Megahertz Ethernet +#wi* at pcmcia? function ? # Lucent/Intersil WaveLan IEEE (802.11) +#xirc* at pcmcia? function ? # Xircom CreditCard Ethernet +#com* at xirc? +#xi* at xirc? +# +#mhzc* at pcmcia? function ? # Megahertz Ethernet/Modem combo cards +#com* at mhzc? +#sm* at mhzc? +# +## CardBus network cards +#ath* at cardbus? function ? # Atheros 5210/5211/5212 802.11 +#athn* at cardbus? function ? # Atheros AR9k (802.11a/g/n) - UNTESTED +#atw* at cardbus? function ? # ADMtek ADM8211 (802.11) +#ex* at cardbus? function ? # 3Com 3C575TX +#fxp* at cardbus? function ? # Intel i8255x +#malo* at cardbus? function ? # Marvell Libertas Wireless +#ral* at cardbus? function ? # Ralink Technology RT25x0 802.11a/b/g +#re* at cardbus? function ? # Realtek 8139C+/8169/8169S/8110S +#rtk* at cardbus? function ? # Realtek 8129/8139 +#rtw* at cardbus? function ? # Realtek 8180L (802.11) +#tlp* at cardbus? function ? # DECchip 21143 +# +## MII/PHY support +#acphy* at mii? phy ? 
# DAltima AC101 and AMD Am79c874 PHYs +#amhphy* at mii? phy ? # AMD 79c901 Ethernet PHYs +#atphy* at mii? phy ? # Attansic/Atheros PHYs +#bmtphy* at mii? phy ? # Broadcom BCM5201 and BCM5202 PHYs +#brgphy* at mii? phy ? # Broadcom BCM5400-family PHYs +#ciphy* at mii? phy ? # Cicada CS8201 Gig-E PHYs +#dmphy* at mii? phy ? # Davicom DM9101 PHYs +#etphy* at mii? phy ? # Agere/LSI ET1011 TruePHY Gig-E PHYs +#exphy* at mii? phy ? # 3Com internal PHYs +#gentbi* at mii? phy ? # Generic Ten-Bit 1000BASE-[CLS]X PHYs +#glxtphy* at mii? phy ? # Level One LXT-1000 PHYs +#gphyter* at mii? phy ? # NS83861 Gig-E PHY +#icsphy* at mii? phy ? # Integrated Circuit Systems ICS189x +#igphy* at mii? phy ? # Intel IGP01E1000 +#ihphy* at mii? phy ? # Intel 82577 PHYs +#ikphy* at mii? phy ? # Intel 82563 PHYs +#inphy* at mii? phy ? # Intel 82555 PHYs +#iophy* at mii? phy ? # Intel 82553 PHYs +#ipgphy* at mii? phy ? # IC PLUS IP1000A/IP1001 PHYs +#jmphy* at mii? phy ? # Jmicron JMP202/211 PHYs +#lxtphy* at mii? phy ? # Level One LXT-970 PHYs +#makphy* at mii? phy ? # Marvell Semiconductor 88E1000 PHYs +#micphy* at mii? phy ? # Micrel KSZ[89]xxx PHYs +#nsphy* at mii? phy ? # NS83840 PHYs +#nsphyter* at mii? phy ? # NS83843 PHYs +#pnaphy* at mii? phy ? # generic HomePNA PHYs +#qsphy* at mii? phy ? # Quality Semiconductor QS6612 PHYs +#rgephy* at mii? phy ? # Realtek 8169S/8110 internal PHYs +#rlphy* at mii? phy ? # Realtek 8139/8201L PHYs +#smscphy* at mii? phy ? # SMSC LAN87xx PHYs +#sqphy* at mii? phy ? # Seeq 80220/80221/80223 PHYs +#tlphy* at mii? phy ? # ThunderLAN PHYs +#tqphy* at mii? phy ? # TDK Semiconductor PHYs +#ukphy* at mii? phy ? # generic unknown PHYs +#urlphy* at mii? phy ? # Realtek RTL8150L internal PHYs +# +# +## USB Controller and Devices +# +## Virtual USB controller +##pseudo-device vhci +# +## PCI USB controllers +#xhci* at pci? dev ? function ? # eXtensible Host Controller +#ehci* at pci? dev ? function ? # Enhanced Host Controller +#ohci* at pci? dev ? function ? 
# Open Host Controller +#uhci* at pci? dev ? function ? # Universal Host Controller (Intel) +# +## CardBus USB controllers +#ehci* at cardbus? function ? # Enhanced Host Controller +#ohci* at cardbus? function ? # Open Host Controller +#uhci* at cardbus? function ? # Universal Host Controller (Intel) +# +## ISA USB controllers +##slhci0 at isa? port 0x300 irq 5 # ScanLogic SL811HS +# +## PCMCIA USB controllers +#slhci* at pcmcia? function ? # ScanLogic SL811HS +# +## USB bus support +##usb* at vhci? +#usb* at xhci? +#usb* at ehci? +#usb* at ohci? +#usb* at uhci? +#usb* at slhci? +# +#include "dev/usb/usbdevices.config" +# +## PCI IEEE1394 controllers +#fwohci* at pci? dev ? function ? # IEEE1394 Open Host Controller +# +## CardBus IEEE1394 controllers +#fwohci* at cardbus? function ? # IEEE1394 Open Host Controller +# +#ieee1394if* at fwohci? +#fwip* at ieee1394if? # IP over IEEE1394 +#sbp* at ieee1394if? euihi ? euilo ? +# +## Audio Devices +# +## PCI audio devices +#auacer* at pci? dev ? function ? # ALi M5455 integrated AC'97 Audio +#auich* at pci? dev ? function ? # Intel/AMD/nVidia AC'97 Audio +#auixp* at pci? dev ? function ? # ATI IXP AC'97 Audio +#autri* at pci? dev ? function ? # Trident 4DWAVE based AC'97 Audio +#auvia* at pci? dev ? function ? # VIA AC'97 audio +#clcs* at pci? dev ? function ? # Cirrus Logic CS4280 +#clct* at pci? dev ? function ? # Cirrus Logic CS4281 +#cmpci* at pci? dev ? function ? # C-Media CMI8338/8738 +#eap* at pci? dev ? function ? # Ensoniq AudioPCI +#emuxki* at pci? dev ? function ? # Creative SBLive! and PCI512 +#esa* at pci? dev ? function ? # ESS Allegro-1 / Maestro-3 PCI Audio +#esm* at pci? dev ? function ? # ESS Maestro-1/2/2e PCI Audio Accelerator +#eso* at pci? dev ? function ? # ESS Solo-1 PCI AudioDrive +#fms* at pci? dev ? function ? # Forte Media FM801 +#neo* at pci? dev ? function ? # NeoMagic 256 AC'97 Audio +#sv* at pci? dev ? function ? # S3 SonicVibes +#yds* at pci? dev ? function ? 
# Yamaha DS-1 PCI Audio +# +## OPL[23] FM synthesizers +##opl0 at isa? port 0x388 # use only if not attached to sound card +#opl* at cmpci? flags 1 +#opl* at eso? +#opl* at fms? +#opl* at sv? +# +## High Definition Audio +#hdaudio* at pci? dev ? function ? # High Definition Audio +#hdafg* at hdaudiobus? +# +## Audio support +#audio* at audiobus? +# +## The spkr driver provides a simple tone interface to the built in speaker. +#spkr* at pcppi? # PC speaker +#spkr* at audio? # PC speaker (synthesized) +##wsbell* at spkr? # Bell for wscons display (module by default) +# +## MPU 401 UARTs +##mpu* at isa? port 0x330 irq 9 # MPU401 or compatible card +#mpu* at cmpci? +#mpu* at eso? +#mpu* at yds? +# +## MIDI support +#midi* at midibus? +#midi* at pcppi? # MIDI interface to the PC speaker +# +## FM-Radio devices +## PCI radio devices +##gtp* at pci? dev ? function ? # Guillemot Maxi Radio FM 2000 Radio Card +# +## Radio support +##radio* at gtp? +# +# +## Video capture devices +# +#coram* at pci? dev ? function ? # Conexant CX23885 PCI-E TV +#cxdtv* at pci? dev ? function ? # Conexant CX2388[0-3] PCI TV +# +#video* at videobus? # Analog capture interface +#dtv* at dtvbus? # Digital capture interface +# +# +## TV cards +# +## Brooktree 848/849/878/879 based TV cards +#bktr* at pci? dev ? function ? +#radio* at bktr? +# +# +## Bluetooth Controller and Device support +# +## Bluetooth PCMCIA Controllers +#bt3c* at pcmcia? function ? # 3Com 3CRWB6096-A +#btbc* at pcmcia? function ? # AnyCom BlueCard LSE041/039/139 +# +## Bluetooth SDIO Controllers +#sbt* at sdmmc? +# +## Bluetooth USB Controllers +#ubt* at uhub? port ? +#aubtfwl* at uhub? port ? +# +## Bluetooth Device Hub +#bthub* at bcsp? +#bthub* at bt3c? +#bthub* at btbc? +#bthub* at btuart? +#bthub* at sbt? +#bthub* at ubt? +# +## Bluetooth HID support +#bthidev* at bthub? +# +## Bluetooth Mouse +#btms* at bthidev? reportid ? +#wsmouse* at btms? mux 0 +# +## Bluetooth Keyboard +#btkbd* at bthidev? reportid ? 
+#wskbd* at btkbd? console ? mux 1 +# +## Bluetooth Apple Magic Mouse +#btmagic* at bthub? +#wsmouse* at btmagic? mux 0 +# +## Bluetooth Audio support +#btsco* at bthub? +# +# +## SD/MMC/SDIO Controller and Device support +# +## SD/MMC controller +#sdhc* at pci? # SD Host Controller +#rtsx* at pci? # Realtek RTS5209/RTS5229 Card Reader +#sdhc* at cardbus? # SD Host Controller +#sdmmc* at sdhc? # SD/MMC bus +#sdmmc* at rtsx? # SD/MMC bus +# +#ld* at sdmmc? +# +# +## Middle Digital, Inc. PCI-Weasel serial console board control +## devices (watchdog timer, etc.) +#weasel* at pci? +# +## Virtio devices +virtio* at pci? dev ? function ? # Virtio PCI device +#viomb* at virtio? # Virtio memory balloon device +ld* at virtio? # Virtio disk device +vioif* at virtio? # Virtio network device +#viornd* at virtio? # Virtio entropy device +#vioscsi* at virtio? # Virtio SCSI device +##vio9p* at virtio? # Virtio 9P device +# +## Hyper-V devices +#vmbus* at acpi? # Hyper-V VMBus +#genfb* at vmbus? # Hyper-V Synthetic Video Framebuffer +#hvkbd* at vmbus? # Hyper-V Synthetic Keyboard +#wskbd* at hvkbd? console ? mux 1 +#hvn* at vmbus? # Hyper-V NetVSC +#hvs* at vmbus? # Hyper-V StorVSC +#hvheartbeat* at vmbus? # Hyper-V Heartbeat Service +#hvshutdown* at vmbus? # Hyper-V Guest Shutdown Service +#hvtimesync* at vmbus? # Hyper-V Time Synchronization Service +##hvkvp* at vmbus? # Hyper-V Data Exchange Service +# +## Pseudo-Devices +# +#pseudo-device crypto # /dev/crypto device +#pseudo-device swcrypto # software crypto implementation +# +## disk/mass storage pseudo-devices +#pseudo-device bio # RAID control device driver +#pseudo-device ccd # concatenated/striped disk devices +#pseudo-device cgd # cryptographic disk devices +#pseudo-device raid # RAIDframe disk driver +#options RAID_AUTOCONFIG # auto-configuration of RAID components +## Options to enable various other RAIDframe RAID types. 
+##options RF_INCLUDE_EVENODD=1 +##options RF_INCLUDE_RAID5_RS=1 +##options RF_INCLUDE_PARITYLOGGING=1 +##options RF_INCLUDE_CHAINDECLUSTER=1 +##options RF_INCLUDE_INTERDECLUSTER=1 +##options RF_INCLUDE_PARITY_DECLUSTERING=1 +##options RF_INCLUDE_PARITY_DECLUSTERING_DS=1 +#pseudo-device fss # file system snapshot device +# +#pseudo-device md # memory disk device (ramdisk) +#options MEMORY_DISK_HOOKS # enable md specific hooks +#options MEMORY_DISK_DYNAMIC # enable dynamic resizing +# +#pseudo-device vnd # disk-like interface to files +#options VND_COMPRESSION # compressed vnd(4) +# +# +## network pseudo-devices +pseudo-device bpfilter # Berkeley packet filter +#pseudo-device carp # Common Address Redundancy Protocol +pseudo-device loop # network loopback +##pseudo-device mpls # MPLS pseudo-interface +#pseudo-device ppp # Point-to-Point Protocol +#pseudo-device pppoe # PPP over Ethernet (RFC 2516) +#pseudo-device sl # Serial Line IP +#pseudo-device irframetty # IrDA frame line discipline +#pseudo-device tun # network tunneling over tty +#pseudo-device tap # virtual Ethernet +#pseudo-device gre # generic L3 over IP tunnel +#pseudo-device gif # IPv[46] over IPv[46] tunnel (RFC1933) +#pseudo-device ipsecif # tunnel interface for routing based ipsec +##pseudo-device faith # IPv[46] tcp relay translation i/f +#pseudo-device stf # 6to4 IPv6 over IPv4 encapsulation +#pseudo-device vlan # IEEE 802.1q encapsulation +#pseudo-device bridge # simple inter-network bridging +#pseudo-device vether # Virtual Ethernet for bridge +#pseudo-device agr # IEEE 802.3ad link aggregation +#pseudo-device l2tp # L2TPv3 interface +#pseudo-device lagg # Link aggregation interface +#pseudo-device npf # NPF packet filter +# +##pseudo-device canloop # CAN loopback interface +# +## +## accept filters +#pseudo-device accf_data # "dataready" accept filter +#pseudo-device accf_http # "httpready" accept filter +# +## miscellaneous pseudo-devices +pseudo-device pty # pseudo-terminals +#pseudo-device 
sequencer # MIDI sequencer +## rnd works; RND_COM does not on port i386 yet. +##options RND_COM # use "com" randomness as well (BROKEN) +pseudo-device clockctl # user control of clock subsystem +pseudo-device ksyms # /dev/ksyms +#pseudo-device lockstat # lock profiling +#pseudo-device bcsp # BlueCore Serial Protocol +#pseudo-device btuart # Bluetooth HCI UART (H4) +##pseudo-device nvmm # NetBSD Virtual Machine Monitor +#pseudo-device swwdog # software watchdog timer -- swwdog(4) +# +## wscons pseudo-devices +#pseudo-device wsmux # mouse & keyboard multiplexor +#pseudo-device wsfont +## Give us a choice of fonts based on monitor size +#options FONT_BOLD8x16 +#options FONT_BOLD16x32 +# +## pseudo audio device driver +#pseudo-device pad +# +## userland interface to drivers, including autoconf and properties retrieval +#pseudo-device drvctl +# +## EFI runtime support +#options EFI_RUNTIME +#pseudo-device efi # /dev/efi +# +#include "dev/veriexec.config" +# +#options PAX_SEGVGUARD=0 # PaX Segmentation fault guard +#options PAX_MPROTECT=1 # PaX mprotect(2) restrictions +#options PAX_MPROTECT_DEBUG=1 # PaX mprotect debug +#options PAX_ASLR=1 # PaX Address Space Layout Randomization +#options PAX_ASLR_DEBUG=1 # PaX ASLR debug +# +## Pull in optional local configuration - always at end +#cinclude "arch/amd64/conf/GENERIC.local" From 78d46c9793304d2efa86f75e98c3b8f0f358c618 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 10 Dec 2023 20:43:34 +0100 Subject: [PATCH 009/114] fix: no need for GENPVH --- sys/arch/xen/xen/hypervisor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arch/xen/xen/hypervisor.c b/sys/arch/xen/xen/hypervisor.c index 5754c16f5e3eb..6c8b66f87d012 100644 --- a/sys/arch/xen/xen/hypervisor.c +++ b/sys/arch/xen/xen/hypervisor.c @@ -168,7 +168,7 @@ struct x86_isa_chipset x86_isa_chipset; #endif #endif -#if defined(XENPVHVM) || defined(XENPVH) || defined(GENPVH) +#if defined(XENPVHVM) || defined(XENPVH) #include #include 
#include From bf1d9baf175fb3bbe26168e8340f0ae26468fff7 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 06:38:33 +0100 Subject: [PATCH 010/114] fix: merged the much classier asm from Greg --- sys/arch/amd64/amd64/genassym.cf | 4 +- sys/arch/amd64/amd64/locore.S | 153 +++++++++++++++---------------- 2 files changed, 77 insertions(+), 80 deletions(-) diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf index 12b609bcc78cc..8a410c058e4c2 100644 --- a/sys/arch/amd64/amd64/genassym.cf +++ b/sys/arch/amd64/amd64/genassym.cf @@ -382,11 +382,11 @@ define SIR_XENIPL_VM SIR_XENIPL_VM define SIR_XENIPL_SCHED SIR_XENIPL_SCHED define SIR_XENIPL_HIGH SIR_XENIPL_HIGH define EVTCHN_UPCALL_MASK offsetof(struct vcpu_info, evtchn_upcall_mask) +define HVM_START_INFO_SIZE sizeof(struct hvm_start_info) define MMAP_PADDR offsetof(struct hvm_start_info, memmap_paddr) define MMAP_ENTRIES offsetof(struct hvm_start_info, memmap_entries) +define MMAP_ENTRY_SIZE sizeof(struct hvm_memmap_table_entry) define CMDLINE_PADDR offsetof(struct hvm_start_info, cmdline_paddr) -define MMAP_ENT_SZ sizeof(struct hvm_memmap_table_entry) -define MMAP_SI_SZ sizeof(struct hvm_start_info) ifdef XENPV define XEN_PT_BASE offsetof(struct start_info, pt_base) define XEN_NR_PT_FRAMES offsetof(struct start_info, nr_pt_frames) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index ccdec75922e90..4e988e0243f97 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -306,7 +306,7 @@ ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, start) #else ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, 0) - ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, RELOC(start_xen32)) + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, RELOC(start_genpvh)) #endif /* XENPV */ ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .quad, HYPERVISOR_VIRT_START) @@ -1034,61 +1034,7 @@ END(start) # if 
!defined(XENPV) /* entry point for Xen PVH */ .code32 -#ifdef GENPVH -ENTRY(copy_start_info) -/* - * sys/external/mit/xen-include-public/dist/xen/include/public/arch-x86/hvm/start_info.h - */ - /* copy start_info from its current location to end of kernel */ - movl $RELOC(_end), %edi - movl $PAGE_SIZE, %ecx - movl %ebx, %esi - rep movsb /* copy %esi to %edi byte per byte %ecx times */ - - movl $RELOC(_end), %ebx /* now %ebx holds what's expected */ - - /* - * copy memmap entries at the end of start_info - * sizeof hvm_memmap_table_entry * entries - */ - imul $MMAP_ENT_SZ, (MMAP_ENTRIES)(%ebx), %ecx - - pushl %ecx /* save count as offset */ - - movl %ebx, %edx - addl $MMAP_PADDR, %edx /* address of mmap entries */ - movl (%edx), %esi - movl %ebx, %edi /* to the end of start_info */ - addl $MMAP_SI_SZ, %edi /* ebx + sizeof(start_info) */ - - rep movsb /* esi (mmap entries addr) to end of start_info */ - - /* point hvm_memmap_table_entry to the end of ebx */ - movl %ebx, %edx - addl $MMAP_PADDR, %edx - movl %ebx, %eax - addl $MMAP_SI_SZ, %eax /* aligned end of hvm_start_info */ - movl %eax, (%edx) /* update memmap_paddr */ - - /* copy command line */ - movl %ebx, %edx - addl $CMDLINE_PADDR, %edx /* cmdline address */ - movl (%edx), %esi - movl %ebx, %edi - addl $MMAP_SI_SZ, %edi /* end of hvm_start_info */ - popl %ecx /* previously saved end of data offset */ - addl %ecx, %edi /* add size of memmap table */ - pushl %edi /* save new cmdline position */ - - repnz movsb - - popl (%edx) /* point to new location */ - - ret -END(copy_start_info) -#endif /* GENPVH */ - -ENTRY(start_xen32) +ENTRY(start_genpvh) /* Xen doesn't start us with a valid gdt */ movl $RELOC(gdtdesc32), %eax lgdt (%eax) @@ -1104,35 +1050,92 @@ ENTRY(start_xen32) movl $RELOC(tmpstk),%esp /* clear BSS */ - xorl %eax,%eax + xorl %eax,%eax movl $RELOC(__bss_start),%edi movl $RELOC(_end),%ecx subl %edi,%ecx rep stosb -#ifdef GENPVH /* - * Xen PVH expects start_info to be located at esym, after __kernel_end. 
- * Neither qemu nor firecracker do this, as the ABI doesn't expect this - * behavior https://xenbits.xen.org/docs/unstable/misc/pvh.html - * In order not to revamp all kernel memory layout, simply copy - * start_info where it is expected. + * Here, we have 2 cases : + * + * 1) We have been started by Xen + * 2) We have been started by another VMM (Qemu, Firecracker, ...) + * + * The main difference is that, when we are started by Xen, + * %ebx (addr of the hvm_start_info structure) is pointing to a location + * that will be mapped correctly later. + * + * In the second case, we have to copy this structure (and all the + * information + * contained in it) to a location that will be mapped later : + * __kernel_end + * + * To distinguish between the 2 cases, we'll use the 'cpuid' instruction */ - call copy_start_info -#endif + + push %ebx + xorl %eax, %eax + cpuid + cmp $0x566e6558, %ebx /* "VneX" */ + je .start_xen32 + + /* We have been started by a VMM that is *not* Xen */ + + /* First, copy the hvm_start_info structure to __kernel_end */ + pop %ebx + movl %ebx, %esi + movl $RELOC(__kernel_end), %edi + movl $HVM_START_INFO_SIZE, %ecx + shrl $2, %ecx + rep movsl + + /* Copy cmdline_paddr after hvm_start_info */ + movl CMDLINE_PADDR(%ebx), %esi + movl $RELOC(__kernel_end), %ecx + movl %edi, CMDLINE_PADDR(%ecx) /* Set new cmdline_paddr in hvm_start_info */ + .cmdline_copy: + movb (%esi), %al + movsb + cmp $0, %al + jne .cmdline_copy + + /* Copy memmap_paddr after cmdline */ + movl MMAP_PADDR(%ebx), %esi + movl $RELOC(__kernel_end), %ecx + movl %edi, MMAP_PADDR(%ecx) /* Set new memmap_paddr in hvm_start_info */ + movl MMAP_ENTRIES(%ebx), %eax /* Get memmap_entries */ + movl $MMAP_ENTRY_SIZE, %ebx + mull %ebx /* eax * ebx => edx:eax */ + movl %eax, %ecx + shll $2, %ecx + rep movsl + + movl $RELOC(__kernel_end), %ebx + + /* announce ourself */ + movl $VM_GUEST_GENPVH, RELOC(vm_guest) + + jmp .save_hvm_start_paddr + +.start_xen32: + pop %ebx + movl $VM_GUEST_XENPVH, 
RELOC(vm_guest) + +.save_hvm_start_paddr: + /* * save addr of the hvm_start_info structure. This is also the end * of the symbol table */ - movl %ebx, RELOC(hvm_start_paddr) /* copy start_info addr to hvm_start_paddr */ - /* below is the build of esym based on the false asumption ebx == end of esym */ + movl %ebx, RELOC(hvm_start_paddr) movl %ebx, %eax - addl $KERNBASE_LO,%eax /* 0x80000000 + ebx */ + addl $KERNBASE_LO,%eax /* for further mapping */ - movl $RELOC(esym),%ebp /* address of esym in ebp */ - movl %eax,(%ebp) /* esym points to %ebx assuming it's the end of esym but it's not */ - movl $KERNBASE_HI,4(%ebp) /* ffffffff80000000 + ebx, make it 64 bits */ + movl $RELOC(esym),%ebp + movl %eax,(%ebp) + movl $KERNBASE_HI,4(%ebp) #ifndef GENPVH /* get a page for HYPERVISOR_shared_info */ @@ -1152,14 +1155,8 @@ ENTRY(start_xen32) movl %ebx,(%ebp) movl $KERNBASE_HI,4(%ebp) - /* announce ourself */ -#ifdef GENPVH - movl $VM_GUEST_GENPVH, RELOC(vm_guest) -#else - movl $VM_GUEST_XENPVH, RELOC(vm_guest) -#endif jmp .Lbiosbasemem_finished -END(start_xen32) +END(start_genpvh) .code64 # endif /* !XENPV */ /* space for the hypercall call page */ From 58617e2b0719187c3035fd3ba2178d43380a355b Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 06:39:10 +0100 Subject: [PATCH 011/114] fix: return if not XENPVH --- sys/arch/xen/xen/hypervisor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arch/xen/xen/hypervisor.c b/sys/arch/xen/xen/hypervisor.c index 6c8b66f87d012..edcac7295b1c9 100644 --- a/sys/arch/xen/xen/hypervisor.c +++ b/sys/arch/xen/xen/hypervisor.c @@ -261,7 +261,7 @@ init_xen_early(void) } xen_start_info.flags = hvm_start_info->flags; - if (vm_guest == VM_GUEST_GENPVH) + if (vm_guest != VM_GUEST_XENPVH) return; #ifndef GENPVH From 8881beb8df48357682ae1e4bc91215b8a6be2bc0 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 06:39:27 +0100 Subject: [PATCH 012/114] fix: added Generic PVH --- 
sys/arch/x86/x86/identcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c index 65ce7afe7c589..9856cf9fb15e1 100644 --- a/sys/arch/x86/x86/identcpu.c +++ b/sys/arch/x86/x86/identcpu.c @@ -1044,7 +1044,7 @@ static const struct vm_name_guest vm_bios_vendors[] = { { "BHYVE", VM_GUEST_VM }, /* bhyve */ { "Seabios", VM_GUEST_VM }, /* KVM */ { "innotek GmbH", VM_GUEST_VIRTUALBOX }, /* Oracle VirtualBox */ - { "Generic PVH", VM_GUEST_GENPVH}, + { "Generic PVH", VM_GUEST_GENPVH}, /* Generic PVH */ }; static const struct vm_name_guest vm_system_products[] = { From 7f2f59cf914d9a6c15ab2a28ccac23710d2dfcbc Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 07:03:25 +0100 Subject: [PATCH 013/114] fix: ditched more Xen-only dependencies --- sys/arch/xen/conf/files.xen | 6 +++--- sys/arch/xen/include/hypervisor.h | 2 +- sys/arch/xen/x86/pvh_consinit.c | 2 -- sys/arch/xen/x86/xen_mainbus.c | 2 -- sys/arch/xen/xen/shutdown_xenbus.c | 2 -- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/sys/arch/xen/conf/files.xen b/sys/arch/xen/conf/files.xen index 0343aa5167a0a..204a6b7451740 100644 --- a/sys/arch/xen/conf/files.xen +++ b/sys/arch/xen/conf/files.xen @@ -8,12 +8,12 @@ file arch/xen/xen/xen_machdep.c xen file arch/xen/xen/evtchn.c xen file arch/xen/xen/xengnt.c xen file arch/xen/xen/xenmem.c xen & !xenpv -file arch/xen/x86/xen_mainbus.c xen +file arch/xen/x86/xen_mainbus.c xen & !genpvh file arch/xen/xen/xen_clock.c xen file arch/xen/x86/xen_bus_dma.c xen file arch/xen/xen/genfb_xen.c xen & genfb -file arch/xen/x86/pvh_consinit.c xenpvhvm +file arch/xen/x86/pvh_consinit.c xenpvhvm & !genpvh define hypervisorbus {} define xendevbus {} @@ -21,7 +21,7 @@ define xendevbus {} # Xen hypervisor device hypervisor { [apid = -1]}: isabus, pcibus, sysmon_power, xendevbus, acpibus attach hypervisor at hypervisorbus -file arch/xen/xen/hypervisor.c xen | hypervisor needs-flag +file 
arch/xen/xen/hypervisor.c genpvh | hypervisor needs-flag file arch/xen/xen/shutdown_xenbus.c hypervisor # Xenbus diff --git a/sys/arch/xen/include/hypervisor.h b/sys/arch/xen/include/hypervisor.h index 012641c58d65d..34ed324a7388a 100644 --- a/sys/arch/xen/include/hypervisor.h +++ b/sys/arch/xen/include/hypervisor.h @@ -66,7 +66,6 @@ struct cpu_info; int xen_hvm_init(void); int xen_hvm_init_cpu(struct cpu_info *); void xen_mainbus_attach(device_t, device_t, void *); -#endif struct hypervisor_attach_args { const char *haa_busname; @@ -79,6 +78,7 @@ struct xencons_attach_args { struct xen_npx_attach_args { const char *xa_device; }; +#endif #define u8 uint8_t diff --git a/sys/arch/xen/x86/pvh_consinit.c b/sys/arch/xen/x86/pvh_consinit.c index 95463179f8a85..17a5ca2bf07dd 100644 --- a/sys/arch/xen/x86/pvh_consinit.c +++ b/sys/arch/xen/x86/pvh_consinit.c @@ -43,7 +43,6 @@ __KERNEL_RCSID(0, "$NetBSD: pvh_consinit.c,v 1.6 2023/10/17 13:27:58 bouyer Exp #include "xen_def_cons.h" -#ifndef GENPVH int xen_pvh_consinit(void) { @@ -112,4 +111,3 @@ xen_pvh_consinit(void) #endif return 1; } -#endif /* GENPVH */ diff --git a/sys/arch/xen/x86/xen_mainbus.c b/sys/arch/xen/x86/xen_mainbus.c index bf98b8df9c0af..ae38dd8383b44 100644 --- a/sys/arch/xen/x86/xen_mainbus.c +++ b/sys/arch/xen/x86/xen_mainbus.c @@ -35,7 +35,6 @@ #include __KERNEL_RCSID(0, "$NetBSD: xen_mainbus.c,v 1.10 2021/08/07 16:19:08 thorpej Exp $"); -#ifndef GENPVH #include #include #include @@ -159,4 +158,3 @@ xen_mainbus_print(void *aux, const char *pnp) aprint_normal("%s at %s", mba->mba_busname, pnp); return UNCONF; } -#endif diff --git a/sys/arch/xen/xen/shutdown_xenbus.c b/sys/arch/xen/xen/shutdown_xenbus.c index 23889c11bf061..0e98f6fc02e6c 100644 --- a/sys/arch/xen/xen/shutdown_xenbus.c +++ b/sys/arch/xen/xen/shutdown_xenbus.c @@ -58,7 +58,6 @@ #include __KERNEL_RCSID(0, "$NetBSD: shutdown_xenbus.c,v 1.9 2020/05/13 22:13:49 jdolecek Exp $"); -#ifndef GENPVH #include #include @@ -156,4 +155,3 @@ 
shutdown_xenbus_setup(void) aprint_error("%s: unable to watch control/shutdown\n", __func__); } } -#endif /* GENPVH */ From dcdf5765b6e9b1436cf921dcd9b65916455ed643 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 07:08:27 +0100 Subject: [PATCH 014/114] docs: shorter comment lines --- sys/arch/amd64/amd64/locore.S | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 4e988e0243f97..4928cc34483c5 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1064,13 +1064,12 @@ ENTRY(start_genpvh) * 2) We have been started by another VMM (Qemu, Firecracker, ...) * * The main difference is that, when we are started by Xen, - * %ebx (addr of the hvm_start_info structure) is pointing to a location - * that will be mapped correctly later. + * %ebx (addr of the hvm_start_info structure) is pointing to a + * location that will be mapped correctly later. * - * In the second case, we have to copy this structure (and all the - * information - * contained in it) to a location that will be mapped later : - * __kernel_end + * In the second case, we have to copy this structure (and all + * the information contained in it) to a location that will be + * mapped later : __kernel_end * * To distinguish between the 2 cases, we'll use the 'cpuid' instruction */ From db94b7961d7e1eb6b7b4cfd74a9a27dd8790c7a9 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 07:10:34 +0100 Subject: [PATCH 015/114] fix: restore newline --- sys/arch/xen/x86/xen_mainbus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/arch/xen/x86/xen_mainbus.c b/sys/arch/xen/x86/xen_mainbus.c index ae38dd8383b44..97c030f21f9dc 100644 --- a/sys/arch/xen/x86/xen_mainbus.c +++ b/sys/arch/xen/x86/xen_mainbus.c @@ -149,6 +149,7 @@ xen_mainbus_attach(device_t parent, device_t self, void *aux) "couldn't establish power handler\n"); } } + static int 
xen_mainbus_print(void *aux, const char *pnp) { From 35d51b9029aba6299f259b8a2617f0b566110256 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 07:11:56 +0100 Subject: [PATCH 016/114] fix: restore newline --- sys/arch/xen/xen/shutdown_xenbus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/arch/xen/xen/shutdown_xenbus.c b/sys/arch/xen/xen/shutdown_xenbus.c index 0e98f6fc02e6c..264a72a0d9a94 100644 --- a/sys/arch/xen/xen/shutdown_xenbus.c +++ b/sys/arch/xen/xen/shutdown_xenbus.c @@ -85,6 +85,7 @@ static void xenbus_shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) { + struct xenbus_transaction *xbt; int error; char reqstr[32]; From 70da5e41b8a5176f763bcf037d0121a8ba77573c Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 11:45:23 +0100 Subject: [PATCH 017/114] fix: reverted GENPVH option, handle with conditions --- sys/arch/amd64/amd64/locore.S | 4 ---- sys/arch/amd64/conf/MICROVM | 11 +++++------ sys/arch/x86/x86/consinit.c | 2 +- sys/arch/x86/x86/cpu.c | 6 +++--- sys/arch/x86/x86/mainbus.c | 4 ++-- sys/arch/x86/x86/pmap.c | 2 +- sys/arch/xen/conf/files.xen | 6 +++--- sys/arch/xen/include/hypervisor.h | 2 -- sys/arch/xen/xen/hypervisor.c | 31 ++++++++++--------------------- 9 files changed, 25 insertions(+), 43 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 4928cc34483c5..ec427ce3205ce 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1131,12 +1131,9 @@ ENTRY(start_genpvh) movl %ebx, RELOC(hvm_start_paddr) movl %ebx, %eax addl $KERNBASE_LO,%eax - /* for further mapping */ movl $RELOC(esym),%ebp movl %eax,(%ebp) movl $KERNBASE_HI,4(%ebp) - -#ifndef GENPVH /* get a page for HYPERVISOR_shared_info */ addl $PAGE_SIZE, %ebx addl $PGOFSET,%ebx @@ -1144,7 +1141,6 @@ ENTRY(start_genpvh) movl $RELOC(HYPERVISOR_shared_info_pa),%ebp movl %ebx,(%ebp) movl $0,4(%ebp) -#endif /* XXX assume hvm_start_info+dependant 
structure fits in a single page */ addl $PAGE_SIZE, %ebx addl $PGOFSET,%ebx diff --git a/sys/arch/amd64/conf/MICROVM b/sys/arch/amd64/conf/MICROVM index 2fc54355b3d2f..80cd343c206a2 100644 --- a/sys/arch/amd64/conf/MICROVM +++ b/sys/arch/amd64/conf/MICROVM @@ -71,10 +71,9 @@ options PIPE_SOCKETPAIR # smaller, but slower pipe(2) options XENPVHVM options XEN # Generic PVH support (qemu, firecracker...) -options GENPVH -#hypervisor* at mainbus? # Xen hypervisor -#xenbus* at hypervisor? # Xen virtual bus -#xencons* at hypervisor? # Xen virtual console +hypervisor* at mainbus? # Xen hypervisor +xenbus* at hypervisor? # Xen virtual bus +xencons* at hypervisor? # Xen virtual console #xennet* at xenbus? # Xen virtual network interface #xbd* at xenbus? # Xen virtual block device # experimental: PVH dom0 support @@ -666,7 +665,7 @@ com0 at isa? port 0x3f8 irq 4 # Standard PC serial ports #njs* at cardbus? function ? # Workbit NinjaSCSI-32 # ## SCSI bus support -#scsibus* at scsi? +scsibus* at scsi? # ## SCSI devices #sd* at scsibus? target ? lun ? # SCSI disk drives @@ -1109,7 +1108,7 @@ virtio* at pci? dev ? function ? # Virtio PCI device ld* at virtio? # Virtio disk device vioif* at virtio? # Virtio network device #viornd* at virtio? # Virtio entropy device -#vioscsi* at virtio? # Virtio SCSI device +vioscsi* at virtio? # Virtio SCSI device ##vio9p* at virtio? 
# Virtio 9P device # ## Hyper-V devices diff --git a/sys/arch/x86/x86/consinit.c b/sys/arch/x86/x86/consinit.c index 147c1788ffe8d..5f1a39841c86d 100644 --- a/sys/arch/x86/x86/consinit.c +++ b/sys/arch/x86/x86/consinit.c @@ -171,7 +171,7 @@ consinit(void) #if (NCOM > 0) int rv; #endif -#if defined(XENPVHVM) && !defined(GENPVH) +#if defined(XENPVHVM) if (vm_guest == VM_GUEST_XENPVH) { if (xen_pvh_consinit() != 0) return; diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index 721a2c1fe1c6c..e4631bb011f36 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -483,7 +483,7 @@ cpu_attach(device_t parent, device_t self, void *aux) cpu_identify(ci); x86_errata(); x86_cpu_idle_init(); -#if defined(XENPVHVM) && !defined(GENPVH) +#if defined(XENPVHVM) xen_hvm_init_cpu(ci); #endif break; @@ -493,7 +493,7 @@ cpu_attach(device_t parent, device_t self, void *aux) cpu_identify(ci); x86_errata(); x86_cpu_idle_init(); -#if defined(XENPVHVM) && !defined(GENPVH) +#if defined(XENPVHVM) xen_hvm_init_cpu(ci); #endif break; @@ -1034,7 +1034,7 @@ cpu_hatch(void *v) * above. 
*/ cpu_init(ci); -#if defined(XENPVHVM) && !defined(GENPVH) +#if defined(XENPVHVM) xen_hvm_init_cpu(ci); #endif (*x86_initclock_func)(); diff --git a/sys/arch/x86/x86/mainbus.c b/sys/arch/x86/x86/mainbus.c index ac2c56758af9e..a74fbbe31b931 100644 --- a/sys/arch/x86/x86/mainbus.c +++ b/sys/arch/x86/x86/mainbus.c @@ -218,7 +218,7 @@ mainbus_attach(device_t parent, device_t self, void *aux) aprint_naive("\n"); aprint_normal("\n"); -#if defined(XENPVHVM) && !defined(GENPVH) +#if defined(XENPVHVM) xen_hvm_init(); /* before attaching CPUs */ #endif @@ -230,7 +230,7 @@ mainbus_attach(device_t parent, device_t self, void *aux) #if defined(XENPV) } #endif /* XENPV */ -#if defined(XEN) && !defined(GENPVH) +#if defined(XEN) /* * before isa/pci probe, so that PV devices are not probed again * as emulated diff --git a/sys/arch/x86/x86/pmap.c b/sys/arch/x86/x86/pmap.c index 1b911c79e331d..9caddac9e79c6 100644 --- a/sys/arch/x86/x86/pmap.c +++ b/sys/arch/x86/x86/pmap.c @@ -1383,7 +1383,7 @@ pmap_bootstrap(vaddr_t kva_start) pentium_idt_vaddr = pmap_bootstrap_valloc(1); #endif -#if defined(XENPVHVM) && !defined(GENPVH) +#if defined(XENPVHVM) /* XXX: move to hypervisor.c with appropriate API adjustments */ extern paddr_t HYPERVISOR_shared_info_pa; extern volatile struct xencons_interface *xencons_interface; /* XXX */ diff --git a/sys/arch/xen/conf/files.xen b/sys/arch/xen/conf/files.xen index 204a6b7451740..3c30954627b54 100644 --- a/sys/arch/xen/conf/files.xen +++ b/sys/arch/xen/conf/files.xen @@ -8,12 +8,12 @@ file arch/xen/xen/xen_machdep.c xen file arch/xen/xen/evtchn.c xen file arch/xen/xen/xengnt.c xen file arch/xen/xen/xenmem.c xen & !xenpv -file arch/xen/x86/xen_mainbus.c xen & !genpvh +file arch/xen/x86/xen_mainbus.c xen file arch/xen/xen/xen_clock.c xen file arch/xen/x86/xen_bus_dma.c xen file arch/xen/xen/genfb_xen.c xen & genfb -file arch/xen/x86/pvh_consinit.c xenpvhvm & !genpvh +file arch/xen/x86/pvh_consinit.c xenpvhvm define hypervisorbus {} define xendevbus {} @@ 
-21,7 +21,7 @@ define xendevbus {} # Xen hypervisor device hypervisor { [apid = -1]}: isabus, pcibus, sysmon_power, xendevbus, acpibus attach hypervisor at hypervisorbus -file arch/xen/xen/hypervisor.c genpvh | hypervisor needs-flag +file arch/xen/xen/hypervisor.c hypervisor needs-flag file arch/xen/xen/shutdown_xenbus.c hypervisor # Xenbus diff --git a/sys/arch/xen/include/hypervisor.h b/sys/arch/xen/include/hypervisor.h index 34ed324a7388a..fdc66e18e4bb3 100644 --- a/sys/arch/xen/include/hypervisor.h +++ b/sys/arch/xen/include/hypervisor.h @@ -62,7 +62,6 @@ struct cpu_info; -#ifndef GENPVH int xen_hvm_init(void); int xen_hvm_init_cpu(struct cpu_info *); void xen_mainbus_attach(device_t, device_t, void *); @@ -78,7 +77,6 @@ struct xencons_attach_args { struct xen_npx_attach_args { const char *xa_device; }; -#endif #define u8 uint8_t diff --git a/sys/arch/xen/xen/hypervisor.c b/sys/arch/xen/xen/hypervisor.c index edcac7295b1c9..281dcc825c248 100644 --- a/sys/arch/xen/xen/hypervisor.c +++ b/sys/arch/xen/xen/hypervisor.c @@ -121,7 +121,6 @@ __KERNEL_RCSID(0, "$NetBSD: hypervisor.c,v 1.96 2022/06/23 14:32:16 bouyer Exp $ #include #endif -#ifndef GENPVH int hypervisor_match(device_t, cfdata_t, void *); void hypervisor_attach(device_t, device_t, void *); @@ -155,7 +154,6 @@ union hypervisor_attach_cookie { #endif /* NPCI */ struct vcpu_attach_args hac_vcaa; }; -#endif /* * This is set when the ISA bus is attached. 
If it's not set by the @@ -190,12 +188,12 @@ volatile shared_info_t *HYPERVISOR_shared_info __read_mostly; paddr_t HYPERVISOR_shared_info_pa; union start_info_union start_info_union __aligned(PAGE_SIZE); struct hvm_start_info *hvm_start_info; + +static int xen_hvm_vec = 0; #endif int xen_version; -#ifndef GENPVH -static int xen_hvm_vec = 0; /* power management, for save/restore */ static bool hypervisor_suspend(device_t, const pmf_qual_t *); static bool hypervisor_resume(device_t, const pmf_qual_t *); @@ -208,14 +206,12 @@ enum { XMI_UNPLUG_NICS = 0x02, XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 }; -#endif #ifdef XENPVHVM bool xenhvm_use_percpu_callback = 0; -#ifndef GENPVH static void xen_init_hypercall_page(void) { @@ -237,7 +233,6 @@ xen_init_hypercall_page(void) /* XXX: vtophys(&hypercall_page) */ wrmsr(descs[1], (uintptr_t)&hypercall_page - KERNBASE); } -#endif uint32_t hvm_start_paddr; @@ -264,7 +259,6 @@ init_xen_early(void) if (vm_guest != VM_GUEST_XENPVH) return; -#ifndef GENPVH xen_init_hypercall_page(); HYPERVISOR_shared_info = (void *)((uintptr_t)HYPERVISOR_shared_info_pa + KERNBASE); @@ -283,10 +277,8 @@ init_xen_early(void) } delay_func = x86_delay = xen_delay; x86_initclock_func = xen_initclocks; -#endif } -#ifndef GENPVH static bool xen_check_hypervisordev(void) { @@ -539,10 +531,8 @@ xen_hvm_init_cpu(struct cpu_info *ci) again = 1; return 1; } -#endif /* GENPVH */ #endif /* XENPVHVM */ -#ifndef GENPVH /* we don't need Xen hypervisor in generic PVH mode */ /* * Probe for the hypervisor; always succeeds. */ @@ -564,6 +554,14 @@ hypervisor_match(device_t parent, cfdata_t match, void *aux) return 1; } +#if defined(MULTIPROCESSOR) && defined(XENPV) +static int +hypervisor_vcpu_print(void *aux, const char *parent) +{ + /* Unconfigured cpus are ignored quietly. */ + return (QUIET); +} +#endif /* MULTIPROCESSOR && XENPV */ /* * Attach the hypervisor. 
*/ @@ -776,14 +774,6 @@ hypervisor_attach(device_t parent, device_t self, void *aux) if (!pmf_device_register(self, hypervisor_suspend, hypervisor_resume)) aprint_error_dev(self, "couldn't establish power handler\n"); } -#if defined(MULTIPROCESSOR) && defined(XENPV) -static int -hypervisor_vcpu_print(void *aux, const char *parent) -{ - /* Unconfigured cpus are ignored quietly. */ - return (QUIET); -} -#endif /* MULTIPROCESSOR && XENPV */ static bool hypervisor_suspend(device_t dev, const pmf_qual_t *qual) @@ -879,4 +869,3 @@ xen_map_vcpu(struct cpu_info *ci) ci->ci_vcpuid, ret); } } -#endif /* GENPVH */ From a6953378841bb258c3d6e2ee479ebfccdbc976ab Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 12:01:56 +0100 Subject: [PATCH 018/114] fix: restored original ifdefs --- sys/arch/x86/x86/consinit.c | 3 ++- sys/arch/x86/x86/cpu.c | 6 +++--- sys/arch/xen/xen/hypervisor.c | 8 +++++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/sys/arch/x86/x86/consinit.c b/sys/arch/x86/x86/consinit.c index 5f1a39841c86d..7a65f1bfa096a 100644 --- a/sys/arch/x86/x86/consinit.c +++ b/sys/arch/x86/x86/consinit.c @@ -171,7 +171,8 @@ consinit(void) #if (NCOM > 0) int rv; #endif -#if defined(XENPVHVM) + +#ifdef XENPVHVM if (vm_guest == VM_GUEST_XENPVH) { if (xen_pvh_consinit() != 0) return; diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index e4631bb011f36..3d2feee61ec5c 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -483,7 +483,7 @@ cpu_attach(device_t parent, device_t self, void *aux) cpu_identify(ci); x86_errata(); x86_cpu_idle_init(); -#if defined(XENPVHVM) +#ifdef XENPVHVM xen_hvm_init_cpu(ci); #endif break; @@ -493,7 +493,7 @@ cpu_attach(device_t parent, device_t self, void *aux) cpu_identify(ci); x86_errata(); x86_cpu_idle_init(); -#if defined(XENPVHVM) +#ifdef XENPVHVM xen_hvm_init_cpu(ci); #endif break; @@ -1034,7 +1034,7 @@ cpu_hatch(void *v) * above. 
*/ cpu_init(ci); -#if defined(XENPVHVM) +#ifdef XENPVHVM xen_hvm_init_cpu(ci); #endif (*x86_initclock_func)(); diff --git a/sys/arch/xen/xen/hypervisor.c b/sys/arch/xen/xen/hypervisor.c index 281dcc825c248..7e26b7696a61e 100644 --- a/sys/arch/xen/xen/hypervisor.c +++ b/sys/arch/xen/xen/hypervisor.c @@ -279,6 +279,7 @@ init_xen_early(void) x86_initclock_func = xen_initclocks; } + static bool xen_check_hypervisordev(void) { @@ -417,7 +418,7 @@ xen_hvm_init(void) struct xen_hvm_param xen_hvm_param; xen_hvm_param.domid = DOMID_SELF; xen_hvm_param.index = HVM_PARAM_CONSOLE_PFN; - + if ( HYPERVISOR_hvm_op(HVMOP_get_param, &xen_hvm_param) < 0) { aprint_debug( "Xen HVM: Unable to obtain xencons page address\n"); @@ -454,6 +455,7 @@ xen_hvm_init(void) delay_func = x86_delay = xen_delay; x86_initclock_func = xen_initclocks; } + vm_guest = VM_GUEST_XENPVHVM; /* Be more specific */ return 1; } @@ -531,6 +533,7 @@ xen_hvm_init_cpu(struct cpu_info *ci) again = 1; return 1; } + #endif /* XENPVHVM */ /* @@ -562,12 +565,14 @@ hypervisor_vcpu_print(void *aux, const char *parent) return (QUIET); } #endif /* MULTIPROCESSOR && XENPV */ + /* * Attach the hypervisor. 
*/ void hypervisor_attach(device_t parent, device_t self, void *aux) { + #if NPCI >0 #ifdef PCI_BUS_FIXUP int pci_maxbus = 0; @@ -773,6 +778,7 @@ hypervisor_attach(device_t parent, device_t self, void *aux) if (!pmf_device_register(self, hypervisor_suspend, hypervisor_resume)) aprint_error_dev(self, "couldn't establish power handler\n"); + } static bool From 235e7fd198fd226e8555abee77a2d97b639ab3cd Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 11 Dec 2023 15:25:08 +0100 Subject: [PATCH 019/114] fix: removed config file, GENPVH will land in GENERIC --- sys/arch/amd64/conf/MICROVM | 1225 ----------------------------------- 1 file changed, 1225 deletions(-) delete mode 100644 sys/arch/amd64/conf/MICROVM diff --git a/sys/arch/amd64/conf/MICROVM b/sys/arch/amd64/conf/MICROVM deleted file mode 100644 index 80cd343c206a2..0000000000000 --- a/sys/arch/amd64/conf/MICROVM +++ /dev/null @@ -1,1225 +0,0 @@ -# MICROVM - -machine amd64 x86 xen -include "conf/std" # MI standard options -include "arch/xen/conf/std.xenversion" - -options CPU_IN_CKSUM -options EXEC_ELF64 # exec ELF binaries -options EXEC_SCRIPT # exec #! scripts -options MTRR -options MULTIPROCESSOR - -options CHILD_MAX=1024 # 160 is too few -options OPEN_MAX=1024 # 128 is too few - -#options SELFRELOC - -options KGDB # remote debugger -options KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x3f8,KGDB_DEVRATE=9600 -makeoptions DEBUG="-g" # compile full symbol table - -options CONSDEVNAME="\"com\"" -#options CONS_OVERRIDE - -mainbus0 at root -cpu* at mainbus? -ioapic* at mainbus? apid ? - -options INCLUDE_CONFIG_FILE # embed config file in kernel binary -maxusers 8 # estimated number of users - -options INSECURE # disable kernel security levels - X needs this - -options RTC_OFFSET=0 # hardware clock is this many mins. 
west of GMT -#options NTP # NTP phase/frequency locked loop - -#options KTRACE # system call tracing via ktrace(1) - -#options CPU_UCODE # cpu ucode loading support - -# Note: SysV IPC parameters could be changed dynamically, see sysctl(8). -#options SYSVMSG # System V-like message queues -#options SYSVSEM # System V-like semaphores -#options SYSVSHM # System V-like memory sharing - -#options MODULAR # new style module(7) framework -#options MODULAR_DEFAULT_AUTOLOAD -#options USERCONF # userconf(4) support -options PIPE_SOCKETPAIR # smaller, but slower pipe(2) -#options SYSCTL_INCLUDE_DESCR # Include sysctl descriptions in kernel - -# CPU-related options -#options USER_LDT # User-settable LDT, used by Wine -#options SVS # Separate Virtual Space -#options PCPU_IDT # Per CPU IDTs - -# GCC Spectre variant 2 mitigation -#makeoptions SPECTRE_V2_GCC_MITIGATION=1 -#options SPECTRE_V2_GCC_MITIGATION - -# CPU features -#acpicpu* at cpu? # ACPI CPU (including frequency scaling) -#coretemp* at cpu? # Intel on-die thermal sensor -#est0 at cpu0 # Intel Enhanced SpeedStep (non-ACPI) -#hyperv0 at cpu0 # Microsoft Hyper-V -#odcm0 at cpu0 # On-demand clock modulation -#powernow0 at cpu0 # AMD PowerNow! and Cool'n'Quiet (non-ACPI) -#vmt0 at cpu0 # VMware Tools - -#Xen PV support for PVH and HVM guests -options XENPVHVM -options XEN -# Generic PVH support (qemu, firecracker...) -hypervisor* at mainbus? # Xen hypervisor -xenbus* at hypervisor? # Xen virtual bus -xencons* at hypervisor? # Xen virtual console -#xennet* at xenbus? # Xen virtual network interface -#xbd* at xenbus? # Xen virtual block device -# experimental: PVH dom0 support -#options DOM0OPS -#pseudo-device xenevt -#pseudo-device xvif -#pseudo-device xbdback - - -# Alternate buffer queue strategies for better responsiveness under high -# disk I/O load. 
-#options BUFQ_READPRIO -#options BUFQ_PRIOCSCAN - -# Diagnostic/debugging support options -#options DIAGNOSTIC # inexpensive kernel consistency checks - # XXX to be commented out on release branch -#options DEBUG # expensive debugging checks/support -#options LOCKDEBUG # expensive locking checks/support - -# -# Because gcc omits the frame pointer for any -O level, the line below -# is needed to make backtraces in DDB work. -# -makeoptions COPTS="-O2 -fno-omit-frame-pointer" -#options DDB # in-kernel debugger -#options DDB_COMMANDONENTER="bt" # execute command when ddb is entered -#options DDB_ONPANIC=1 # see also sysctl(7): `ddb.onpanic' -#options DDB_HISTORY_SIZE=512 # enable history editing in DDB -#options KGDB # remote debugger -#options KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x3f8,KGDB_DEVRATE=9600 -#makeoptions DEBUG="-g" # compile full symbol table for CTF -#options DDB_COMMANDONENTER="trace;show registers" -#options SYSCALL_STATS # per syscall counts -#options SYSCALL_TIMES # per syscall times -#options SYSCALL_TIMES_HASCOUNTER # use 'broken' rdtsc (soekris) -#options KDTRACE_HOOKS # kernel DTrace hooks - -# Kernel Undefined Behavior Sanitizer (kUBSan). -#options KUBSAN # mandatory -#options UBSAN_ALWAYS_FATAL # optional: panic on all kUBSan reports - -# Kernel Address Sanitizer (kASan). You need to disable SVS to use it. -# The quarantine is optional and can help KASAN find more use-after-frees. -# Use KASAN_PANIC if you want panics instead of warnings. -#makeoptions KASAN=1 # mandatory -#options KASAN # mandatory -#no options SVS # mandatory -#options POOL_QUARANTINE # optional -#options KASAN_PANIC # optional - -# Kernel Concurrency Sanitizer (kCSan). -#makeoptions KCSAN=1 # mandatory -#options KCSAN # mandatory -#options KCSAN_PANIC # optional - -# Kernel Memory Sanitizer (kMSan). You need to disable SVS and kernel modules -# to use it. POOL_NOCACHE is optional and can help KMSAN find uninitialized -# memory in pool caches. 
Note that KMSAN requires at least 4GB of RAM. -#makeoptions KMSAN=1 # mandatory -#options KMSAN # mandatory -#no options SVS # mandatory -#no options MODULAR # mandatory -#no options MODULAR_DEFAULT_AUTOLOAD # mandatory -#options POOL_NOCACHE # optional -#options KMSAN_PANIC # optional - -# Kernel Code Coverage Driver. -#makeoptions KCOV=1 -#options KCOV - -# Fault Injection Driver. -#options FAULT - -# Heartbeat checks -#options HEARTBEAT -#options HEARTBEAT_MAX_PERIOD_DEFAULT=15 - -# Compatibility options -# x86_64 never shipped with a.out binaries; the two options below are -# only relevant to 32-bit i386 binaries -#options EXEC_AOUT # required by binaries from before 1.5 -#options COMPAT_NOMID # NetBSD 0.8, 386BSD, and BSDI - -# NetBSD backward compatibility. Support goes from COMPAT_15 up until -# the latest release. Note that really old compat (< COMPAT_16) is only -# useful for 32-bit i386 binaries. -#include "conf/compat_netbsd15.config" - -#options COMPAT_386BSD_MBRPART # recognize old partition ID - -#options COMPAT_NETBSD32 -#options EXEC_ELF32 - -# Wedge support -#options DKWEDGE_AUTODISCOVER # Automatically add dk(4) instances -#options DKWEDGE_METHOD_GPT # Supports GPT partitions as wedges -#options DKWEDGE_METHOD_BSDLABEL # Support disklabel entries as wedges -#options DKWEDGE_METHOD_MBR # Support MBR partitions as wedges -#options DKWEDGE_METHOD_APPLE # Support Apple partitions as wedges -#options DKWEDGE_METHOD_RDB # Support RDB partitions as wedges - -# File systems -#include "conf/filesystems.config" -file-system FFS -file-system EXT2FS -file-system KERNFS - -# File system options -# ffs -#options FFS_EI # FFS Endian Independent support -options FFS_NO_SNAPSHOT # No FFS snapshot support -#options QUOTA # legacy UFS quotas -#options QUOTA2 # new, in-filesystem UFS quotas -#options UFS_ACL # UFS Access Control Lists -#options UFS_DIRHASH # UFS Large Directory Hashing -#options UFS_EXTATTR # Extended attribute support for UFS1 -options WAPBL # File 
system journaling support -# lfs -#options LFS_DIRHASH # LFS version of UFS_DIRHASH -# ext2fs -#options EXT2FS_SYSTEM_FLAGS # makes ext2fs file flags (append and - # immutable) behave as system flags. -# other -#options DISKLABEL_EI # disklabel Endian Independent support -#options NFSSERVER # Network File System server - -# Networking options -#options GATEWAY # packet forwarding -options INET # IP + ICMP + TCP + UDP -options INET6 # IPV6 -#options IPSEC # IP security -#options IPSEC_DEBUG # debug for IP security -#options MPLS # MultiProtocol Label Switching (needs mpls) -#options MROUTING # IP multicast routing -#options PIM # Protocol Independent Multicast -#options NETATALK # AppleTalk networking protocols -#options CAN # Controller Area Network protocol -#options PPP_BSDCOMP # BSD-Compress compression support for PPP -#options PPP_DEFLATE # Deflate compression support for PPP -#options PPP_FILTER # Active filter support for PPP (requires bpf) -#options TCP_DEBUG # Record last TCP_NDEBUG packets with SO_DEBUG -#options TCP_SIGNATURE # Enable RFC-2385 TCP md5 signatures - -#options ALTQ # Manipulate network interfaces' output queues -#options ALTQ_BLUE # Stochastic Fair Blue -#options ALTQ_CBQ # Class-Based Queueing -#options ALTQ_CDNR # Diffserv Traffic Conditioner -#options ALTQ_FIFOQ # First-In First-Out Queue -#options ALTQ_FLOWVALVE # RED/flow-valve (red-penalty-box) -#options ALTQ_HFSC # Hierarchical Fair Service Curve -#options ALTQ_LOCALQ # Local queueing discipline -#options ALTQ_PRIQ # Priority Queueing -#options ALTQ_RED # Random Early Detection -#options ALTQ_RIO # RED with IN/OUT -#options ALTQ_WFQ # Weighted Fair Queueing - -# These options enable verbose messages for several subsystems. -# Warning, these may compile large string tables into the kernel! 
-#options ACPIVERBOSE # verbose ACPI configuration messages -#options MIIVERBOSE # verbose PHY autoconfig messages -#options PCIVERBOSE # verbose PCI device autoconfig messages -#options PCI_CONFIG_DUMP # verbosely dump PCI config space -#options PCMCIAVERBOSE # verbose PCMCIA configuration messages -#options SCSIVERBOSE # human readable SCSI error messages -#options USBVERBOSE # verbose USB device autoconfig messages -#options HDAUDIOVERBOSE # human readable HDAUDIO device names - -#options NFS_BOOT_DHCP,NFS_BOOT_BOOTPARAM - -# -# wscons options -# -# builtin terminal emulations -#options WSEMUL_VT100 # VT100 / VT220 emulation -#options WSEMUL_SUN # sun terminal emulation -#options WSEMUL_DEFAULT="\"vt100\"" # NB: default is "sun" if enabled -# different kernel output - see dev/wscons/wsdisplayvar.h -#options WSDISPLAY_CUSTOM_OUTPUT # color customization from wsconsctl(8) -#options WS_KERNEL_FG=WSCOL_GREEN -#options WS_KERNEL_BG=WSCOL_BLACK -# customization of console border color -#options WSDISPLAY_CUSTOM_BORDER # custom border colors via wsconsctl(8) -# compatibility to other console drivers -#options WSDISPLAY_COMPAT_PCVT # emulate some ioctls -#options WSDISPLAY_COMPAT_SYSCONS # emulate some ioctls -#options WSDISPLAY_COMPAT_USL # wsconscfg VT handling -#options WSDISPLAY_COMPAT_RAWKBD # can get raw scancodes -# don't attach pckbd as the console if no PS/2 keyboard is found -#options PCKBD_CNATTACH_MAY_FAIL -# see dev/pckbport/wskbdmap_mfii.c for implemented layouts -#options PCKBD_LAYOUT="(KB_DE | KB_NODEAD)" -# allocate a number of virtual screens at autoconfiguration time -#options WSDISPLAY_DEFAULTSCREENS=4 -# use a large software cursor that doesn't blink -#options PCDISPLAY_SOFTCURSOR -# modify the screen type of the console; defaults to "80x25" -#options VGA_CONSOLE_SCREENTYPE="\"80x24\"" -# work around a hardware bug that loaded fonts don't work; found on ATI cards -#options VGA_CONSOLE_ATI_BROKEN_FONTSEL -# console scrolling support. 
-#options WSDISPLAY_SCROLLSUPPORT -# enable VGA raster mode capable of displaying multilingual text on console -#options VGA_RASTERCONSOLE -# enable splash screen support; requires genfb or radeonfb -#options SPLASHSCREEN - -# Kernel root file system and dump configuration. -config netbsd root on ? type ? -#config netbsd root on sd0a type ffs -#config netbsd root on ? type nfs - -# -# Device configuration -# - -# IPMI support -#ipmi0 at mainbus? -#ipmi_acpi* at acpi? -#ipmi0 at ipmi_acpi? - -# ACPI will be used if present. If not it will fall back to MPBIOS -#acpi0 at mainbus0 -#options ACPI_SCANPCI # find PCI roots using ACPI -options MPBIOS # configure CPUs and APICs using MPBIOS -options MPBIOS_SCANPCI # MPBIOS configures PCI roots -#options PCI_INTR_FIXUP # fixup PCI interrupt routing via ACPI -#options PCI_BUS_FIXUP # fixup PCI bus numbering -#options PCI_ADDR_FIXUP # fixup PCI I/O addresses -#options ACPI_ACTIVATE_DEV # If set, activate inactive devices -#options VGA_POST # in-kernel support for VGA POST - -# ACPI devices -#acpiacad* at acpi? # ACPI AC Adapter -#acpibat* at acpi? # ACPI Battery -#acpibut* at acpi? # ACPI Button -#acpidalb* at acpi? # ACPI Direct Application Launch Button -#acpiec* at acpi? # ACPI Embedded Controller (late) -#acpiecdt* at acpi? # ACPI Embedded Controller (early) -#acpifan* at acpi? # ACPI Fan -#acpilid* at acpi? # ACPI Lid Switch -#acpipmtr* at acpi? # ACPI Power Meter (experimental) -#acpismbus* at acpi? # ACPI SMBus CMI (experimental) -#acpitz* at acpi? # ACPI Thermal Zone -#acpivga* at acpi? # ACPI Display Adapter -#acpiout* at acpivga? # ACPI Display Output Device -#acpiwdrt* at acpi? # ACPI Watchdog Resource Table -#acpiwmi* at acpi? # ACPI WMI Mapper - -# Mainboard devices -#aibs* at acpi? # ASUSTeK AI Booster hardware monitor -#asus* at acpi? # ASUS hotkeys -#attimer* at acpi? # AT Timer -#com0 at acpi? # Serial communications interface -#com1 at acpi? # Serial communications interface -#com* at acpi? 
# Serial communications interface -#fdc* at acpi? # Floppy disk controller -#fd* at fdc? drive ? # the drives themselves -#fujbp* at acpi? # Fujitsu Brightness & Pointer -#fujhk* at acpi? # Fujitsu Hotkeys -##hpacel* at acpi? # HP 3D DriveGuard accelerometer -##hpqlb* at acpi? # HP Quick Launch Buttons -#hpet* at acpihpetbus? # High Precision Event Timer (table) -#hpet* at acpinodebus? # High Precision Event Timer (device) -#joy* at acpi? # Joystick/Game port -#lpt0 at acpi? # Parallel port -#lpt1 at acpi? # Parallel port -#lpt* at acpi? # Parallel port -#mpu* at acpi? # Roland MPU-401 MIDI UART -#pckbc* at acpi? # PC keyboard controller -#pcppi* at acpi? # AT-style speaker sound -#qemufwcfg* at acpi? # QEMU Firmware Configuration device -#sdhc* at acpi? # SD Host Controller -#sony* at acpi? # Sony Notebook Controller -#spic* at acpi? # Sony Programmable I/O Controller -#wsmouse* at spic? # mouse -#thinkpad* at acpi? # IBM/Lenovo Thinkpad hotkeys -#tpm* at acpi? # ACPI TPM (Experimental) -#ug* at acpi? # Abit uGuru Hardware monitor -#valz* at acpi? # Toshiba Dynabook hotkeys -#wb* at acpi? # Winbond W83L518D SD/MMC reader -#sdmmc* at wb? # SD/MMC bus -#wmidell* at acpiwmibus? # Dell WMI mappings -#wmieeepc* at acpiwmibus? # Asus Eee PC WMI mappings -#wmihp* at acpiwmibus? # HP WMI mappings -#wmimsi* at acpiwmibus? # MSI WMI mappings - -# Basic Bus Support - -# PCI bus support -pci* at mainbus? bus ? -#pci* at pchb? bus ? -#pci* at ppb? bus ? - -# PCI bridges -#pchb* at pci? dev ? function ? # PCI-Host bridges -#options AGP_X86 -#pcib* at pci? dev ? function ? # PCI-ISA bridges -#ppb* at pci? dev ? function ? # PCI-PCI bridges -## XXX 'puc's aren't really bridges, but there's no better place for them here -#puc* at pci? dev ? function ? # PCI "universal" comm. cards -# -#amdpcib* at pci? dev ? function ? # AMD 8111 PCI-ISA w/ HPET -#hpet* at amdpcib? -# -#pwdog* at pci? dev ? function ? # QUANCOM PWDOG1 -# -#ichlpcib* at pci? dev ? function ? 
# Intel ICH PCI-LPC w/ timecounter, -# # watchdog, gpio, Speedstep and HPET -#fwhrng* at ichlpcib? # Intel 82802 FWH Random Number Generator -##hpet* at ichlpcib? -#tco* at tcoichbus? # TCO watch dog timer -# -#aapic* at pci? dev ? function ? # AMD 8131 IO apic -# -#agp* at pchb? - -# ISA bus support -isa0 at mainbus? -#isa0 at pcib? -#isa0 at amdpcib? -#isa0 at ichlpcib? - -# CardBus bridge support -#cbb* at pci? dev ? function ? -#cardslot* at cbb? -# -## CardBus bus support -#cardbus* at cardslot? -#pcmcia* at cardslot? -# -## Console Devices -# -## wscons -#pckbc0 at isa? # pc keyboard controller -#pckbd* at pckbc? # PC keyboard -#pms* at pckbc? # PS/2 mouse for wsmouse -##options PMS_DISABLE_POWERHOOK # Disable PS/2 reset on resume -#options PMS_SYNAPTICS_TOUCHPAD # Enable support for Synaptics Touchpads -#options PMS_ELANTECH_TOUCHPAD # Enable support for Elantech Touchpads -#options PMS_ALPS_TOUCHPAD # Enable support for Alps Touchpads -#vga* at pci? dev ? function ? -#genfb* at pci? dev ? function ? -#options VCONS_DRAW_INTR -#wsdisplay* at vga? console ? -#wsdisplay* at wsemuldisplaydev? -#wskbd* at pckbd? console ? -#wsmouse* at pms? mux 0 -#wsmouse* at wsmousedev? -# -#attimer0 at isa? -#pcppi0 at isa? -#sysbeep0 at pcppi? -# -## DRI legacy drivers -##i915drm* at drm? # Intel i915, i945 DRM driver -##mach64drm* at drm? # mach64 (3D Rage Pro, Rage) DRM driver -##mgadrm* at drm? # Matrox G[24]00, G[45]50 DRM driver -##r128drm* at drm? # ATI Rage 128 DRM driver -##radeondrm* at drm? # ATI Radeon DRM driver -##savagedrm* at drm? # S3 Savage DRM driver -##sisdrm* at drm? # SiS DRM driver -##tdfxdrm* at drm? # 3dfx (voodoo) DRM driver -# -## DRMKMS drivers -#i915drmkms* at pci? dev ? function ? -#intelfb* at intelfbbus? -# -#radeon* at pci? dev ? function ? -#radeondrmkmsfb* at radeonfbbus? -# -##amdgpu* at pci? dev ? function ? -##amdgpufb* at amdgpufbbus? -# -#nouveau* at pci? dev ? function ? -#nouveaufb* at nouveaufbbus? 
-# -## DRMUMS drivers -- make them loadable, but not statically linked in -#options DRM_LEGACY -##viadrmums* at drm? -# -##options DRM_MAX_RESOLUTION_HORIZONTAL=1920 # Limit DRM size in horizontal dimension -##options DRM_MAX_RESOLUTION_VERTICAL=1080 # Limit DRM size in vertical dimension -# -## Cryptographic Devices -# -## PCI cryptographic devices -#amdccp* at pci? dev ? function ? # AMD Cryptographic Coprocessor -#hifn* at pci? dev ? function ? # Hifn 7755/7811/795x -##qat* at pci? dev ? function ? # Intel QuickAssist -#ubsec* at pci? dev ? function ? # Broadcom 5501/5601/580x/582x -# -## Trusted Platform Module -#tpm* at isa? iomem 0xfed40000 irq 7 -# -## Serial Devices -# -## PCI serial interfaces -#com* at puc? port ? # 16x50s on "universal" comm boards -#cy* at pci? dev ? function ? # Cyclades Cyclom-Y serial boards -#cz* at pci? dev ? function ? # Cyclades-Z multi-port serial boards -# -## PCMCIA serial interfaces -#com* at pcmcia? function ? # Modems and serial cards -# -#pcmcom* at pcmcia? function ? # PCMCIA multi-port serial cards -#com* at pcmcom? slave ? # ...and the slave devices -# -## CardBus serial interfaces -#com* at cardbus? function ? # Modems and serial cards - -# ISA serial interfaces -#options COM_HAYESP # adds Hayes ESP serial board support -com0 at isa? port 0x3f8 irq 4 # Standard PC serial ports -#com1 at isa? port 0x2f8 irq 3 - -# Parallel Printer Interfaces - -# PCI parallel printer interfaces -#lpt* at puc? port ? # || ports on "universal" comm boards -# -## ISA parallel printer interfaces -#lpt0 at isa? port 0x378 irq 7 # standard PC parallel ports -#lpt1 at isa? port 0x278 -# -## Hardware monitors -# -#amdnb_misc* at pci? # AMD NB Misc Configuration -#amdtemp* at amdnb_misc? # AMD CPU Temperature sensors -# -#amdsmn* at pci? # AMD SMN Configuration -#amdzentemp* at amdsmnbus? # AMD Ryzen Family 17h CPU temp sensors -# -## Winbond LPC Super I/O -##wbsio* at isa? port 0x2e -##wbsio* at isa? 
port 0x4e -# -## IBM Hawk Integrated Systems Management Processor -##ibmhawk0 at iic? addr 0x37 -# -## LM7[89] and compatible hardware monitors -## Use flags to select temp sensor type (see lm(4) man page for details) -##lm0 at isa? port 0x290 flags 0x0 # other common ports: 0x280, 0x310 -##lm* at wbsio? -# -## SMSC LPC47B397 hardware monitor functions -##smsc0 at isa? port 0x02e -# -## SMSC LPC47M192 hardware monitor -##smscmon* at iic? addr 0x2c -##smscmon* at iic? addr 0x2d # (alternate address) -# -## AMD 768 and 8111 power/ACPI controllers -#amdpm* at pci? dev ? function ? # RNG and SMBus 1.0 interface -##iic* at amdpm? # sensors below are on this bus -# -## NVIDIA nForce2/3/4 SMBus controller -#nfsmbc* at pci? dev ? function ? -#nfsmb* at nfsmbc? -#iic* at nfsmb? -# -## Intel PIIX4 power management controllers -#piixpm* at pci? dev ? function ? # PIIX4 compatible PM controller -#iic* at piixpm? # SMBus on PIIX4 -# -## Intel ICH SMBus controller -#ichsmb* at pci? dev ? function ? -#iic* at ichsmb? -# -## Intel S1200,C2000 (non-pch) SMBus controller -#ismt* at pci? dev ? function ? -#iic* at ismt? -# -## DesignWare I2C controller as found in some Intel PCH and AMD FCH devices. -#dwiic* at acpi? # DesignWare I2C controller -#dwiic* at pci? # DesignWare I2C controller -#iic* at dwiic? -# -## Thermal monitor and fan controller -##dbcool* at iic? addr 0x2C # Unknown other motherboard(s) -##dbcool* at iic? addr 0x2D # Tyan S2881 -##dbcool* at iic? addr 0x2E # Tyan S2882-D -# -## IBM Thinkpad Active Protection System -##aps0 at isa? port 0x1600 -# -## Fintek Super I/O with hardware monitor -##finsio0 at isa? port 0x4e -# -## iTE IT87xxF Super I/O with watchdog and sensors support -##itesio0 at isa? port 0x2e -# -## Abit uGuru Hardware system monitor -##ug0 at isa? port 0xe0 -# -## Serial Presence Detect capable memory modules -##spdmem* at iic? addr 0x50 -##spdmem* at iic? addr 0x51 -##spdmem* at iic? addr 0x52 -##spdmem* at iic? addr 0x53 -##spdmem* at iic? 
addr 0x54 -##spdmem* at iic? addr 0x55 -##spdmem* at iic? addr 0x56 -##spdmem* at iic? addr 0x57 -##sdtemp* at iic? addr 0x18 -##sdtemp* at iic? addr 0x19 -##sdtemp* at iic? addr 0x1a -##sdtemp* at iic? addr 0x1b -##sdtemp* at iic? addr 0x1c -##sdtemp* at iic? addr 0x1d -##sdtemp* at iic? addr 0x1e -##sdtemp* at iic? addr 0x1f -# -## Intel GPIO -#igpio* at acpi? -# -## I2C HID devices -#ihidev* at iic? -# -## I2C Mice -#ims* at ihidev? reportid ? -#wsmouse* at ims? mux 0 -# -## I2O devices -#iop* at pci? dev ? function ? # I/O processor -#iopsp* at iop? tid ? # SCSI/FC-AL ports -#ld* at iop? tid ? # block devices -## XXX dpti.c wants a processor type that is not assigned for x86-64 -##dpti* at iop? tid 0 # DPT/Adaptec control interface -# -## GPIO devices -#gpio* at gpiobus? -# -## 1- Wire support -##gpioow* at gpio? offset ? mask ? # 1-wire bitbanging via gpio -#gpioow* at gpio? -#onewire* at gpioow? -# -## 1-Wire devices -#owtemp* at onewire? # Temperature sensors -# -## I2C support -##gpioiic* at gpio? -##iic* at gpioiic? -# -## Keylock support -##gpiolock* at gpio? -# -## Pulsing GPIO pins in software -##gpiopwm* at gpio? -# -## Soekris 6501 GPIO/LED driver (provides gpiobus, needs gpio) -##soekrisgpio0 at isa? port 0x680 -# -## Nuvoton NCT5104D SuperIO providing GPIO -#nct0 at isa? port ? -# -## SCSI Controllers and Devices -# -## PCI SCSI controllers -#adv* at pci? dev ? function ? # AdvanSys 1200[A,B], 9xx[U,UA] SCSI -#adw* at pci? dev ? function ? # AdvanSys 9x0UW[D], 3940U[2,3]W SCSI -#ahc* at pci? dev ? function ? # Adaptec [23]94x, aic78x0 SCSI -#ahd* at pci? dev ? function ? # Adaptec aic790x SCSI -#bha* at pci? dev ? function ? # BusLogic 9xx SCSI -#dpt* at pci? dev ? function ? # DPT SmartCache/SmartRAID -#iha* at pci? dev ? function ? # Initio INIC-940/950 SCSI -#isp* at pci? dev ? function ? # Qlogic ISP [12]0x0 SCSI/FibreChannel -#mfi* at pci? dev ? function ? # LSI MegaRAID SAS -#mfii* at pci? dev ? function ? 
# LSI MegaRAID SAS (Fusion and newer) -#mly* at pci? dev ? function ? # Mylex AcceleRAID and eXtremeRAID -#mpt* at pci? dev ? function ? # LSILogic 9x9 and 53c1030 (Fusion-MPT) -#mpii* at pci? dev ? function ? # LSI Logic Fusion-MPT II -#njs* at pci? dev ? function ? # Workbit NinjaSCSI-32 -#pcscp* at pci? dev ? function ? # AMD 53c974 PCscsi-PCI SCSI -#siop* at pci? dev ? function ? # Symbios 53c8xx SCSI -#esiop* at pci? dev ? function ? # Symbios 53c875 and newer SCSI -##options SIOP_SYMLED # drive the act. LED in software -#trm* at pci? dev ? function ? # Tekram DC-395U/UW/F, DC-315/U SCSI -# -## PCMCIA SCSI controllers -#aic* at pcmcia? function ? # Adaptec APA-1460 SCSI -#esp* at pcmcia? function ? # Qlogic ESP406/FAS408 SCSI -#spc* at pcmcia? function ? # Fujitsu MB87030/MB89352 SCSI -# -## CardBus SCSI cards -#adv* at cardbus? function ? # AdvanSys 1200[A,B], 9xx[U,UA] SCSI -#ahc* at cardbus? function ? # Adaptec ADP-1480 -#njs* at cardbus? function ? # Workbit NinjaSCSI-32 -# -## SCSI bus support -scsibus* at scsi? -# -## SCSI devices -#sd* at scsibus? target ? lun ? # SCSI disk drives -#st* at scsibus? target ? lun ? # SCSI tape drives -#cd* at scsibus? target ? lun ? # SCSI CD-ROM drives -#ch* at scsibus? target ? lun ? # SCSI autochangers -#ses* at scsibus? target ? lun ? # SCSI Enclosure Services devices -#ss* at scsibus? target ? lun ? # SCSI scanners -#uk* at scsibus? target ? lun ? # SCSI unknown -# -# -## RAID controllers and devices -#aac* at pci? dev ? function ? # Adaptec AAC family -#amr* at pci? dev ? function ? # AMI/LSI Logic MegaRAID -#arcmsr* at pci? dev ? function ? # Areca SATA RAID controllers -#cac* at pci? dev ? function ? # Compaq PCI array controllers -#ciss* at pci? dev ? function ? # HP Smart Array controllers -#icp* at pci? dev ? function ? # ICP-Vortex GDT & Intel RAID -#mlx* at pci? dev ? function ? # Mylex DAC960 & DEC SWXCR family -#twe* at pci? dev ? function ? # 3ware Escalade RAID controllers -#twa* at pci? dev ? function ? 
# 3ware Escalade 9xxx RAID controllers -# -#ld* at aac? unit ? -#ld* at amr? unit ? -#ld* at cac? unit ? -#ld* at icp? unit ? -#ld* at twe? unit ? -#ld* at twa? unit ? -#ld* at mlx? unit ? -# -#icpsp* at icp? unit ? # SCSI pass-through -# -## IDE and related devices -## PCI IDE controllers - see pciide(4) for supported hardware. -## The 0x0001 flag force the driver to use DMA, even if the driver doesn't know -## how to set up DMA modes for this chip. This may work, or may cause -## a machine hang with some controllers. -#pciide* at pci? dev ? function ? flags 0x0000 # GENERIC pciide driver -#acardide* at pci? dev ? function ? # Acard IDE controllers -#aceride* at pci? dev ? function ? # Acer Lab IDE controllers -#ahcisata* at pci? dev ? function ? # AHCI SATA controllers -#artsata* at pci? dev ? function ? # Intel i31244 SATA controller -#cmdide* at pci? dev ? function ? # CMD tech IDE controllers -#cypide* at pci? dev ? function ? # Cypress IDE controllers -#hptide* at pci? dev ? function ? # Triones/HighPoint IDE controllers -#iteide* at pci? dev ? function ? # IT Express IDE controllers -#ixpide* at pci? dev ? function ? # ATI IXP IDE controllers -#jmide* at pci? dev ? function ? # JMicron PCI-e PATA/SATA controllers -#ahcisata* at jmide? -#mvsata* at pci? dev ? function ? # Marvell Hercules-I/II -#optiide* at pci? dev ? function ? # Opti IDE controllers -#piixide* at pci? dev ? function ? # Intel IDE controllers -#pdcide* at pci? dev ? function ? # Promise IDE controllers -#pdcsata* at pci? dev ? function ? # Promise SATA150 controllers -#satalink* at pci? dev ? function ? # SiI SATALink controllers -#siisata* at pci? dev ? function ? # SiI SteelVine controllers -#siside* at pci? dev ? function ? # SiS IDE controllers -#slide* at pci? dev ? function ? # Symphony Labs IDE controllers -#svwsata* at pci? dev ? function ? # ServerWorks SATA controllers -#toshide* at pci? dev ? function ? # TOSHIBA PICCOLO controllers -#viaide* at pci? dev ? function ? 
# VIA/AMD/Nvidia IDE controllers -# -## PCMCIA IDE controllers -#wdc* at pcmcia? function ? -# -## CardBus IDE controllers -#njata* at cardbus? function ? flags 0x01 # Workbit NinjaATA-32 -#siisata* at cardbus? function ? # SiI SteelVine controllers -# -## ISA ST506, ESDI, and IDE controllers -## Use flags 0x01 if you want to try to use 32bits data I/O (the driver will -## fall back to 16bits I/O if 32bits I/O are not functional). -## Some controllers pass the initial 32bit test, but will fail later. -#wdc0 at isa? port 0x1f0 irq 14 flags 0x00 -#wdc1 at isa? port 0x170 irq 15 flags 0x00 -# -## ATA (IDE) bus support -#atabus* at ata? -#options ATADEBUG -# -## IDE drives -## Flags are used only with controllers that support DMA operations -## and mode settings (e.g. some pciide controllers) -## The lowest order four bits (rightmost digit) of the flags define the PIO -## mode to use, the next set of four bits the DMA mode and the third set the -## UltraDMA mode. For each set of four bits, the 3 lower bits define the mode -## to use, and the last bit must be 1 for this setting to be used. -## For DMA and UDMA, 0xf (1111) means 'disable'. -## 0x0fac means 'use PIO mode 4, DMA mode 2, disable UltraDMA'. -## (0xc=1100, 0xa=1010, 0xf=1111) -## 0x0000 means "use whatever the drive claims to support". -#wd* at atabus? drive ? flags 0x0000 -# -## ATAPI bus support -#atapibus* at atapi? -# -# -## ATA RAID configuration support, as found on some Promise controllers. -#pseudo-device ataraid -#ld* at ataraid? vendtype ? unit ? -# -## ATAPI devices -## flags have the same meaning as for IDE drives. -#cd* at atapibus? drive ? flags 0x0000 # ATAPI CD-ROM drives -#sd* at atapibus? drive ? flags 0x0000 # ATAPI disk drives -#st* at atapibus? drive ? flags 0x0000 # ATAPI tape drives -#uk* at atapibus? drive ? flags 0x0000 # ATAPI unknown -# -# -## NVM Express controllers and devices -#nvme* at pci? dev ? function ? -#ld* at nvme? nsid ? 
-# -# -## Miscellaneous mass storage devices -# -## ISA floppy -##fdc0 at isa? port 0x3f0 irq 6 drq 2 # standard PC floppy controllers -##fdc1 at isa? port 0x370 irq ? drq ? -# -## Network Interfaces -# -## PCI network interfaces -#age* at pci? dev ? function ? # Attansic/Atheros L1 Gigabit Ethernet -#alc* at pci? dev ? function ? # Attansic/Atheros L1C/L2C Ethernet -#ale* at pci? dev ? function ? # Attansic/Atheros L1E Ethernet -#an* at pci? dev ? function ? # Aironet PC4500/PC4800 (802.11) -#aq* at pci? dev ? function ? # Aquantia AQC 10 gigabit -#ath* at pci? dev ? function ? # Atheros 5210/5211/5212 802.11 -#athn* at pci? dev ? function ? # Atheros AR9k (802.11a/g/n) -#atw* at pci? dev ? function ? # ADMtek ADM8211 (802.11) -#bce* at pci? dev ? function ? # Broadcom 440x 10/100 Ethernet -#bge* at pci? dev ? function ? # Broadcom 570x gigabit Ethernet -#bnx* at pci? dev ? function ? # Broadcom NetXtremeII gigabit Ethernet -#bwi* at pci? dev ? function ? # Broadcom BCM43xx wireless -#bwfm* at pci? dev ? function ? # Broadcom FullMAC -#cas* at pci? dev ? function ? # Sun Cassini/Cassini+ Ethernet -#dge* at pci? dev ? function ? # Intel 82597 10GbE LR -#ena* at pci? dev ? function ? # Amazon.com Elastic Network Adapter -#ep* at pci? dev ? function ? # 3Com 3c59x -#epic* at pci? dev ? function ? # SMC EPIC/100 Ethernet -##eqos* at pci? dev ? function ? # DesignWare Ethernet QoS -#et* at pci? dev ? function ? # Agere/LSI ET1310/ET1301 Gigabit -#ex* at pci? dev ? function ? # 3Com 90x[BC] -#fxp* at pci? dev ? function ? # Intel EtherExpress PRO 10+/100B -#gem* at pci? dev ? function ? # Apple GMAC and Sun ERI gigabit enet -#gsip* at pci? dev ? function ? # NS83820 Gigabit Ethernet -#hme* at pci? dev ? function ? # Sun Microelectronics STP2002-STQ -#iavf* at pci? dev ? function ? # Intel Adaptive Virtual Function -#igc* at pci? dev ? function ? # Intel I225 2.5 gigabit -#ipw* at pci? dev ? function ? # Intel PRO/Wireless 2100 -#iwi* at pci? dev ? function ? 
# Intel PRO/Wireless 2200BG -#iwm* at pci? dev ? function ? # Intel Centrino 7260 -#iwn* at pci? dev ? function ? # Intel PRO/Wireless 4965AGN -#ixg* at pci? dev ? function ? # Intel 8259x 10 gigabit -#ixl* at pci? dev ? function ? # Intel Ethernet 700 Series -#ixv* at pci? dev ? function ? # Intel 8259x 10G virtual function -#jme* at pci? dev ? function ? # JMicron JMC2[56]0 ethernet -#kse* at pci? dev ? function ? # Micrel KSZ8841/8842 ethernet -#lii* at pci? dev ? function ? # Atheros L2 Fast-Ethernet -#malo* at pci? dev ? function ? # Marvell Libertas Wireless -#mcx* at pci? dev ? function ? # Mellanox 5th generation Ethernet -#mskc* at pci? dev ? function ? # Marvell Yukon 2 Gigabit Ethernet -#msk* at mskc? # Marvell Yukon 2 Gigabit Ethernet -#mtd* at pci? dev ? function ? # Myson MTD803 3-in-1 Ethernet -#ne* at pci? dev ? function ? # NE2000-compatible Ethernet -#nfe* at pci? dev ? function ? # NVIDIA nForce Ethernet -#ntwoc* at pci? dev ? function ? # Riscom/N2 PCI Sync Serial -#pcn* at pci? dev ? function ? # AMD PCnet-PCI Ethernet -#ral* at pci? dev ? function ? # Ralink Technology RT25x0 802.11a/b/g -#re* at pci? dev ? function ? # Realtek 8139C+/8169/8169S/8110S -#rge* at pci? dev ? function ? # Realtek 8125 -#rtk* at pci? dev ? function ? # Realtek 8129/8139 -#rtw* at pci? dev ? function ? # Realtek 8180L (802.11) -#rtwn* at pci? dev ? function ? # Realtek 8188CE/8192CE 802.11b/g/n -#sf* at pci? dev ? function ? # Adaptec AIC-6915 Ethernet -#sip* at pci? dev ? function ? # SiS 900/DP83815 Ethernet -#skc* at pci? dev ? function ? # SysKonnect SK9821 Gigabit Ethernet -#sk* at skc? # SysKonnect SK9821 Gigabit Ethernet -#ste* at pci? dev ? function ? # Sundance ST-201 Ethernet -#stge* at pci? dev ? function ? # Sundance/Tamarack TC9021 Gigabit -#ti* at pci? dev ? function ? # Alteon ACEnic gigabit Ethernet -#tl* at pci? dev ? function ? # ThunderLAN-based Ethernet -#tlp* at pci? dev ? function ? # DECchip 21x4x and clones -#txp* at pci? dev ? function ? 
# 3com 3cr990 -#vge* at pci? dev ? function ? # VIATech VT612X Gigabit Ethernet -#vmx* at pci? dev ? function ? # VMware VMXNET3 -#vr* at pci? dev ? function ? # VIA Rhine Fast Ethernet -#wi* at pci? dev ? function ? # Intersil Prism Mini-PCI (802.11b) -#wm* at pci? dev ? function ? # Intel 82543/82544 gigabit -#wpi* at pci? dev ? function ? # Intel PRO/Wireless 3945ABG -#xge* at pci? dev ? function ? # Neterion (S2io) Xframe-I 10GbE -# -## PCMCIA network interfaces -#an* at pcmcia? function ? # Aironet PC4500/PC4800 (802.11) -#awi* at pcmcia? function ? # BayStack 650/660 (802.11FH/DS) -#cnw* at pcmcia? function ? # Xircom/Netwave AirSurfer -#cs* at pcmcia? function ? # CS89xx Ethernet -#ep* at pcmcia? function ? # 3Com 3c589 and 3c562 Ethernet -#malo* at pcmcia? function ? # Marvell Libertas -#mbe* at pcmcia? function ? # MB8696x based Ethernet -#ne* at pcmcia? function ? # NE2000-compatible Ethernet -#ray* at pcmcia? function ? # Raytheon Raylink (802.11) -#sm* at pcmcia? function ? # Megahertz Ethernet -#wi* at pcmcia? function ? # Lucent/Intersil WaveLan IEEE (802.11) -#xirc* at pcmcia? function ? # Xircom CreditCard Ethernet -#com* at xirc? -#xi* at xirc? -# -#mhzc* at pcmcia? function ? # Megahertz Ethernet/Modem combo cards -#com* at mhzc? -#sm* at mhzc? -# -## CardBus network cards -#ath* at cardbus? function ? # Atheros 5210/5211/5212 802.11 -#athn* at cardbus? function ? # Atheros AR9k (802.11a/g/n) - UNTESTED -#atw* at cardbus? function ? # ADMtek ADM8211 (802.11) -#ex* at cardbus? function ? # 3Com 3C575TX -#fxp* at cardbus? function ? # Intel i8255x -#malo* at cardbus? function ? # Marvell Libertas Wireless -#ral* at cardbus? function ? # Ralink Technology RT25x0 802.11a/b/g -#re* at cardbus? function ? # Realtek 8139C+/8169/8169S/8110S -#rtk* at cardbus? function ? # Realtek 8129/8139 -#rtw* at cardbus? function ? # Realtek 8180L (802.11) -#tlp* at cardbus? function ? # DECchip 21143 -# -## MII/PHY support -#acphy* at mii? phy ? 
# DAltima AC101 and AMD Am79c874 PHYs -#amhphy* at mii? phy ? # AMD 79c901 Ethernet PHYs -#atphy* at mii? phy ? # Attansic/Atheros PHYs -#bmtphy* at mii? phy ? # Broadcom BCM5201 and BCM5202 PHYs -#brgphy* at mii? phy ? # Broadcom BCM5400-family PHYs -#ciphy* at mii? phy ? # Cicada CS8201 Gig-E PHYs -#dmphy* at mii? phy ? # Davicom DM9101 PHYs -#etphy* at mii? phy ? # Agere/LSI ET1011 TruePHY Gig-E PHYs -#exphy* at mii? phy ? # 3Com internal PHYs -#gentbi* at mii? phy ? # Generic Ten-Bit 1000BASE-[CLS]X PHYs -#glxtphy* at mii? phy ? # Level One LXT-1000 PHYs -#gphyter* at mii? phy ? # NS83861 Gig-E PHY -#icsphy* at mii? phy ? # Integrated Circuit Systems ICS189x -#igphy* at mii? phy ? # Intel IGP01E1000 -#ihphy* at mii? phy ? # Intel 82577 PHYs -#ikphy* at mii? phy ? # Intel 82563 PHYs -#inphy* at mii? phy ? # Intel 82555 PHYs -#iophy* at mii? phy ? # Intel 82553 PHYs -#ipgphy* at mii? phy ? # IC PLUS IP1000A/IP1001 PHYs -#jmphy* at mii? phy ? # Jmicron JMP202/211 PHYs -#lxtphy* at mii? phy ? # Level One LXT-970 PHYs -#makphy* at mii? phy ? # Marvell Semiconductor 88E1000 PHYs -#micphy* at mii? phy ? # Micrel KSZ[89]xxx PHYs -#nsphy* at mii? phy ? # NS83840 PHYs -#nsphyter* at mii? phy ? # NS83843 PHYs -#pnaphy* at mii? phy ? # generic HomePNA PHYs -#qsphy* at mii? phy ? # Quality Semiconductor QS6612 PHYs -#rgephy* at mii? phy ? # Realtek 8169S/8110 internal PHYs -#rlphy* at mii? phy ? # Realtek 8139/8201L PHYs -#smscphy* at mii? phy ? # SMSC LAN87xx PHYs -#sqphy* at mii? phy ? # Seeq 80220/80221/80223 PHYs -#tlphy* at mii? phy ? # ThunderLAN PHYs -#tqphy* at mii? phy ? # TDK Semiconductor PHYs -#ukphy* at mii? phy ? # generic unknown PHYs -#urlphy* at mii? phy ? # Realtek RTL8150L internal PHYs -# -# -## USB Controller and Devices -# -## Virtual USB controller -##pseudo-device vhci -# -## PCI USB controllers -#xhci* at pci? dev ? function ? # eXtensible Host Controller -#ehci* at pci? dev ? function ? # Enhanced Host Controller -#ohci* at pci? dev ? function ? 
# Open Host Controller -#uhci* at pci? dev ? function ? # Universal Host Controller (Intel) -# -## CardBus USB controllers -#ehci* at cardbus? function ? # Enhanced Host Controller -#ohci* at cardbus? function ? # Open Host Controller -#uhci* at cardbus? function ? # Universal Host Controller (Intel) -# -## ISA USB controllers -##slhci0 at isa? port 0x300 irq 5 # ScanLogic SL811HS -# -## PCMCIA USB controllers -#slhci* at pcmcia? function ? # ScanLogic SL811HS -# -## USB bus support -##usb* at vhci? -#usb* at xhci? -#usb* at ehci? -#usb* at ohci? -#usb* at uhci? -#usb* at slhci? -# -#include "dev/usb/usbdevices.config" -# -## PCI IEEE1394 controllers -#fwohci* at pci? dev ? function ? # IEEE1394 Open Host Controller -# -## CardBus IEEE1394 controllers -#fwohci* at cardbus? function ? # IEEE1394 Open Host Controller -# -#ieee1394if* at fwohci? -#fwip* at ieee1394if? # IP over IEEE1394 -#sbp* at ieee1394if? euihi ? euilo ? -# -## Audio Devices -# -## PCI audio devices -#auacer* at pci? dev ? function ? # ALi M5455 integrated AC'97 Audio -#auich* at pci? dev ? function ? # Intel/AMD/nVidia AC'97 Audio -#auixp* at pci? dev ? function ? # ATI IXP AC'97 Audio -#autri* at pci? dev ? function ? # Trident 4DWAVE based AC'97 Audio -#auvia* at pci? dev ? function ? # VIA AC'97 audio -#clcs* at pci? dev ? function ? # Cirrus Logic CS4280 -#clct* at pci? dev ? function ? # Cirrus Logic CS4281 -#cmpci* at pci? dev ? function ? # C-Media CMI8338/8738 -#eap* at pci? dev ? function ? # Ensoniq AudioPCI -#emuxki* at pci? dev ? function ? # Creative SBLive! and PCI512 -#esa* at pci? dev ? function ? # ESS Allegro-1 / Maestro-3 PCI Audio -#esm* at pci? dev ? function ? # ESS Maestro-1/2/2e PCI Audio Accelerator -#eso* at pci? dev ? function ? # ESS Solo-1 PCI AudioDrive -#fms* at pci? dev ? function ? # Forte Media FM801 -#neo* at pci? dev ? function ? # NeoMagic 256 AC'97 Audio -#sv* at pci? dev ? function ? # S3 SonicVibes -#yds* at pci? dev ? function ? 
# Yamaha DS-1 PCI Audio -# -## OPL[23] FM synthesizers -##opl0 at isa? port 0x388 # use only if not attached to sound card -#opl* at cmpci? flags 1 -#opl* at eso? -#opl* at fms? -#opl* at sv? -# -## High Definition Audio -#hdaudio* at pci? dev ? function ? # High Definition Audio -#hdafg* at hdaudiobus? -# -## Audio support -#audio* at audiobus? -# -## The spkr driver provides a simple tone interface to the built in speaker. -#spkr* at pcppi? # PC speaker -#spkr* at audio? # PC speaker (synthesized) -##wsbell* at spkr? # Bell for wscons display (module by default) -# -## MPU 401 UARTs -##mpu* at isa? port 0x330 irq 9 # MPU401 or compatible card -#mpu* at cmpci? -#mpu* at eso? -#mpu* at yds? -# -## MIDI support -#midi* at midibus? -#midi* at pcppi? # MIDI interface to the PC speaker -# -## FM-Radio devices -## PCI radio devices -##gtp* at pci? dev ? function ? # Guillemot Maxi Radio FM 2000 Radio Card -# -## Radio support -##radio* at gtp? -# -# -## Video capture devices -# -#coram* at pci? dev ? function ? # Conexant CX23885 PCI-E TV -#cxdtv* at pci? dev ? function ? # Conexant CX2388[0-3] PCI TV -# -#video* at videobus? # Analog capture interface -#dtv* at dtvbus? # Digital capture interface -# -# -## TV cards -# -## Brooktree 848/849/878/879 based TV cards -#bktr* at pci? dev ? function ? -#radio* at bktr? -# -# -## Bluetooth Controller and Device support -# -## Bluetooth PCMCIA Controllers -#bt3c* at pcmcia? function ? # 3Com 3CRWB6096-A -#btbc* at pcmcia? function ? # AnyCom BlueCard LSE041/039/139 -# -## Bluetooth SDIO Controllers -#sbt* at sdmmc? -# -## Bluetooth USB Controllers -#ubt* at uhub? port ? -#aubtfwl* at uhub? port ? -# -## Bluetooth Device Hub -#bthub* at bcsp? -#bthub* at bt3c? -#bthub* at btbc? -#bthub* at btuart? -#bthub* at sbt? -#bthub* at ubt? -# -## Bluetooth HID support -#bthidev* at bthub? -# -## Bluetooth Mouse -#btms* at bthidev? reportid ? -#wsmouse* at btms? mux 0 -# -## Bluetooth Keyboard -#btkbd* at bthidev? reportid ? 
-#wskbd* at btkbd? console ? mux 1 -# -## Bluetooth Apple Magic Mouse -#btmagic* at bthub? -#wsmouse* at btmagic? mux 0 -# -## Bluetooth Audio support -#btsco* at bthub? -# -# -## SD/MMC/SDIO Controller and Device support -# -## SD/MMC controller -#sdhc* at pci? # SD Host Controller -#rtsx* at pci? # Realtek RTS5209/RTS5229 Card Reader -#sdhc* at cardbus? # SD Host Controller -#sdmmc* at sdhc? # SD/MMC bus -#sdmmc* at rtsx? # SD/MMC bus -# -#ld* at sdmmc? -# -# -## Middle Digital, Inc. PCI-Weasel serial console board control -## devices (watchdog timer, etc.) -#weasel* at pci? -# -## Virtio devices -virtio* at pci? dev ? function ? # Virtio PCI device -#viomb* at virtio? # Virtio memory balloon device -ld* at virtio? # Virtio disk device -vioif* at virtio? # Virtio network device -#viornd* at virtio? # Virtio entropy device -vioscsi* at virtio? # Virtio SCSI device -##vio9p* at virtio? # Virtio 9P device -# -## Hyper-V devices -#vmbus* at acpi? # Hyper-V VMBus -#genfb* at vmbus? # Hyper-V Synthetic Video Framebuffer -#hvkbd* at vmbus? # Hyper-V Synthetic Keyboard -#wskbd* at hvkbd? console ? mux 1 -#hvn* at vmbus? # Hyper-V NetVSC -#hvs* at vmbus? # Hyper-V StorVSC -#hvheartbeat* at vmbus? # Hyper-V Heartbeat Service -#hvshutdown* at vmbus? # Hyper-V Guest Shutdown Service -#hvtimesync* at vmbus? # Hyper-V Time Synchronization Service -##hvkvp* at vmbus? # Hyper-V Data Exchange Service -# -## Pseudo-Devices -# -#pseudo-device crypto # /dev/crypto device -#pseudo-device swcrypto # software crypto implementation -# -## disk/mass storage pseudo-devices -#pseudo-device bio # RAID control device driver -#pseudo-device ccd # concatenated/striped disk devices -#pseudo-device cgd # cryptographic disk devices -#pseudo-device raid # RAIDframe disk driver -#options RAID_AUTOCONFIG # auto-configuration of RAID components -## Options to enable various other RAIDframe RAID types. 
-##options RF_INCLUDE_EVENODD=1 -##options RF_INCLUDE_RAID5_RS=1 -##options RF_INCLUDE_PARITYLOGGING=1 -##options RF_INCLUDE_CHAINDECLUSTER=1 -##options RF_INCLUDE_INTERDECLUSTER=1 -##options RF_INCLUDE_PARITY_DECLUSTERING=1 -##options RF_INCLUDE_PARITY_DECLUSTERING_DS=1 -#pseudo-device fss # file system snapshot device -# -#pseudo-device md # memory disk device (ramdisk) -#options MEMORY_DISK_HOOKS # enable md specific hooks -#options MEMORY_DISK_DYNAMIC # enable dynamic resizing -# -#pseudo-device vnd # disk-like interface to files -#options VND_COMPRESSION # compressed vnd(4) -# -# -## network pseudo-devices -pseudo-device bpfilter # Berkeley packet filter -#pseudo-device carp # Common Address Redundancy Protocol -pseudo-device loop # network loopback -##pseudo-device mpls # MPLS pseudo-interface -#pseudo-device ppp # Point-to-Point Protocol -#pseudo-device pppoe # PPP over Ethernet (RFC 2516) -#pseudo-device sl # Serial Line IP -#pseudo-device irframetty # IrDA frame line discipline -#pseudo-device tun # network tunneling over tty -#pseudo-device tap # virtual Ethernet -#pseudo-device gre # generic L3 over IP tunnel -#pseudo-device gif # IPv[46] over IPv[46] tunnel (RFC1933) -#pseudo-device ipsecif # tunnel interface for routing based ipsec -##pseudo-device faith # IPv[46] tcp relay translation i/f -#pseudo-device stf # 6to4 IPv6 over IPv4 encapsulation -#pseudo-device vlan # IEEE 802.1q encapsulation -#pseudo-device bridge # simple inter-network bridging -#pseudo-device vether # Virtual Ethernet for bridge -#pseudo-device agr # IEEE 802.3ad link aggregation -#pseudo-device l2tp # L2TPv3 interface -#pseudo-device lagg # Link aggregation interface -#pseudo-device npf # NPF packet filter -# -##pseudo-device canloop # CAN loopback interface -# -## -## accept filters -#pseudo-device accf_data # "dataready" accept filter -#pseudo-device accf_http # "httpready" accept filter -# -## miscellaneous pseudo-devices -pseudo-device pty # pseudo-terminals -#pseudo-device 
sequencer # MIDI sequencer -## rnd works; RND_COM does not on port i386 yet. -##options RND_COM # use "com" randomness as well (BROKEN) -pseudo-device clockctl # user control of clock subsystem -pseudo-device ksyms # /dev/ksyms -#pseudo-device lockstat # lock profiling -#pseudo-device bcsp # BlueCore Serial Protocol -#pseudo-device btuart # Bluetooth HCI UART (H4) -##pseudo-device nvmm # NetBSD Virtual Machine Monitor -#pseudo-device swwdog # software watchdog timer -- swwdog(4) -# -## wscons pseudo-devices -#pseudo-device wsmux # mouse & keyboard multiplexor -#pseudo-device wsfont -## Give us a choice of fonts based on monitor size -#options FONT_BOLD8x16 -#options FONT_BOLD16x32 -# -## pseudo audio device driver -#pseudo-device pad -# -## userland interface to drivers, including autoconf and properties retrieval -#pseudo-device drvctl -# -## EFI runtime support -#options EFI_RUNTIME -#pseudo-device efi # /dev/efi -# -#include "dev/veriexec.config" -# -#options PAX_SEGVGUARD=0 # PaX Segmentation fault guard -#options PAX_MPROTECT=1 # PaX mprotect(2) restrictions -#options PAX_MPROTECT_DEBUG=1 # PaX mprotect debug -#options PAX_ASLR=1 # PaX Address Space Layout Randomization -#options PAX_ASLR_DEBUG=1 # PaX ASLR debug -# -## Pull in optional local configuration - always at end -#cinclude "arch/amd64/conf/GENERIC.local" From 775aaf417cce863d15a658627382a9adf7a0d9ff Mon Sep 17 00:00:00 2001 From: Gregory 'GaLi' Cavelier Date: Mon, 11 Dec 2023 19:21:57 +0100 Subject: [PATCH 020/114] fix: initialize HYPERVISOR_shared_info_pa only when running on Xen --- sys/arch/amd64/amd64/locore.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index ec427ce3205ce..bad3c24fea71b 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1135,6 +1135,9 @@ ENTRY(start_genpvh) movl %eax,(%ebp) movl $KERNBASE_HI,4(%ebp) /* get a page for HYPERVISOR_shared_info */ + /* this is only needed if we are 
running on Xen */ + cmpl $VM_GUEST_XENPVH, RELOC(vm_guest) + jne .add_hvm_start_info_page addl $PAGE_SIZE, %ebx addl $PGOFSET,%ebx andl $~PGOFSET,%ebx @@ -1142,6 +1145,7 @@ ENTRY(start_genpvh) movl %ebx,(%ebp) movl $0,4(%ebp) /* XXX assume hvm_start_info+dependant structure fits in a single page */ +.add_hvm_start_info_page: addl $PAGE_SIZE, %ebx addl $PGOFSET,%ebx andl $~PGOFSET,%ebx From 957b9e1ec83eeab6f4dbef40b1dd651ed607260b Mon Sep 17 00:00:00 2001 From: Gregory 'GaLi' Cavelier Date: Wed, 13 Dec 2023 08:36:53 +0100 Subject: [PATCH 021/114] fix: only copy memmap info if present --- sys/arch/amd64/amd64/genassym.cf | 1 + sys/arch/amd64/amd64/locore.S | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf index 8a410c058e4c2..facd9216f755d 100644 --- a/sys/arch/amd64/amd64/genassym.cf +++ b/sys/arch/amd64/amd64/genassym.cf @@ -383,6 +383,7 @@ define SIR_XENIPL_SCHED SIR_XENIPL_SCHED define SIR_XENIPL_HIGH SIR_XENIPL_HIGH define EVTCHN_UPCALL_MASK offsetof(struct vcpu_info, evtchn_upcall_mask) define HVM_START_INFO_SIZE sizeof(struct hvm_start_info) +define START_INFO_VERSION offsetof(struct hvm_start_info, version) define MMAP_PADDR offsetof(struct hvm_start_info, memmap_paddr) define MMAP_ENTRIES offsetof(struct hvm_start_info, memmap_entries) define MMAP_ENTRY_SIZE sizeof(struct hvm_memmap_table_entry) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index bad3c24fea71b..d0522b7cd003d 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1100,7 +1100,10 @@ ENTRY(start_genpvh) cmp $0, %al jne .cmdline_copy - /* Copy memmap_paddr after cmdline */ + /* Copy memmap_paddr after cmdline (only if hvm_start_info->version != 0) */ + xorl %eax, %eax + cmpl START_INFO_VERSION(%ebx), %eax + je .reload_ebx movl MMAP_PADDR(%ebx), %esi movl $RELOC(__kernel_end), %ecx movl %edi, MMAP_PADDR(%ecx) /* Set new memmap_paddr in hvm_start_info 
*/ @@ -1111,6 +1114,7 @@ ENTRY(start_genpvh) shll $2, %ecx rep movsl +.reload_ebx: movl $RELOC(__kernel_end), %ebx /* announce ourself */ From 0f6a6e01002df6f3865f4c4ce6c2be38c1d90173 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 13 Dec 2023 18:13:13 +0100 Subject: [PATCH 022/114] fix: handle console= parameter passed via generic PVH VMM --- sys/arch/x86/x86/consinit.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/sys/arch/x86/x86/consinit.c b/sys/arch/x86/x86/consinit.c index 7a65f1bfa096a..4b9f5f4d86c3f 100644 --- a/sys/arch/x86/x86/consinit.c +++ b/sys/arch/x86/x86/consinit.c @@ -171,6 +171,7 @@ consinit(void) #if (NCOM > 0) int rv; #endif + char console_devname[16] = ""; #ifdef XENPVHVM if (vm_guest == VM_GUEST_XENPVH) { @@ -178,6 +179,13 @@ consinit(void) return; /* fallback to native console selection, usefull for dom0 PVH */ } + if (vm_guest == VM_GUEST_GENPVH) { + union xen_cmdline_parseinfo xcp; + /* get console= parameter from generic PVH VMM */ + xen_parse_cmdline(XEN_PARSE_CONSOLE, &xcp); + strncpy(console_devname, xcp.xcp_console, + sizeof(xcp.xcp_console)); + } #endif if (initted) return; @@ -185,10 +193,14 @@ consinit(void) #ifndef CONS_OVERRIDE consinfo = lookup_bootinfo(BTINFO_CONSOLE); - if (!consinfo) + if (!consinfo) { #endif consinfo = &default_consinfo; - + /* console= parameter was not passed via a generic PVH VMM */ + if (!console_devname[0]) + strncpy(console_devname, consinfo->devname, + sizeof(consinfo->devname)); + } #if (NGENFB > 0) #if defined(XENPVHVM) && defined(DOM0OPS) if (vm_guest == VM_GUEST_XENPVH && xendomain_is_dom0()) @@ -197,8 +209,7 @@ consinit(void) #endif /* XENPVHVM */ fbinfo = lookup_bootinfo(BTINFO_FRAMEBUFFER); #endif - - if (!strcmp(consinfo->devname, "pc")) { + if (!strcmp(console_devname, "pc")) { int error; #if (NGENFB > 0) if (fbinfo && fbinfo->physaddr > 0) { @@ -254,7 +265,7 @@ consinit(void) return; } #if (NCOM > 0) - if (!strcmp(consinfo->devname, 
"com")) { + if (!strcmp(console_devname, "com")) { int addr = consinfo->addr; int speed = consinfo->speed; From d2e651d5ec0c256834832aeccdbc83532e2dc9bd Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 13 Dec 2023 18:29:26 +0100 Subject: [PATCH 023/114] fix: wrong placement and leftovers --- sys/arch/x86/x86/consinit.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sys/arch/x86/x86/consinit.c b/sys/arch/x86/x86/consinit.c index 4b9f5f4d86c3f..be0d59032e3f2 100644 --- a/sys/arch/x86/x86/consinit.c +++ b/sys/arch/x86/x86/consinit.c @@ -193,14 +193,13 @@ consinit(void) #ifndef CONS_OVERRIDE consinfo = lookup_bootinfo(BTINFO_CONSOLE); - if (!consinfo) { + if (!consinfo) #endif consinfo = &default_consinfo; - /* console= parameter was not passed via a generic PVH VMM */ - if (!console_devname[0]) - strncpy(console_devname, consinfo->devname, - sizeof(consinfo->devname)); - } + /* console= parameter was not passed via a generic PVH VMM */ + if (!console_devname[0]) + strncpy(console_devname, consinfo->devname, + sizeof(consinfo->devname)); #if (NGENFB > 0) #if defined(XENPVHVM) && defined(DOM0OPS) if (vm_guest == VM_GUEST_XENPVH && xendomain_is_dom0()) @@ -289,14 +288,14 @@ consinit(void) } #endif #if (NNULLCONS > 0) - if (!strcmp(consinfo->devname, "nullcons")) { + if (!strcmp(console_devname, "nullcons")) { void nullcninit(struct consdev *cn); nullcninit(0); return; } #endif - panic("invalid console device %s", consinfo->devname); + panic("invalid console device %s", console_devname); } #ifdef KGDB From 6e0b361b345cdca5685cde130e0a5969b3bd9db5 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 13 Dec 2023 22:09:15 +0100 Subject: [PATCH 024/114] fix: sizeof dest, not src --- sys/arch/x86/x86/consinit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/arch/x86/x86/consinit.c b/sys/arch/x86/x86/consinit.c index be0d59032e3f2..f72f872ab54e6 100644 --- a/sys/arch/x86/x86/consinit.c +++ 
b/sys/arch/x86/x86/consinit.c @@ -184,7 +184,7 @@ consinit(void) /* get console= parameter from generic PVH VMM */ xen_parse_cmdline(XEN_PARSE_CONSOLE, &xcp); strncpy(console_devname, xcp.xcp_console, - sizeof(xcp.xcp_console)); + sizeof(console_devname)); } #endif if (initted) @@ -199,7 +199,7 @@ consinit(void) /* console= parameter was not passed via a generic PVH VMM */ if (!console_devname[0]) strncpy(console_devname, consinfo->devname, - sizeof(consinfo->devname)); + sizeof(console_devname)); #if (NGENFB > 0) #if defined(XENPVHVM) && defined(DOM0OPS) if (vm_guest == VM_GUEST_XENPVH && xendomain_is_dom0()) From 8d12eb89d5ffe74d10358872347867613ae1f2bd Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 18 Dec 2023 08:31:34 +0100 Subject: [PATCH 025/114] feat: first PoC commit for MMIO boot --- sys/arch/amd64/amd64/amd64_mainbus.c | 8 +- sys/arch/amd64/conf/files.amd64 | 2 +- sys/dev/acpi/files.acpi | 2 +- sys/dev/pci/ld_virtio.c | 1 - sys/dev/pci/virtio.c | 2 +- sys/dev/pci/virtioreg.h | 28 ++++++ sys/dev/pci/virtiovar.h | 2 + sys/dev/virtio/files.virtio | 7 ++ sys/dev/virtio/virtio_mmio.c | 137 +++++++++++++++++++++++---- sys/dev/virtio/virtio_mmiovar.h | 2 + 10 files changed, 166 insertions(+), 25 deletions(-) diff --git a/sys/arch/amd64/amd64/amd64_mainbus.c b/sys/arch/amd64/amd64/amd64_mainbus.c index 8a1bb0c55c620..d118ef8bf16a8 100644 --- a/sys/arch/amd64/amd64/amd64_mainbus.c +++ b/sys/arch/amd64/amd64/amd64_mainbus.c @@ -79,6 +79,7 @@ __KERNEL_RCSID(0, "$NetBSD: amd64_mainbus.c,v 1.7 2021/08/07 16:18:41 thorpej Ex #include #endif /* __HAVE_PCI_MSI_MSIX */ #endif +#include /* * XXXfvdl ACPI @@ -100,6 +101,7 @@ union amd64_mainbus_attach_args { #if NIPMI > 0 struct ipmi_attach_args mba_ipmi; #endif + struct cmdline_attach_args mba_cmdline; }; /* @@ -158,7 +160,6 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) #if NISA > 0 || NPCI > 0 || NACPICA > 0 || NIPMI > 0 union amd64_mainbus_attach_args mba; #endif - #if NISADMA > 0 && 
NACPICA > 0 /* * ACPI needs ISA DMA initialized before they start probing. @@ -212,6 +213,7 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) if (npcibus == 0 && mpacpi_active) npcibus = mp_pci_scan(self, &mba.mba_pba, pcibusprint); #endif + #if defined(MPBIOS) && defined(MPBIOS_SCANPCI) if (npcibus == 0 && mpbios_scanned != 0) npcibus = mp_pci_scan(self, &mba.mba_pba, pcibusprint); @@ -237,6 +239,10 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) } #endif + mba.mba_cmdline.memt = x86_bus_space_mem; + mba.mba_cmdline.dmat = &pci_bus_dma_tag; + config_found(self, &mba.mba_cmdline, NULL, CFARGS(.iattr = "cmdlinebus")); + if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); } diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64 index 99e0e512938b3..3497439097709 100644 --- a/sys/arch/amd64/conf/files.amd64 +++ b/sys/arch/amd64/conf/files.amd64 @@ -94,7 +94,7 @@ include "dev/i2o/files.i2o" # XXX BIOS32 only if something that uses it is configured! 
device mainbus: isabus, pcibus, bios32, acpibus, cpubus, ioapicbus, - ipmibus, hypervisorbus + ipmibus, hypervisorbus, cmdlinebus attach mainbus at root file arch/amd64/amd64/amd64_mainbus.c mainbus & !xenpv file arch/x86/x86/mainbus.c mainbus diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi index 6763f9836693e..9f87bb3b6389c 100644 --- a/sys/dev/acpi/files.acpi +++ b/sys/dev/acpi/files.acpi @@ -273,7 +273,7 @@ attach amdccp at acpinodebus with amdccp_acpi file dev/acpi/amdccp_acpi.c amdccp_acpi # QEMU Virtio -attach virtio at acpinodebus with virtio_acpi +attach virtio at acpinodebus with virtio_acpi: virtio_mmio file dev/acpi/virtio_acpi.c virtio_acpi # OHCI-compliant USB controller diff --git a/sys/dev/pci/ld_virtio.c b/sys/dev/pci/ld_virtio.c index 0cea794c9a07d..b8d405f5cc4cf 100644 --- a/sys/dev/pci/ld_virtio.c +++ b/sys/dev/pci/ld_virtio.c @@ -340,7 +340,6 @@ ld_virtio_attach(device_t parent, device_t self, void *aux) goto err; } qsize = sc->sc_vq.vq_num; - if (virtio_child_attach_finish(vsc, &sc->sc_vq, 1, NULL, VIRTIO_F_INTR_MSIX) != 0) goto err; diff --git a/sys/dev/pci/virtio.c b/sys/dev/pci/virtio.c index 7c26edc9384e5..e499cd27d724f 100644 --- a/sys/dev/pci/virtio.c +++ b/sys/dev/pci/virtio.c @@ -816,6 +816,7 @@ virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, /* alloc and map the memory */ r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0, &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK); + if (r != 0) { aprint_error_dev(sc->sc_dev, "virtqueue %d for %s allocation failed, " @@ -891,7 +892,6 @@ virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, "descriptors\n", size_indirect, maxnsegs * vq_num); return 0; - err: sc->sc_ops->setup_queue(sc, vq->vq_index, 0); if (vq->vq_dmamap) diff --git a/sys/dev/pci/virtioreg.h b/sys/dev/pci/virtioreg.h index 2d10af97dc1c3..a1f5d78853d5c 100644 --- a/sys/dev/pci/virtioreg.h +++ b/sys/dev/pci/virtioreg.h @@ -98,6 +98,21 @@ #define 
VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET 64 #define VIRTIO_CONFIG_DEVICE_STATUS_FAILED 128 +/* Status byte for guest to report progress. */ +#define VIRTIO_CONFIG_STATUS_RESET 0x00 +/* We have seen device and processed generic fields. */ +#define VIRTIO_CONFIG_STATUS_ACK 0x01 +/* We have found a driver for the device. */ +#define VIRTIO_CONFIG_STATUS_DRIVER 0x02 +/* Driver has used its parts of the config, and is happy. */ +#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04 +/* Driver has finished configuring features (modern only). */ +#define VIRTIO_CONFIG_S_FEATURES_OK 0x08 +/* Device entered invalid state, driver must reset it. */ +#define VIRTIO_CONFIG_S_NEEDS_RESET 0x40 +/* We've given up on this device. */ +#define VIRTIO_CONFIG_STATUS_FAILED 0x80 + /* common ISR status flags */ #define VIRTIO_CONFIG_ISR_QUEUE_INTERRUPT 1 #define VIRTIO_CONFIG_ISR_CONFIG_CHANGE 2 @@ -144,6 +159,19 @@ */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* + * Some VirtIO feature bits (currently bits 28 through 34) are + * reserved for the transport being used (eg. virtio_ring), the + * rest are per-device feature bits. + */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 34 + +/* Support for indirect buffer descriptors. */ +#define VIRTIO_RING_F_INDIRECT_DESC (1UL << 28) + +/* Support to suppress interrupt until specific index is reached. */ +#define VIRTIO_RING_F_EVENT_IDX (1UL << 29) /* Virtio ring descriptors: 16 bytes. * These can chain together via "next". 
*/ diff --git a/sys/dev/pci/virtiovar.h b/sys/dev/pci/virtiovar.h index fffa9c698b6a7..704e0c29167aa 100644 --- a/sys/dev/pci/virtiovar.h +++ b/sys/dev/pci/virtiovar.h @@ -132,7 +132,9 @@ struct virtio_ops { uint16_t (*read_queue_size)(struct virtio_softc *, uint16_t); void (*setup_queue)(struct virtio_softc *, uint16_t, uint64_t); void (*set_status)(struct virtio_softc *, int); + int (*get_status)(struct virtio_softc *); void (*neg_features)(struct virtio_softc *, uint64_t); + int (*finalize_features)(struct virtio_softc *); int (*alloc_interrupts)(struct virtio_softc *); void (*free_interrupts)(struct virtio_softc *); int (*setup_interrupts)(struct virtio_softc *, int); diff --git a/sys/dev/virtio/files.virtio b/sys/dev/virtio/files.virtio index 4e399624ab0bf..9ce6c7011ac1d 100644 --- a/sys/dev/virtio/files.virtio +++ b/sys/dev/virtio/files.virtio @@ -3,8 +3,15 @@ # XXX the contents of the following included file should be moved here include "dev/pci/files.virtio" +define cmdlinebus { } +#define dummybus { } + file dev/virtio/virtio_mmio.c virtio_mmio device viocon attach viocon at virtio file dev/virtio/viocon.c viocon + + +attach virtio at cmdlinebus with mmio_cmdline: virtio_mmio +file dev/virtio/virtio_mmio_cmdline.c mmio_cmdline diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index 7745006562e27..ab4f7e3923f0e 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -39,6 +39,7 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.11 2023/07/07 07:19:36 rin Exp $") #define VIRTIO_PRIVATE #include +#include #define VIRTIO_MMIO_MAGIC ('v' | 'i' << 8 | 'r' << 16 | 't' << 24) @@ -56,10 +57,18 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.11 2023/07/07 07:19:36 rin Exp $") #define VIRTIO_MMIO_QUEUE_NUM 0x038 #define VIRTIO_MMIO_QUEUE_ALIGN 0x03c #define VIRTIO_MMIO_QUEUE_PFN 0x040 +#define VIRTIO_MMIO_QUEUE_READY 0x044 /* requires version 2 */ #define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 #define VIRTIO_MMIO_INTERRUPT_STATUS 
0x060 #define VIRTIO_MMIO_INTERRUPT_ACK 0x064 #define VIRTIO_MMIO_STATUS 0x070 +#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 /* requires version 2 */ +#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 /* requires version 2 */ +#define VIRTIO_MMIO_QUEUE_AVAIL_LOW 0x090 /* requires version 2 */ +#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH 0x094 /* requires version 2 */ +#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0 /* requires version 2 */ +#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4 /* requires version 2 */ +#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc /* requires version 2 */ #define VIRTIO_MMIO_CONFIG 0x100 #define VIRTIO_MMIO_INT_VRING (1 << 0) @@ -86,6 +95,7 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.11 2023/07/07 07:19:36 rin Exp $") static void virtio_mmio_kick(struct virtio_softc *, uint16_t); static uint16_t virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t); static void virtio_mmio_setup_queue(struct virtio_softc *, uint16_t, uint64_t); +static int virtio_mmio_get_status(struct virtio_softc *); static void virtio_mmio_set_status(struct virtio_softc *, int); static void virtio_mmio_negotiate_features(struct virtio_softc *, uint64_t); static int virtio_mmio_alloc_interrupts(struct virtio_softc *); @@ -96,6 +106,7 @@ static const struct virtio_ops virtio_mmio_ops = { .kick = virtio_mmio_kick, .read_queue_size = virtio_mmio_read_queue_size, .setup_queue = virtio_mmio_setup_queue, + .get_status = virtio_mmio_get_status, .set_status = virtio_mmio_set_status, .neg_features = virtio_mmio_negotiate_features, .alloc_interrupts = virtio_mmio_alloc_interrupts, @@ -117,13 +128,47 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, uint16_t idx, uint64_t addr) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_SEL, idx); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NUM, bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NUM_MAX)); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, 
VIRTIO_MMIO_QUEUE_ALIGN, - VIRTIO_PAGE_SIZE); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_PFN, - addr / VIRTIO_PAGE_SIZE); + + if (sc->mmio_version == 1) { + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_SEL, idx); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_ALIGN, + VIRTIO_PAGE_SIZE); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_PFN, + addr / VIRTIO_PAGE_SIZE); + } else { + switch(idx) { + case 0: + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_DESC_LOW, + addr); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_DESC_HIGH, + ((uint64_t)addr >> 32)); + break; + case 1: + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_AVAIL_LOW, + addr); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_AVAIL_HIGH, + ((uint64_t)addr >> 32)); + break; + case 2: + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_LOW, + addr); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_HIGH, + ((uint64_t)addr >> 32)); + break; + } + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_READY, 1); + } +} + +static int +virtio_mmio_get_status(struct virtio_softc *vsc) +{ + struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; + + return bus_space_read_4(sc->sc_iot, sc->sc_ioh, + VIRTIO_MMIO_STATUS); } static void @@ -163,21 +208,19 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) "wrong magic value 0x%08x; giving up\n", magic); return; } - ver = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_VERSION); - if (ver != 1) { + if (ver < 1 || ver > 2) { aprint_error_dev(vsc->sc_dev, "unknown version 0x%02x; giving up\n", ver); return; } + sc->mmio_version = ver; id = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_DEVICE_ID); - /* we could use PAGE_SIZE, but virtio(4) assumes 4KiB for now */ - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_GUEST_PAGE_SIZE, - VIRTIO_PAGE_SIZE); - - /* no device connected. 
*/ + if (ver == 1) + bus_space_write_4(sc->sc_iot, sc->sc_ioh, + VIRTIO_MMIO_GUEST_PAGE_SIZE, VIRTIO_PAGE_SIZE); if (id == 0) return; @@ -227,27 +270,79 @@ virtio_mmio_common_detach(struct virtio_mmio_softc *sc, int flags) return 0; } +static uint64_t +virtio_filter_transport_features(uint64_t features) +{ + uint64_t transport, mask; + + transport = (1ULL << + (VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START)) - 1; + transport <<= VIRTIO_TRANSPORT_F_START; + + mask = -1ULL & ~transport; + mask |= VIRTIO_RING_F_INDIRECT_DESC; + mask |= VIRTIO_RING_F_EVENT_IDX; + mask |= VIRTIO_F_VERSION_1; + + return (features & mask); +} /* * Feature negotiation. */ static void virtio_mmio_negotiate_features(struct virtio_softc *vsc, uint64_t - guest_features) + child_features) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - uint32_t r; + uint64_t host_features, features; + int status; + + if (sc->mmio_version > 1) + child_features |= VIRTIO_F_VERSION_1; + + bus_space_write_4(sc->sc_iot, sc->sc_ioh, + VIRTIO_MMIO_HOST_FEATURES_SEL, 1); + host_features = bus_space_read_4(sc->sc_iot, sc->sc_ioh, + VIRTIO_MMIO_HOST_FEATURES); + host_features <<= 32; bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES_SEL, 0); - r = bus_space_read_4(sc->sc_iot, sc->sc_ioh, + host_features |= bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES); - r &= guest_features; + + /* + * Limit negotiated features to what the driver, virtqueue, and + * host all support. 
+ */ + features = host_features & child_features; + features = virtio_filter_transport_features(features); + vsc->sc_active_features = features; + bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_GUEST_FEATURES_SEL, 0); + VIRTIO_MMIO_GUEST_FEATURES_SEL, 1); bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_GUEST_FEATURES, r); + VIRTIO_MMIO_GUEST_FEATURES, features >> 32); - vsc->sc_active_features = r; + bus_space_write_4(sc->sc_iot, sc->sc_ioh, + VIRTIO_MMIO_GUEST_FEATURES_SEL, 0); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, + VIRTIO_MMIO_GUEST_FEATURES, features); + + if (sc->mmio_version > 1) { + /* + * Must re-read the status after setting it to verify the + * negotiated features were accepted by the device. + */ + /* https://twitter.com/cperciva/status/1548447423436967936 */ + virtio_mmio_set_status(vsc, VIRTIO_CONFIG_S_FEATURES_OK); + + status = virtio_mmio_get_status(vsc); + if ((status & VIRTIO_CONFIG_S_FEATURES_OK) == 0) { + aprint_error_dev(vsc->sc_dev, + "desired features were not accepted\n"); + } + } } /* @@ -290,9 +385,11 @@ virtio_mmio_kick(struct virtio_softc *vsc, uint16_t idx) static int virtio_mmio_alloc_interrupts(struct virtio_softc *vsc) { - struct virtio_mmio_softc * const sc = (struct virtio_mmio_softc *)vsc; + /* struct virtio_mmio_softc * const sc = (struct virtio_mmio_softc *)vsc; return sc->sc_alloc_interrupts(sc); + */ + return 0; } static void diff --git a/sys/dev/virtio/virtio_mmiovar.h b/sys/dev/virtio/virtio_mmiovar.h index d879e7b503545..8fb267cba7edb 100644 --- a/sys/dev/virtio/virtio_mmiovar.h +++ b/sys/dev/virtio/virtio_mmiovar.h @@ -37,6 +37,8 @@ struct virtio_mmio_softc { bus_space_handle_t sc_ioh; bus_size_t sc_iosize; + int mmio_version; + void *sc_ih; int (*sc_alloc_interrupts)(struct virtio_mmio_softc *); From c00c9148dac9cad0ef9eab7e5032a47dc0087954 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 18 Dec 2023 16:12:29 +0100 Subject: [PATCH 026/114] fix: not sure indexes relate to queues type --- 
sys/dev/virtio/virtio_mmio.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index ab4f7e3923f0e..cfeaeccb49cc5 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -127,6 +127,7 @@ static void virtio_mmio_setup_queue(struct virtio_softc *vsc, uint16_t idx, uint64_t addr) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; + struct virtqueue *vqs = sc->sc_sc.sc_vqs; bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NUM, bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NUM_MAX)); @@ -138,26 +139,21 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, uint16_t idx, uint64_t addr) bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_PFN, addr / VIRTIO_PAGE_SIZE); } else { - switch(idx) { - case 0: bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_DESC_LOW, addr); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_DESC_HIGH, ((uint64_t)addr >> 32)); - break; - case 1: - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_AVAIL_LOW, - addr); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_AVAIL_HIGH, - ((uint64_t)addr >> 32)); - break; - case 2: + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_LOW, - addr); + addr + vqs[idx].vq_availoffset); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_HIGH, - ((uint64_t)addr >> 32)); - break; - } + ((uint64_t)(addr+vqs[idx].vq_availoffset) >> 32)); + + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_LOW, + addr + vqs[idx].vq_usedoffset); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_HIGH, + ((uint64_t)(addr+vqs[idx].vq_usedoffset) >> 32)); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_READY, 1); } } From 3ac522187bd1781d3bb15f1ec7fcf476b8c73003 Mon Sep 17 00:00:00 2001 From: Gregory 'GaLi' Cavelier Date: Mon, 18 Dec 2023 21:22:09 +0100 
Subject: [PATCH 027/114] fix: correctly identify if the hypervisor is Xen --- sys/arch/amd64/amd64/locore.S | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index d0522b7cd003d..df8d439f14590 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1077,11 +1077,25 @@ ENTRY(start_genpvh) push %ebx xorl %eax, %eax cpuid + cmpl $0x1, %eax /* Check if we can call CPUID with eax=1 */ + jb .start_genpvh + xorl %eax, %eax + inc %eax + cpuid + shr $31, %ecx + testb $1, %cl /* Check if bit 31 of ECX (hypervisor) is set */ + jz .start_genpvh + xorl %eax, %eax + inc %eax + shl $30, %eax + cpuid /* Calling cpuid with eax=0x40000000 */ cmp $0x566e6558, %ebx /* "VneX" */ je .start_xen32 /* We have been started by a VMM that is *not* Xen */ +.start_genpvh: + /* First, copy the hvm_start_info structure to __kernel_end */ pop %ebx movl %ebx, %esi From eb2e61250e912d310533aec88de1d98fbef30d9d Mon Sep 17 00:00:00 2001 From: Gregory 'GaLi' Cavelier Date: Mon, 18 Dec 2023 23:13:10 +0100 Subject: [PATCH 028/114] remove hardcoded value --- sys/arch/amd64/amd64/locore.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index df8d439f14590..82c1b1af5f21d 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -278,6 +278,7 @@ #ifdef XEN #define __ASSEMBLY__ +#include #include #include @@ -1089,7 +1090,7 @@ ENTRY(start_genpvh) inc %eax shl $30, %eax cpuid /* Calling cpuid with eax=0x40000000 */ - cmp $0x566e6558, %ebx /* "VneX" */ + cmp $XEN_CPUID_SIGNATURE_EBX, %ebx /* "VneX" */ je .start_xen32 /* We have been started by a VMM that is *not* Xen */ From e9bf1eb09555b5aff9394029921fbae0a2ca8fdf Mon Sep 17 00:00:00 2001 From: Gregory 'GaLi' Cavelier Date: Mon, 18 Dec 2023 23:13:41 +0100 Subject: [PATCH 029/114] align comments --- sys/arch/amd64/amd64/locore.S | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 82c1b1af5f21d..97519bcb6a1b0 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1078,18 +1078,18 @@ ENTRY(start_genpvh) push %ebx xorl %eax, %eax cpuid - cmpl $0x1, %eax /* Check if we can call CPUID with eax=1 */ + cmpl $0x1, %eax /* Check if we can call CPUID with eax=1 */ jb .start_genpvh xorl %eax, %eax inc %eax cpuid shr $31, %ecx - testb $1, %cl /* Check if bit 31 of ECX (hypervisor) is set */ + testb $1, %cl /* Check if bit 31 of ECX (hypervisor) is set */ jz .start_genpvh xorl %eax, %eax inc %eax shl $30, %eax - cpuid /* Calling cpuid with eax=0x40000000 */ + cpuid /* Calling cpuid with eax=0x40000000 */ cmp $XEN_CPUID_SIGNATURE_EBX, %ebx /* "VneX" */ je .start_xen32 From 916f19ed437b508e8c99e5d28ff48d01ee73b3ef Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 19 Dec 2023 08:17:09 +0100 Subject: [PATCH 030/114] feat: added cmdline backend --- sys/dev/virtio/cmdlinevar.h | 7 + sys/dev/virtio/virtio_mmio_cmdline.c | 253 +++++++++++++++++++++++++++ 2 files changed, 260 insertions(+) create mode 100644 sys/dev/virtio/cmdlinevar.h create mode 100644 sys/dev/virtio/virtio_mmio_cmdline.c diff --git a/sys/dev/virtio/cmdlinevar.h b/sys/dev/virtio/cmdlinevar.h new file mode 100644 index 0000000000000..f94192a96adc4 --- /dev/null +++ b/sys/dev/virtio/cmdlinevar.h @@ -0,0 +1,7 @@ + +struct cmdline_attach_args { + bus_space_tag_t memt; + bus_dma_tag_t dmat; + int sc_irq; +}; + diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c new file mode 100644 index 0000000000000..7fb2d6f7cc458 --- /dev/null +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -0,0 +1,253 @@ +/*- + * Copyright (c) 2022 Colin Percival + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__KERNEL_RCSID(0, "$NetBSD: virtio_mmio_cmdline.c"); + +#include +#include +#include +#include +#include +#include + +#define VIRTIO_PRIVATE +#include +#include +#include + +#define VMMIOSTR "virtio_mmio.device=" + +static int virtio_mmio_cmdline_match(device_t, cfdata_t, void *); +static void virtio_mmio_cmdline_attach(device_t, device_t, void *); +static int virtio_mmio_cmdline_detach(device_t, int); +static int virtio_mmio_cmdline_rescan(device_t, const char *, const int *); +static int virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *); +static void virtio_mmio_cmdline_free_interrupts(struct virtio_mmio_softc *); + +struct virtio_mmio_cmdline_softc { + struct virtio_mmio_softc sc_msc; +}; + +CFATTACH_DECL3_NEW(mmio_cmdline, + sizeof(struct virtio_mmio_cmdline_softc), + virtio_mmio_cmdline_match, virtio_mmio_cmdline_attach, + virtio_mmio_cmdline_detach, NULL, + virtio_mmio_cmdline_rescan, (void *)voidop, DVF_DETACH_SHUTDOWN); + +static void +parsearg(device_t self, struct virtio_mmio_softc *msc, const char *arg) +{ + char *p; + uint64_t sz, baseaddr, irq, id; + int error; + + /* */ + sz = strtoull(arg, (char **)&p, 0); + if ((sz == 0) || (sz == ULLONG_MAX)) + goto bad; + switch (*p) { + case 'E': case 'e': + sz <<= 10; + /* FALLTHROUGH */ + case 'P': case 'p': + sz <<= 10; + /* FALLTHROUGH */ + case 'T': case 't': + sz <<= 10; + /* FALLTHROUGH */ + case 'G': case 'g': + sz <<= 10; + /* FALLTHROUGH */ + case 'M': case 'm': + sz <<= 10; + /* FALLTHROUGH */ + case 'K': case 'k': + sz <<= 10; + p++; + break; + } + + /* @ */ + if (*p++ != '@') + goto bad; + baseaddr = strtoull(p, (char **)&p, 0); + if ((baseaddr == 0) || (baseaddr == ULLONG_MAX)) + goto bad; + + /* : */ + if (*p++ != ':') + goto bad; + irq = strtoull(p, (char **)&p, 0); + if ((irq == 0) || (irq == ULLONG_MAX)) + goto bad; + + /* Optionally, : */ + if (*p) { + if (*p++ != ':') + goto bad; + id = strtoull(p, (char **)&p, 0); + if ((id == 0) || (id == ULLONG_MAX)) + goto 
bad; + } else { + id = 0; + } + + /* Should have reached the end of the string. */ + if (*p) + goto bad; + + error = bus_space_map(msc->sc_iot, baseaddr, sz, 0, &msc->sc_ioh); + if (error) { + aprint_error_dev(self, "couldn't map %#" PRIx64 ": %d", + (uint64_t)baseaddr, error); + return; + } + + return; + +bad: + printf("Error parsing virtio_mmio parameter: %s\n", arg); +} + +static void +virtio_mmio_cmdline_parse(device_t self, struct virtio_mmio_softc *msc) +{ + char *p, *v, cmdline[128]; + + strcpy(cmdline, xen_start_info.cmd_line); + + aprint_normal("\nkernel parameters: %s", cmdline); + + if ((p = strstr(cmdline, VMMIOSTR)) == NULL) + return; + + while (*p) { + v = p; + while (*p && *p != ' ') + p++; + if (*p) + *p = '\0'; + p = v; + while (*p && *p != '=') + p++; + if (*p) { + p++; + aprint_normal("\nviommio: %s", p); + parsearg(self, msc, p); + } + } +} + +static int +virtio_mmio_cmdline_match(device_t parent, cfdata_t match, void *aux) +{ + if (strstr(xen_start_info.cmd_line, VMMIOSTR) == NULL) + return 0; + return 1; +} + +static void +virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) +{ + /* Attach function for device */ + struct virtio_mmio_cmdline_softc *csc = device_private(self); + struct virtio_mmio_softc *const msc = &csc->sc_msc; + struct virtio_softc *const vsc = &msc->sc_sc; + struct cmdline_attach_args *caa = aux; + + msc->sc_iot = caa->memt; + vsc->sc_dev = self; + vsc->sc_dmat = caa->dmat; + + virtio_mmio_cmdline_parse(self, msc); + + aprint_normal("\n"); + aprint_naive("\n"); + + msc->sc_alloc_interrupts = virtio_mmio_cmdline_alloc_interrupts; + msc->sc_free_interrupts = virtio_mmio_cmdline_free_interrupts; + + virtio_mmio_common_attach(msc); + virtio_mmio_cmdline_rescan(self, "virtio", NULL); +} + +static int +virtio_mmio_cmdline_detach(device_t self, int flags) +{ + struct virtio_mmio_cmdline_softc * const fsc = device_private(self); + struct virtio_mmio_softc * const msc = &fsc->sc_msc; + + return 
virtio_mmio_common_detach(msc, flags); +} + +static int +virtio_mmio_cmdline_rescan(device_t self, const char *ifattr, const int *locs) +{ + struct virtio_mmio_cmdline_softc *const sc = device_private(self); + struct virtio_mmio_softc *const msc = &sc->sc_msc; + struct virtio_softc *const vsc = &msc->sc_sc; + struct virtio_attach_args va; + + if (vsc->sc_child) + return 0; + + memset(&va, 0, sizeof(va)); + va.sc_childdevid = vsc->sc_childdevid; + + config_found(self, &va, NULL, CFARGS_NONE); + + if (virtio_attach_failed(vsc)) + return 0; + + return 0; +} + + +static int +virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *msc) +{ + struct virtio_softc * const vsc = &msc->sc_sc; + + msc->sc_ih = softint_establish(SOFTINT_BIO, (void *)virtio_mmio_intr, msc); + if (msc->sc_ih == NULL) { + aprint_error_dev(vsc->sc_dev, + "failed to establish interrupt\n"); + return -1; + } + aprint_normal_dev(vsc->sc_dev, "interrupting on ??\n"); + + return 0; +} + +static void +virtio_mmio_cmdline_free_interrupts(struct virtio_mmio_softc *msc) +{ + if (msc->sc_ih != NULL) { + softint_disestablish(msc->sc_ih); + msc->sc_ih = NULL; + } +} + From 946a1dd2321f69f0f069370f872b35e8e1e230f5 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 20 Dec 2023 17:17:42 +0100 Subject: [PATCH 031/114] fix: use intr_establish_xname --- sys/arch/amd64/amd64/amd64_mainbus.c | 1 - sys/dev/virtio/cmdlinevar.h | 1 - sys/dev/virtio/files.virtio | 2 - sys/dev/virtio/virtio_mmio.c | 6 +- sys/dev/virtio/virtio_mmio_cmdline.c | 93 ++++++++++++++++++---------- 5 files changed, 62 insertions(+), 41 deletions(-) diff --git a/sys/arch/amd64/amd64/amd64_mainbus.c b/sys/arch/amd64/amd64/amd64_mainbus.c index d118ef8bf16a8..77b77e16f08f5 100644 --- a/sys/arch/amd64/amd64/amd64_mainbus.c +++ b/sys/arch/amd64/amd64/amd64_mainbus.c @@ -201,7 +201,6 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) mba.mba_pba.pba_iot = x86_bus_space_io; mba.mba_pba.pba_memt = x86_bus_space_mem; 
mba.mba_pba.pba_dmat = &pci_bus_dma_tag; - mba.mba_pba.pba_dmat64 = &pci_bus_dma64_tag; mba.mba_pba.pba_pc = NULL; mba.mba_pba.pba_flags = PCI_FLAGS_IO_OKAY | PCI_FLAGS_MEM_OKAY | diff --git a/sys/dev/virtio/cmdlinevar.h b/sys/dev/virtio/cmdlinevar.h index f94192a96adc4..4b6bc86de38f5 100644 --- a/sys/dev/virtio/cmdlinevar.h +++ b/sys/dev/virtio/cmdlinevar.h @@ -2,6 +2,5 @@ struct cmdline_attach_args { bus_space_tag_t memt; bus_dma_tag_t dmat; - int sc_irq; }; diff --git a/sys/dev/virtio/files.virtio b/sys/dev/virtio/files.virtio index 9ce6c7011ac1d..b8e513890fa6a 100644 --- a/sys/dev/virtio/files.virtio +++ b/sys/dev/virtio/files.virtio @@ -4,7 +4,6 @@ include "dev/pci/files.virtio" define cmdlinebus { } -#define dummybus { } file dev/virtio/virtio_mmio.c virtio_mmio @@ -12,6 +11,5 @@ device viocon attach viocon at virtio file dev/virtio/viocon.c viocon - attach virtio at cmdlinebus with mmio_cmdline: virtio_mmio file dev/virtio/virtio_mmio_cmdline.c mmio_cmdline diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index cfeaeccb49cc5..42dda98a4cde3 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -354,6 +354,7 @@ virtio_mmio_intr(void *arg) /* check and ack the interrupt */ isr = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_INTERRUPT_STATUS); + printf(">>> ISR: %d\n", isr); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_INTERRUPT_ACK, isr); if ((isr & VIRTIO_MMIO_INT_CONFIG) && @@ -361,6 +362,7 @@ virtio_mmio_intr(void *arg) r = (vsc->sc_config_change)(vsc); if ((isr & VIRTIO_MMIO_INT_VRING) && (vsc->sc_intrhand != NULL)) { + printf(">>> INTR???\n"); if (vsc->sc_soft_ih != NULL) softint_schedule(vsc->sc_soft_ih); else @@ -381,11 +383,9 @@ virtio_mmio_kick(struct virtio_softc *vsc, uint16_t idx) static int virtio_mmio_alloc_interrupts(struct virtio_softc *vsc) { - /* struct virtio_mmio_softc * const sc = (struct virtio_mmio_softc *)vsc; + struct virtio_mmio_softc * const sc = (struct 
virtio_mmio_softc *)vsc; return sc->sc_alloc_interrupts(sc); - */ - return 0; } static void diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index 7fb2d6f7cc458..bf32f92911ce4 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -38,6 +38,8 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio_cmdline.c"); #include #include +#include + #define VMMIOSTR "virtio_mmio.device=" static int virtio_mmio_cmdline_match(device_t, cfdata_t, void *); @@ -47,8 +49,16 @@ static int virtio_mmio_cmdline_rescan(device_t, const char *, const int *); static int virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *); static void virtio_mmio_cmdline_free_interrupts(struct virtio_mmio_softc *); +struct mmio_args { + uint64_t sz; + uint64_t baseaddr; + uint64_t irq; + uint64_t id; +}; + struct virtio_mmio_cmdline_softc { struct virtio_mmio_softc sc_msc; + struct mmio_args margs; }; CFATTACH_DECL3_NEW(mmio_cmdline, @@ -58,34 +68,32 @@ CFATTACH_DECL3_NEW(mmio_cmdline, virtio_mmio_cmdline_rescan, (void *)voidop, DVF_DETACH_SHUTDOWN); static void -parsearg(device_t self, struct virtio_mmio_softc *msc, const char *arg) +parsearg(device_t self, struct mmio_args *margs, const char *arg) { char *p; - uint64_t sz, baseaddr, irq, id; - int error; /* */ - sz = strtoull(arg, (char **)&p, 0); - if ((sz == 0) || (sz == ULLONG_MAX)) + margs->sz = strtoull(arg, (char **)&p, 0); + if ((margs->sz == 0) || (margs->sz == ULLONG_MAX)) goto bad; switch (*p) { case 'E': case 'e': - sz <<= 10; + margs->sz <<= 10; /* FALLTHROUGH */ case 'P': case 'p': - sz <<= 10; + margs->sz <<= 10; /* FALLTHROUGH */ case 'T': case 't': - sz <<= 10; + margs->sz <<= 10; /* FALLTHROUGH */ case 'G': case 'g': - sz <<= 10; + margs->sz <<= 10; /* FALLTHROUGH */ case 'M': case 'm': - sz <<= 10; + margs->sz <<= 10; /* FALLTHROUGH */ case 'K': case 'k': - sz <<= 10; + margs->sz <<= 10; p++; break; } @@ -93,39 +101,32 @@ parsearg(device_t self, struct 
virtio_mmio_softc *msc, const char *arg) /* @ */ if (*p++ != '@') goto bad; - baseaddr = strtoull(p, (char **)&p, 0); - if ((baseaddr == 0) || (baseaddr == ULLONG_MAX)) + margs->baseaddr = strtoull(p, (char **)&p, 0); + if ((margs->baseaddr == 0) || (margs->baseaddr == ULLONG_MAX)) goto bad; /* : */ if (*p++ != ':') goto bad; - irq = strtoull(p, (char **)&p, 0); - if ((irq == 0) || (irq == ULLONG_MAX)) + margs->irq = strtoull(p, (char **)&p, 0); + if ((margs->irq == 0) || (margs->irq == ULLONG_MAX)) goto bad; /* Optionally, : */ if (*p) { if (*p++ != ':') goto bad; - id = strtoull(p, (char **)&p, 0); - if ((id == 0) || (id == ULLONG_MAX)) + margs->id = strtoull(p, (char **)&p, 0); + if ((margs->id == 0) || (margs->id == ULLONG_MAX)) goto bad; } else { - id = 0; + margs->id = 0; } /* Should have reached the end of the string. */ if (*p) goto bad; - error = bus_space_map(msc->sc_iot, baseaddr, sz, 0, &msc->sc_ioh); - if (error) { - aprint_error_dev(self, "couldn't map %#" PRIx64 ": %d", - (uint64_t)baseaddr, error); - return; - } - return; bad: @@ -133,9 +134,12 @@ parsearg(device_t self, struct virtio_mmio_softc *msc, const char *arg) } static void -virtio_mmio_cmdline_parse(device_t self, struct virtio_mmio_softc *msc) +virtio_mmio_cmdline_parse(device_t self, struct virtio_mmio_cmdline_softc *sc) { + struct virtio_mmio_softc *const msc = &sc->sc_msc; + struct mmio_args *margs = &sc->margs; char *p, *v, cmdline[128]; + int error; strcpy(cmdline, xen_start_info.cmd_line); @@ -156,7 +160,20 @@ virtio_mmio_cmdline_parse(device_t self, struct virtio_mmio_softc *msc) if (*p) { p++; aprint_normal("\nviommio: %s", p); - parsearg(self, msc, p); + parsearg(self, margs, p); + + error = bus_space_map( + msc->sc_iot, margs->baseaddr, + margs->sz, 0, &msc->sc_ioh + ); + if (error) { + aprint_error_dev(self, + "couldn't map %#" PRIx64 ": %d", + (uint64_t)margs->baseaddr, error + ); + return; + } + } } } @@ -173,16 +190,17 @@ static void virtio_mmio_cmdline_attach(device_t 
parent, device_t self, void *aux) { /* Attach function for device */ - struct virtio_mmio_cmdline_softc *csc = device_private(self); - struct virtio_mmio_softc *const msc = &csc->sc_msc; + struct virtio_mmio_cmdline_softc *sc = device_private(self); + struct virtio_mmio_softc *const msc = &sc->sc_msc; struct virtio_softc *const vsc = &msc->sc_sc; struct cmdline_attach_args *caa = aux; msc->sc_iot = caa->memt; vsc->sc_dev = self; vsc->sc_dmat = caa->dmat; + msc->sc_iosize = sc->margs.sz; - virtio_mmio_cmdline_parse(self, msc); + virtio_mmio_cmdline_parse(self, sc); aprint_normal("\n"); aprint_naive("\n"); @@ -229,15 +247,22 @@ virtio_mmio_cmdline_rescan(device_t self, const char *ifattr, const int *locs) static int virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *msc) { - struct virtio_softc * const vsc = &msc->sc_sc; + struct virtio_mmio_cmdline_softc *const sc = + (struct virtio_mmio_cmdline_softc *)msc; + struct virtio_softc *const vsc = &msc->sc_sc; + struct pic *pic; + int pin = sc->margs.irq; + + pic = &i8259_pic; - msc->sc_ih = softint_establish(SOFTINT_BIO, (void *)virtio_mmio_intr, msc); + msc->sc_ih = intr_establish_xname(sc->margs.irq, pic, pin, IST_LEVEL, IPL_BIO, + virtio_mmio_intr, msc, true, device_xname(vsc->sc_dev)); if (msc->sc_ih == NULL) { aprint_error_dev(vsc->sc_dev, "failed to establish interrupt\n"); return -1; } - aprint_normal_dev(vsc->sc_dev, "interrupting on ??\n"); + aprint_normal_dev(vsc->sc_dev, "interrupting on %ld\n", sc->margs.irq); return 0; } @@ -246,7 +271,7 @@ static void virtio_mmio_cmdline_free_interrupts(struct virtio_mmio_softc *msc) { if (msc->sc_ih != NULL) { - softint_disestablish(msc->sc_ih); + intr_disestablish(msc->sc_ih); msc->sc_ih = NULL; } } From 37e57b3a0f464246611a005a350e01e25e091bfe Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 21 Dec 2023 09:53:49 +0100 Subject: [PATCH 032/114] feat: added Linux MPTable bug for ioapic to be seen --- sys/arch/x86/x86/mpbios.c | 55 
+++++++++++++++++++++++++++- sys/dev/virtio/virtio_mmio.c | 1 + sys/dev/virtio/virtio_mmio_cmdline.c | 28 ++++++++++---- sys/kern/subr_disk_open.c | 2 + 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/sys/arch/x86/x86/mpbios.c b/sys/arch/x86/x86/mpbios.c index a3565a44251a5..6190d2f75fd0b 100644 --- a/sys/arch/x86/x86/mpbios.c +++ b/sys/arch/x86/x86/mpbios.c @@ -209,6 +209,9 @@ static void mpbios_int(const uint8_t *, int, struct mp_intr_map *); static const void *mpbios_map(paddr_t, int, struct mp_map *); static void mpbios_unmap(struct mp_map *); +#ifdef MPTABLE_LINUX_BUG_COMPAT +static uint16_t compute_entry_count(const uint8_t *, const uint8_t *); +#endif /* * globals to help us bounce our way through parsing the config table. */ @@ -278,7 +281,6 @@ mpbios_unmap(struct mp_map *handle) pmap_update(pmap_kernel()); uvm_km_free(kernel_map, handle->baseva, handle->vsize, UVM_KMF_VAONLY); } - /* * Look for an Intel MP spec table, indicating SMP capable hardware. */ @@ -333,6 +335,17 @@ mpbios_probe(device_t self) if (mp_fps != NULL) goto found; + /* + * Linux assumes that it always has 640 kB of base memory and + * searches for the MP table at 639k regardless of whether that + * address is present in the system memory map. Some VM systems + * rely on this buggy behaviour. + */ + mp_fps = mpbios_search(self, 639 * 1024, 1024 / 4, &mp_fp_map); + if (mp_fps != NULL) + goto found; + + /* nothing found */ return 0; @@ -376,6 +389,7 @@ mpbios_probe(device_t self) "MP Configuration Table checksum mismatch\n"); goto err; } + return 10; err: if (mp_fps) { @@ -533,6 +547,31 @@ static const uint8_t dflt_lint_tab[2] = { }; +#ifdef MPTABLE_LINUX_BUG_COMPAT +/* Compute the correct entry_count value. 
*/ +static uint16_t +compute_entry_count(const uint8_t *entry, const uint8_t *end) +{ + size_t nentries = 0; + + while (entry < end) { + switch (*entry) { + case MPS_MCT_CPU: + case MPS_MCT_BUS: + case MPS_MCT_IOAPIC: + case MPS_MCT_IOINT: + case MPS_MCT_LINT: + break; + default: + panic("%s: Unknown MP Config Entry %d\n", __func__, + (int)*entry); + } + entry += mp_conf[*entry].length;; + nentries++; + } + return (uint16_t)(nentries); +} +#endif /* * 1st pass on BIOS's Intel MP specification table. * @@ -557,6 +596,9 @@ mpbios_scan(device_t self, int *ncpup) int intr_cnt, cur_intr; #if NLAPIC > 0 paddr_t lapic_base; +#endif +#ifdef MPTABLE_LINUX_BUG_COMPAT + uint16_t countfix = 0; #endif const struct dflt_conf_entry *dflt_conf; const int *dflt_bus_irq; @@ -677,6 +719,13 @@ mpbios_scan(device_t self, int *ncpup) position += sizeof(*mp_cth); count = mp_cth->entry_count; +#ifdef MPTABLE_LINUX_BUG_COMPAT + if (count == 0) { + /* count the correct entry_count */ + countfix = compute_entry_count(position, end); + count = countfix; + } +#endif intr_cnt = 0; while ((count--) && (position < end)) { @@ -721,6 +770,10 @@ mpbios_scan(device_t self, int *ncpup) /* re-walk the table, recording info of interest */ position = (const uint8_t *)mp_cth + sizeof(*mp_cth); count = mp_cth->entry_count; +#ifdef MPTABLE_LINUX_BUG_COMPAT + if (count == 0) + count = countfix; +#endif cur_intr = 0; while ((count--) && (position < end)) { diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index 42dda98a4cde3..771d129a8b1dd 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -351,6 +351,7 @@ virtio_mmio_intr(void *arg) struct virtio_softc *vsc = &sc->sc_sc; int isr, r = 0; + printf(">>> TRIGGERED!!\n"); /* check and ack the interrupt */ isr = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_INTERRUPT_STATUS); diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index bf32f92911ce4..cbca6ed7f44ce 100644 
--- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -38,7 +38,8 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio_cmdline.c"); #include #include -#include +#include +#include "ioapic.h" #define VMMIOSTR "virtio_mmio.device=" @@ -250,19 +251,30 @@ virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *msc) struct virtio_mmio_cmdline_softc *const sc = (struct virtio_mmio_cmdline_softc *)msc; struct virtio_softc *const vsc = &msc->sc_sc; + struct ioapic_softc *ioapic; struct pic *pic; - int pin = sc->margs.irq; - - pic = &i8259_pic; - - msc->sc_ih = intr_establish_xname(sc->margs.irq, pic, pin, IST_LEVEL, IPL_BIO, - virtio_mmio_intr, msc, true, device_xname(vsc->sc_dev)); + int irq = sc->margs.irq; + int pin = irq; + + /* ioapic = ioapic_find_bybase(irq);*/ + ioapic = ioapic_find_bybase(irq); + + if (ioapic != NULL) { + KASSERT(ioapic->sc_pic.pic_type == PIC_IOAPIC); + pic = &ioapic->sc_pic; + pin = irq - pic->pic_vecbase; + irq = -1; + } else + pic = &i8259_pic; + + msc->sc_ih = intr_establish_xname(irq, pic, pin, IST_EDGE, IPL_BIO, + virtio_mmio_intr, msc, false, device_xname(vsc->sc_dev)); if (msc->sc_ih == NULL) { aprint_error_dev(vsc->sc_dev, "failed to establish interrupt\n"); return -1; } - aprint_normal_dev(vsc->sc_dev, "interrupting on %ld\n", sc->margs.irq); + aprint_normal_dev(vsc->sc_dev, "interrupting on %d\n", irq); return 0; } diff --git a/sys/kern/subr_disk_open.c b/sys/kern/subr_disk_open.c index 2f5b71fe2a37b..0af080aacd55f 100644 --- a/sys/kern/subr_disk_open.c +++ b/sys/kern/subr_disk_open.c @@ -66,7 +66,9 @@ opendisk(device_t dv) panic("%s: can't alloc vnode for %s", __func__, device_xname(dv)); vn_lock(tmpvn, LK_EXCLUSIVE | LK_RETRY); + printf(">>> BEFORE\n"); error = VOP_OPEN(tmpvn, FREAD | FSILENT, NOCRED); + printf(">>> AFTER\n"); if (error) { /* * Ignore errors caused by missing device, partition, From 93b807fddcf430a6f186c23ac161f1bae342975d Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 
21 Dec 2023 14:44:17 +0100 Subject: [PATCH 033/114] panic on pci-virtio --- sys/dev/pci/virtio.c | 3 ++- sys/kern/subr_disk_open.c | 2 +- sys/kern/vnode_if.c | 1 + sys/ufs/ufs/ufs_vnops.c | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sys/dev/pci/virtio.c b/sys/dev/pci/virtio.c index e499cd27d724f..8e55c64f2e0ea 100644 --- a/sys/dev/pci/virtio.c +++ b/sys/dev/pci/virtio.c @@ -74,7 +74,8 @@ static void virtio_reset_vq(struct virtio_softc *, void virtio_set_status(struct virtio_softc *sc, int status) { - sc->sc_ops->set_status(sc, status); + if (sc->sc_ops->set_status) + sc->sc_ops->set_status(sc, status); } /* diff --git a/sys/kern/subr_disk_open.c b/sys/kern/subr_disk_open.c index 0af080aacd55f..09ddce9330a92 100644 --- a/sys/kern/subr_disk_open.c +++ b/sys/kern/subr_disk_open.c @@ -66,7 +66,7 @@ opendisk(device_t dv) panic("%s: can't alloc vnode for %s", __func__, device_xname(dv)); vn_lock(tmpvn, LK_EXCLUSIVE | LK_RETRY); - printf(">>> BEFORE\n"); + printf(">>> BEFORE: %s\n", device_xname(dv)); error = VOP_OPEN(tmpvn, FREAD | FSILENT, NOCRED); printf(">>> AFTER\n"); if (error) { diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c index bc8d16a8e5d0e..b2f3cff645962 100644 --- a/sys/kern/vnode_if.c +++ b/sys/kern/vnode_if.c @@ -566,6 +566,7 @@ VOP_OPEN(struct vnode *vp, error = vop_pre(vp, &mp, &mpsafe, FST_NO); if (error) return error; + printf("VTYPE: %s\n", mp->mnt_stat.f_fstypename); error = (VCALL(vp, VOFFSET(vop_open), &a)); vop_post(vp, mp, mpsafe, FST_NO); vop_open_post(&a, error); diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 3352132ac7a53..a4c1885b4f0d3 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -229,6 +229,7 @@ ufs_open(void *v) kauth_cred_t a_cred; } */ *ap = v; + /* * Files marked append-only must be opened for appending. 
*/ From ca1d7ab5935f09b551d4fffd21660d39d65b4632 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 21 Dec 2023 14:49:40 +0100 Subject: [PATCH 034/114] fix: wrongly removed dma64_tag --- sys/arch/amd64/amd64/amd64_mainbus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/arch/amd64/amd64/amd64_mainbus.c b/sys/arch/amd64/amd64/amd64_mainbus.c index 77b77e16f08f5..d118ef8bf16a8 100644 --- a/sys/arch/amd64/amd64/amd64_mainbus.c +++ b/sys/arch/amd64/amd64/amd64_mainbus.c @@ -201,6 +201,7 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) mba.mba_pba.pba_iot = x86_bus_space_io; mba.mba_pba.pba_memt = x86_bus_space_mem; mba.mba_pba.pba_dmat = &pci_bus_dma_tag; + mba.mba_pba.pba_dmat64 = &pci_bus_dma64_tag; mba.mba_pba.pba_pc = NULL; mba.mba_pba.pba_flags = PCI_FLAGS_IO_OKAY | PCI_FLAGS_MEM_OKAY | From 7e84a05563901870f131c57e39af22c7373766ca Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 26 Dec 2023 12:25:34 +0100 Subject: [PATCH 035/114] fix: IRQ triggered, block device recognized. 
Cleanup --- sys/dev/virtio/cmdlinevar.h | 1 + sys/dev/virtio/virtio_mmio.c | 123 +++++++++++---------------- sys/dev/virtio/virtio_mmio_cmdline.c | 3 +- sys/kern/subr_disk.c | 2 +- sys/kern/subr_disk_open.c | 2 - sys/kern/vnode_if.c | 1 - 6 files changed, 51 insertions(+), 81 deletions(-) diff --git a/sys/dev/virtio/cmdlinevar.h b/sys/dev/virtio/cmdlinevar.h index 4b6bc86de38f5..443a67790e62e 100644 --- a/sys/dev/virtio/cmdlinevar.h +++ b/sys/dev/virtio/cmdlinevar.h @@ -2,5 +2,6 @@ struct cmdline_attach_args { bus_space_tag_t memt; bus_dma_tag_t dmat; + bus_dma_tag_t dmat64; }; diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index 771d129a8b1dd..1ec23fdfa0be7 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -41,6 +41,8 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.11 2023/07/07 07:19:36 rin Exp $") #include #include +#include + #define VIRTIO_MMIO_MAGIC ('v' | 'i' << 8 | 'r' << 16 | 't' << 24) #define VIRTIO_MMIO_MAGIC_VALUE 0x000 @@ -128,6 +130,7 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, uint16_t idx, uint64_t addr) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; struct virtqueue *vqs = sc->sc_sc.sc_vqs; + paddr_t paddr; bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NUM, bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NUM_MAX)); @@ -139,20 +142,23 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, uint16_t idx, uint64_t addr) bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_PFN, addr / VIRTIO_PAGE_SIZE); } else { + paddr = vtophys((vaddr_t)vqs->vq_desc); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_DESC_LOW, - addr); + paddr); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_DESC_HIGH, - ((uint64_t)addr >> 32)); + ((uint64_t)paddr >> 32)); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_LOW, - addr + vqs[idx].vq_availoffset); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, 
VIRTIO_MMIO_QUEUE_USED_HIGH, - ((uint64_t)(addr+vqs[idx].vq_availoffset) >> 32)); + paddr = vtophys((vaddr_t)vqs->vq_avail); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_AVAIL_LOW, + paddr); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_AVAIL_HIGH, + ((uint64_t)(paddr) >> 32)); + paddr = vtophys((vaddr_t)vqs->vq_used); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_LOW, - addr + vqs[idx].vq_usedoffset); + paddr); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_HIGH, - ((uint64_t)(addr+vqs[idx].vq_usedoffset) >> 32)); + ((uint64_t)(paddr) >> 32)); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_READY, 1); } @@ -266,79 +272,49 @@ virtio_mmio_common_detach(struct virtio_mmio_softc *sc, int flags) return 0; } -static uint64_t -virtio_filter_transport_features(uint64_t features) -{ - uint64_t transport, mask; - - transport = (1ULL << - (VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START)) - 1; - transport <<= VIRTIO_TRANSPORT_F_START; - - mask = -1ULL & ~transport; - mask |= VIRTIO_RING_F_INDIRECT_DESC; - mask |= VIRTIO_RING_F_EVENT_IDX; - mask |= VIRTIO_F_VERSION_1; - - return (features & mask); -} /* * Feature negotiation. */ static void virtio_mmio_negotiate_features(struct virtio_softc *vsc, uint64_t - child_features) + guest_features) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - uint64_t host_features, features; - int status; - - if (sc->mmio_version > 1) - child_features |= VIRTIO_F_VERSION_1; - - bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_HOST_FEATURES_SEL, 1); - host_features = bus_space_read_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_HOST_FEATURES); - host_features <<= 32; - - bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_HOST_FEATURES_SEL, 0); - host_features |= bus_space_read_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_HOST_FEATURES); - - /* - * Limit negotiated features to what the driver, virtqueue, and - * host all support. 
- */ - features = host_features & child_features; - features = virtio_filter_transport_features(features); - vsc->sc_active_features = features; - - bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_GUEST_FEATURES_SEL, 1); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_GUEST_FEATURES, features >> 32); - - bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_GUEST_FEATURES_SEL, 0); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_GUEST_FEATURES, features); - - if (sc->mmio_version > 1) { - /* - * Must re-read the status after setting it to verify the - * negotiated features were accepted by the device. - */ - /* https://twitter.com/cperciva/status/1548447423436967936 */ - virtio_mmio_set_status(vsc, VIRTIO_CONFIG_S_FEATURES_OK); - - status = virtio_mmio_get_status(vsc); - if ((status & VIRTIO_CONFIG_S_FEATURES_OK) == 0) { - aprint_error_dev(vsc->sc_dev, + uint64_t host, negotiated, device_status; + + guest_features |= VIRTIO_F_VERSION_1; +#ifdef __NEED_VIRTIO_F_ACCESS_PLATFORM + /* XXX This could use some work. 
*/ + guest_features |= VIRTIO_F_ACCESS_PLATFORM; +#endif /* __NEED_VIRTIO_F_ACCESS_PLATFORM */ + /* notify on empty is 0.9 only */ + guest_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY; + vsc->sc_active_features = 0; + + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES_SEL, 0); + host = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES_SEL, 1); + host |= (uint64_t) + bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES) << 32; + + negotiated = host & guest_features; + + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_GUEST_FEATURES_SEL, 0); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_GUEST_FEATURES, + negotiated & 0xffffffff); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_GUEST_FEATURES_SEL, 1); + bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_GUEST_FEATURES, + negotiated >> 32); + virtio_mmio_set_status(vsc, VIRTIO_CONFIG_S_FEATURES_OK); + + device_status = virtio_mmio_get_status(vsc); + if ((device_status & VIRTIO_CONFIG_S_FEATURES_OK) == 0) { + aprint_error_dev(vsc->sc_dev, "desired features were not accepted\n"); - } } + + vsc->sc_active_features = negotiated; + } /* @@ -351,11 +327,9 @@ virtio_mmio_intr(void *arg) struct virtio_softc *vsc = &sc->sc_sc; int isr, r = 0; - printf(">>> TRIGGERED!!\n"); /* check and ack the interrupt */ isr = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_INTERRUPT_STATUS); - printf(">>> ISR: %d\n", isr); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_INTERRUPT_ACK, isr); if ((isr & VIRTIO_MMIO_INT_CONFIG) && @@ -363,7 +337,6 @@ virtio_mmio_intr(void *arg) r = (vsc->sc_config_change)(vsc); if ((isr & VIRTIO_MMIO_INT_VRING) && (vsc->sc_intrhand != NULL)) { - printf(">>> INTR???\n"); if (vsc->sc_soft_ih != NULL) softint_schedule(vsc->sc_soft_ih); else diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index cbca6ed7f44ce..1d5f6ce0d7d22 
100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -256,7 +256,6 @@ virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *msc) int irq = sc->margs.irq; int pin = irq; - /* ioapic = ioapic_find_bybase(irq);*/ ioapic = ioapic_find_bybase(irq); if (ioapic != NULL) { @@ -267,7 +266,7 @@ virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *msc) } else pic = &i8259_pic; - msc->sc_ih = intr_establish_xname(irq, pic, pin, IST_EDGE, IPL_BIO, + msc->sc_ih = intr_establish_xname(irq, pic, pin, IST_LEVEL, IPL_BIO, virtio_mmio_intr, msc, false, device_xname(vsc->sc_dev)); if (msc->sc_ih == NULL) { aprint_error_dev(vsc->sc_dev, diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c index db79240ec805d..5033c4a920578 100644 --- a/sys/kern/subr_disk.c +++ b/sys/kern/subr_disk.c @@ -417,7 +417,6 @@ int disk_read_sectors(void (*strat)(struct buf *), const struct disklabel *lp, struct buf *bp, unsigned int sector, int count) { - if ((lp->d_secsize / DEV_BSIZE) == 0 || lp->d_secpercyl == 0) return EINVAL; @@ -427,6 +426,7 @@ disk_read_sectors(void (*strat)(struct buf *), const struct disklabel *lp, bp->b_oflags &= ~BO_DONE; bp->b_cylinder = sector / lp->d_secpercyl; (*strat)(bp); + return biowait(bp); } diff --git a/sys/kern/subr_disk_open.c b/sys/kern/subr_disk_open.c index 09ddce9330a92..2f5b71fe2a37b 100644 --- a/sys/kern/subr_disk_open.c +++ b/sys/kern/subr_disk_open.c @@ -66,9 +66,7 @@ opendisk(device_t dv) panic("%s: can't alloc vnode for %s", __func__, device_xname(dv)); vn_lock(tmpvn, LK_EXCLUSIVE | LK_RETRY); - printf(">>> BEFORE: %s\n", device_xname(dv)); error = VOP_OPEN(tmpvn, FREAD | FSILENT, NOCRED); - printf(">>> AFTER\n"); if (error) { /* * Ignore errors caused by missing device, partition, diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c index b2f3cff645962..bc8d16a8e5d0e 100644 --- a/sys/kern/vnode_if.c +++ b/sys/kern/vnode_if.c @@ -566,7 +566,6 @@ VOP_OPEN(struct vnode *vp, error = vop_pre(vp, 
&mp, &mpsafe, FST_NO); if (error) return error; - printf("VTYPE: %s\n", mp->mnt_stat.f_fstypename); error = (VCALL(vp, VOFFSET(vop_open), &a)); vop_post(vp, mp, mpsafe, FST_NO); vop_open_post(&a, error); From 53cd71a4326560f070b84eeef46ece8bd0091a1b Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 26 Dec 2023 12:35:45 +0100 Subject: [PATCH 036/114] chores: cleanup --- sys/arch/x86/x86/mpbios.c | 2 +- sys/dev/virtio/virtio_mmio_cmdline.c | 2 +- sys/kern/subr_disk.c | 2 +- sys/ufs/ufs/ufs_vnops.c | 1 - 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sys/arch/x86/x86/mpbios.c b/sys/arch/x86/x86/mpbios.c index 6190d2f75fd0b..fa576513258db 100644 --- a/sys/arch/x86/x86/mpbios.c +++ b/sys/arch/x86/x86/mpbios.c @@ -553,7 +553,7 @@ static uint16_t compute_entry_count(const uint8_t *entry, const uint8_t *end) { size_t nentries = 0; - + while (entry < end) { switch (*entry) { case MPS_MCT_CPU: diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index 1d5f6ce0d7d22..f47c90c8bb810 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -170,7 +170,7 @@ virtio_mmio_cmdline_parse(device_t self, struct virtio_mmio_cmdline_softc *sc) if (error) { aprint_error_dev(self, "couldn't map %#" PRIx64 ": %d", - (uint64_t)margs->baseaddr, error + (uint64_t)margs->baseaddr, error ); return; } diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c index 5033c4a920578..db79240ec805d 100644 --- a/sys/kern/subr_disk.c +++ b/sys/kern/subr_disk.c @@ -417,6 +417,7 @@ int disk_read_sectors(void (*strat)(struct buf *), const struct disklabel *lp, struct buf *bp, unsigned int sector, int count) { + if ((lp->d_secsize / DEV_BSIZE) == 0 || lp->d_secpercyl == 0) return EINVAL; @@ -426,7 +427,6 @@ disk_read_sectors(void (*strat)(struct buf *), const struct disklabel *lp, bp->b_oflags &= ~BO_DONE; bp->b_cylinder = sector / lp->d_secpercyl; (*strat)(bp); - return biowait(bp); } diff --git 
a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index a4c1885b4f0d3..3352132ac7a53 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -229,7 +229,6 @@ ufs_open(void *v) kauth_cred_t a_cred; } */ *ap = v; - /* * Files marked append-only must be opened for appending. */ From 43463263659577c7f270eac8da6a326e0c3558a7 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 28 Dec 2023 12:05:10 +0100 Subject: [PATCH 037/114] fix: works on qemu/mmio --- sys/dev/virtio/virtio_mmio_cmdline.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index f47c90c8bb810..65be5895eeb06 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -197,9 +197,16 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) struct cmdline_attach_args *caa = aux; msc->sc_iot = caa->memt; - vsc->sc_dev = self; - vsc->sc_dmat = caa->dmat; msc->sc_iosize = sc->margs.sz; + vsc->sc_dev = self; + + if (BUS_DMA_TAG_VALID(caa->dmat64)) { + aprint_verbose(": using 64-bit DMA"); + vsc->sc_dmat = caa->dmat64; + } else { + aprint_verbose(": using 32-bit DMA"); + vsc->sc_dmat = caa->dmat; + } virtio_mmio_cmdline_parse(self, sc); @@ -255,6 +262,7 @@ virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *msc) struct pic *pic; int irq = sc->margs.irq; int pin = irq; + bool mpsafe; ioapic = ioapic_find_bybase(irq); @@ -266,8 +274,10 @@ virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *msc) } else pic = &i8259_pic; - msc->sc_ih = intr_establish_xname(irq, pic, pin, IST_LEVEL, IPL_BIO, - virtio_mmio_intr, msc, false, device_xname(vsc->sc_dev)); + mpsafe = (0 != (vsc->sc_flags & VIRTIO_F_INTR_MPSAFE)); + + msc->sc_ih = intr_establish_xname(irq, pic, pin, IST_LEVEL, vsc->sc_ipl, + virtio_mmio_intr, msc, mpsafe, device_xname(vsc->sc_dev)); if (msc->sc_ih == NULL) { aprint_error_dev(vsc->sc_dev, "failed to 
establish interrupt\n"); From 2bc2c08f890b1841b03248ab47502cb75a69e426 Mon Sep 17 00:00:00 2001 From: Gregory 'GaLi' Cavelier Date: Thu, 28 Dec 2023 18:40:46 +0100 Subject: [PATCH 038/114] fix: call ksyms_addsyms_elf() with symsize=0 because we don't have access to the ELF header --- sys/arch/amd64/amd64/machdep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c index d932e04c46e28..f1c8f5c66a6bd 100644 --- a/sys/arch/amd64/amd64/machdep.c +++ b/sys/arch/amd64/amd64/machdep.c @@ -1525,8 +1525,10 @@ init_x86_64_ksyms(void) } else { uintptr_t endp = (uintptr_t)(void *)&end; - ksyms_addsyms_elf(*(long *)endp, - ((long *)endp) + 1, esym); + if (vm_guest == VM_GUEST_GENPVH) + ksyms_addsyms_elf(0, ((long *)endp) + 1, esym); + else + ksyms_addsyms_elf(*(long *)endp, ((long *)endp) + 1, esym); } #endif } From c09440be5aca7e16ce845c3ccbdfb47bac03fb63 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 31 Dec 2023 11:02:43 +0100 Subject: [PATCH 039/114] feat: added pvclock (kvm_clock) support --- sys/arch/x86/conf/files.x86 | 4 + sys/arch/x86/x86/pvclock.c | 235 ++++++++++++++++++++++++++++++++++++ sys/arch/x86/x86/pvreg.h | 118 ++++++++++++++++++ 3 files changed, 357 insertions(+) create mode 100644 sys/arch/x86/x86/pvclock.c create mode 100644 sys/arch/x86/x86/pvreg.h diff --git a/sys/arch/x86/conf/files.x86 b/sys/arch/x86/conf/files.x86 index e6c48c1e48b20..dd7e85f1aad8a 100644 --- a/sys/arch/x86/conf/files.x86 +++ b/sys/arch/x86/conf/files.x86 @@ -81,6 +81,10 @@ device hyperv attach hyperv at cpufeaturebus file arch/x86/x86/hyperv.c hyperv needs-flag +device pvclock +attach pvclock at cpufeaturebus +file arch/x86/x86/pvclock.c pvclock + file arch/x86/x86/apic.c ioapic | lapic file arch/x86/x86/bus_dma.c machdep file arch/x86/x86/bus_space.c machdep diff --git a/sys/arch/x86/x86/pvclock.c b/sys/arch/x86/x86/pvclock.c new file mode 100644 index 0000000000000..3dae196044a4b --- 
/dev/null +++ b/sys/arch/x86/x86/pvclock.c @@ -0,0 +1,235 @@ +/* $OpenBSD: pvclock.c,v 1.9 2023/02/04 19:19:37 cheloha Exp $ */ + +/* + * Copyright (c) 2018 Reyk Floeter + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#if !defined(__i386__) && !defined(__amd64__) +#error pvclock(4) is only supported on i386 and amd64 +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +uint pvclock_lastcount; + +struct pvclock_softc { + device_t sc_dev; + void *sc_time; + paddr_t sc_paddr; + struct timecounter *sc_tc; +}; + +static int pvclock_match(device_t, cfdata_t, void *); +static void pvclock_attach(device_t, device_t, void *); +static int pvclock_activate(device_t, devact_t); + +/* +void pvclock_read_time_info(struct pvclock_softc *, + struct pvclock_time_info *); +*/ +static inline uint32_t + pvclock_read_begin(const struct pvclock_time_info *); +static inline int + pvclock_read_done(const struct pvclock_time_info *, uint32_t); +static uint + pvclock_get_timecount(struct timecounter *); + +struct timecounter pvclock_timecounter = { + .tc_get_timecount = pvclock_get_timecount, + .tc_counter_mask = ~0u, + .tc_frequency = 0, + .tc_name = NULL, + .tc_quality = -2000, + .tc_priv = NULL, +}; + 
+CFATTACH_DECL_NEW(pvclock, sizeof(struct pvclock_softc),
+    pvclock_match,
+    pvclock_attach,
+    NULL /* detach */,
+    pvclock_activate);
+
+/*
+ * Autoconf match routine.
+ *
+ * Returns 1 only when the KVM feature leaf advertises both the second
+ * generation clock source and the "stable" clock bit, 0 otherwise.
+ */
+static int
+pvclock_match(device_t parent, cfdata_t cf, void *aux)
+{
+	u_int regs[6];
+	/*
+	 * pvclock is provided by different hypervisors, we currently
+	 * only support the "kvmclock".
+	 */
+	x86_cpuid(0x40000000 + CPUID_OFFSET_KVM_FEATURES, regs);
+	/*
+	 * We only implement support for the 2nd version of pvclock.
+	 * The first version is basically the same but with different
+	 * non-standard MSRs and it is deprecated.
+	 */
+	if ((regs[0] & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
+		return (0);
+
+	/*
+	 * Only the "stable" clock with a sync'ed TSC is supported.
+	 * In this case the host guarantees that the TSC is constant
+	 * and invariant, either by the underlying TSC or by passing
+	 * on a synchronized value.
+	 */
+	if ((regs[0] &
+	    (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Begin a lockless read of the shared time page.
+ *
+ * Samples ti_version with bit 0 cleared and issues a barrier before the
+ * caller reads the payload.  An odd version therefore never compares
+ * equal in pvclock_read_done(), which makes the caller retry.
+ */
+static inline uint32_t
+pvclock_read_begin(const struct pvclock_time_info *ti)
+{
+	uint32_t ti_version = ti->ti_version & ~0x1;
+	virtio_membar_sync();
+	return (ti_version);
+}
+
+/*
+ * Finish a lockless read: returns non-zero when ti_version still equals
+ * the value sampled by pvclock_read_begin(), i.e. the snapshot taken in
+ * between can be used.
+ */
+static inline int
+pvclock_read_done(const struct pvclock_time_info *ti,
+    uint32_t ti_version)
+{
+	virtio_membar_sync();
+	return (ti->ti_version == ti_version);
+}
+
+/*
+ * Timecounter read method.
+ *
+ * Takes a consistent snapshot of the time page, scales the TSC ticks
+ * elapsed since ti_tsc_timestamp and adds ti_system_time.  The 64-bit
+ * result is truncated to the timecounter's uint return type.
+ */
+static uint
+pvclock_get_timecount(struct timecounter *tc)
+{
+	struct pvclock_softc *sc = tc->tc_priv;
+	struct pvclock_time_info *ti;
+	uint64_t tsc_timestamp, system_time, delta, ctr;
+	uint32_t ti_version, mul_frac;
+	int8_t shift;
+	uint8_t flags;
+
+	ti = sc->sc_time;
+	do {
+		ti_version = pvclock_read_begin(ti);
+		system_time = ti->ti_system_time;
+		tsc_timestamp = ti->ti_tsc_timestamp;
+		mul_frac = ti->ti_tsc_to_system_mul;
+		shift = ti->ti_tsc_shift;
+		flags = ti->ti_flags;
+	} while (!pvclock_read_done(ti, ti_version));
+
+	/*
+	 * The algorithm is described in
+	 * linux/Documentation/virtual/kvm/msr.txt
+	 */
+	delta = rdtsc() - tsc_timestamp;
+	if (shift < 0)
+		delta >>= -shift;
+	else
+		delta <<= shift;
+
+	/*
+	 * (delta * mul_frac) >> 32, computed without dropping the upper
+	 * bits: the 64x32-bit product can need up to 96 bits, so a plain
+	 * 64-bit multiply silently loses them.  Split delta into 32-bit
+	 * halves instead; with delta = hi * 2^32 + lo the quotient is
+	 * exactly hi * mul_frac + ((lo * mul_frac) >> 32), and neither
+	 * partial product can overflow 64 bits.
+	 */
+	ctr = ((delta >> 32) * mul_frac) +
+	    (((delta & 0xffffffffULL) * mul_frac) >> 32) + system_time;
+
+	if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
+		return (ctr);
+
+	/*
+	 * NOTE(review): pvclock_lastcount is a 32-bit uint while ctr is
+	 * 64 bits wide, so this comparison only guards against going
+	 * backwards while ctr still fits in 32 bits -- confirm whether
+	 * the comparison should be done on the truncated value.
+	 */
+	if (ctr < pvclock_lastcount)
+		return (pvclock_lastcount);
+
+	atomic_swap_uint(&pvclock_lastcount, ctr);
+
+	return (ctr);
+}
+
+/*
+ * Autoconf attach routine.
+ *
+ * Allocates one wired, zeroed page for the shared time structure, hands
+ * its physical address to the hypervisor through KVM_MSR_SYSTEM_TIME and
+ * registers the timecounter.  On allocation or address translation
+ * failure an error is printed and no timecounter is registered.
+ */
+static void
+pvclock_attach(device_t parent, device_t self, void *aux)
+{
+	struct pvclock_softc *sc = device_private(self);
+	struct pvclock_time_info *ti;
+	paddr_t pa;
+	uint32_t ti_version;
+	uint8_t flags;
+
+	if ((sc->sc_time = (void *)uvm_km_alloc(kernel_map,
+	    PAGE_SIZE, PAGE_SIZE,
+	    UVM_KMF_WIRED | UVM_KMF_ZERO)) == NULL) {
+		aprint_error("time page allocation failed\n");
+		return;
+	}
+
+	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
+		aprint_error("time page PA extraction failed\n");
+		uvm_km_free(kernel_map, (vaddr_t)sc->sc_time,
+		    PAGE_SIZE, UVM_KMF_WIRED);
+		sc->sc_time = NULL;
+		return;
+	}
+
+	/* Register the page; the enable bit is OR'ed into the address. */
+	wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
+	sc->sc_paddr = pa;
+
+	sc->sc_dev = self;
+
+	/* Take a consistent snapshot of the flags to pick the quality. */
+	ti = sc->sc_time;
+	do {
+		ti_version = pvclock_read_begin(ti);
+		flags = ti->ti_flags;
+	} while (!pvclock_read_done(ti, ti_version));
+
+	sc->sc_tc = &pvclock_timecounter;
+	sc->sc_tc->tc_name = device_xname(sc->sc_dev);
+
+	sc->sc_tc->tc_frequency = 1000000000ULL;
+	sc->sc_tc->tc_priv = sc;
+
+	pvclock_lastcount = 0;
+
+	/* Better than HPET but below TSC */
+	sc->sc_tc->tc_quality = 1500;
+
+	if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
+		/* if tsc is not stable, set a lower priority */
+		/* Better than i8254 but below HPET */
+		sc->sc_tc->tc_quality = 500;
+	}
+
+	tc_init(sc->sc_tc);
+}
+
+/*
+ * Autoconf activate routine.
+ *
+ * On DVACT_DEACTIVATE, rewrites KVM_MSR_SYSTEM_TIME with the enable bit
+ * cleared.  Every other action is ignored.  Always returns 0.
+ */
+static int
+pvclock_activate(device_t self, devact_t act)
+{
+	/*
+	 * The softc is allocated separately from the device_t (the driver
+	 * is declared with CFATTACH_DECL_NEW), so it must be fetched with
+	 * device_private() exactly as pvclock_attach() does; casting self
+	 * would read sc_paddr out of the wrong object.
+	 */
+	struct pvclock_softc *sc = device_private(self);
+	int rv = 0;
+	paddr_t pa = sc->sc_paddr;
+
+	switch (act) {
+	case DVACT_DEACTIVATE:
+		wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
+		break;
+	default:
+		break;
+	}
+
+	return (rv);
+}
diff --git a/sys/arch/x86/x86/pvreg.h
b/sys/arch/x86/x86/pvreg.h new file mode 100644 index 0000000000000..c5c60ce0e8f85 --- /dev/null +++ b/sys/arch/x86/x86/pvreg.h @@ -0,0 +1,118 @@ +/* $OpenBSD: pvreg.h,v 1.6 2019/05/13 15:40:34 pd Exp $ */ + +/* + * Copyright (c) 2015 Reyk Floeter + * Copyright (c) 2015 Stefan Fritsch + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+
+#ifndef _DEV_PV_PVBUS_H_
+#define _DEV_PV_PVBUS_H_
+
+#define CPUID_HV_SIGNATURE_START 0x40000000
+#define CPUID_HV_SIGNATURE_END 0x40010000
+#define CPUID_HV_SIGNATURE_STEP 0x100
+#define CPUID_HV_SIGNATURE_STRLEN 12
+
+/*
+ * KVM
+ *
+ * CPUID_OFFSET_KVM_FEATURES is added to the 0x40000000 CPUID base by
+ * pvclock_match().  The KVM_FEATURE_* values are bit numbers (used as
+ * 1 << KVM_FEATURE_x) tested against the first word returned for that
+ * leaf.
+ */
+#define CPUID_OFFSET_KVM_FEATURES 0x1
+
+#define KVM_FEATURE_CLOCKSOURCE 0 /* deprecated */
+#define KVM_FEATURE_NOP_IO_DELAY 1
+#define KVM_FEATURE_MMU_OP 2 /* deprecated */
+#define KVM_FEATURE_CLOCKSOURCE2 3 /* required by pvclock_match() */
+#define KVM_FEATURE_ASYNC_PF 4
+#define KVM_FEATURE_STEAL_TIME 5
+#define KVM_FEATURE_PV_EOI 6
+#define KVM_FEATURE_PV_UNHALT 7
+#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 /* required by pvclock_match() */
+
+#define KVM_MSR_EOI_EN 0x4b564d04
+#define KVM_PV_EOI_BIT 0
+
+#define KVM_MSR_WALL_CLOCK 0x4b564d00
+#define KVM_MSR_SYSTEM_TIME 0x4b564d01 /* written with time page PA | PVCLOCK_SYSTEM_TIME_ENABLE */
+
+struct pvclock_wall_clock { /* NOTE(review): presumably the KVM_MSR_WALL_CLOCK layout; unused by pvclock.c -- confirm */
+ uint32_t wc_version;
+ uint32_t wc_sec;
+ uint32_t wc_nsec;
+} __packed;
+
+struct pvclock_time_info { /* layout of the shared time page read by pvclock.c */
+ uint32_t ti_version; /* sampled before and after each read; reader retries on mismatch */
+ uint32_t ti_pad0;
+ uint64_t ti_tsc_timestamp; /* subtracted from rdtsc() to get the tick delta */
+ uint64_t ti_system_time; /* added to the scaled delta */
+ uint32_t ti_tsc_to_system_mul; /* 32.32 fixed-point multiplier: (delta * mul) >> 32 */
+ int8_t ti_tsc_shift; /* delta is shifted left if >= 0, right by -shift if < 0 */
+ uint8_t ti_flags; /* tested against PVCLOCK_FLAG_TSC_STABLE */
+ uint8_t ti_pad[2];
+} __packed;
+
+#define PVCLOCK_FLAG_TSC_STABLE 0x01 /* bit in ti_flags */
+#define PVCLOCK_SYSTEM_TIME_ENABLE 0x01 /* bit 0 of the value written to KVM_MSR_SYSTEM_TIME */
+
+/*
+ * Hyper-V
+ */
+#define CPUID_OFFSET_HYPERV_INTERFACE 0x1
+#define CPUID_OFFSET_HYPERV_VERSION 0x2
+#define CPUID_OFFSET_HYPERV_FEATURES 0x3
+#define CPUID_OFFSET_HYPERV_ENLIGHTENMENT_INFO 0x4
+#define CPUID_OFFSET_HYPERV_IMPL_LIMITS 0x5
+
+#define HYPERV_VERSION_EAX_BUILD_NUMBER 0
+#define HYPERV_VERSION_EBX_MAJOR_M 0xffff0000
+#define HYPERV_VERSION_EBX_MAJOR_S 16
+#define HYPERV_VERSION_EBX_MINOR_M 0x0000ffff
+#define HYPERV_VERSION_EBX_MINOR_S 0
+#define HYPERV_VERSION_ECX_SERVICE_PACK 0
+#define HYPERV_VERSION_EDX_SERVICE_BRANCH_M 0xff000000
+#define HYPERV_VERSION_EDX_SERVICE_BRANCH_S 24
+#define HYPERV_VERSION_EDX_SERVICE_NUMBER_M 0x00ffffff
+#define HYPERV_VERSION_EDX_SERVICE_NUMBER_S 0
+
+#define HYPERV_VERSION_WS2008 0x00060000
+#define HYPERV_VERSION_WIN7 0x00060001
+#define HYPERV_VERSION_WIN8 0x00060002 +#define HYPERV_VERSION_WIN8_1 0x00060003 +#define HYPERV_VERSION_WIN10 0x00100000 + +#define HYPERV_FEATURE_EAX_VP_RUNTIME 0 +#define HYPERV_FEATURE_EAX_TIME_REF_COUNT 1 +#define HYPERV_FEATURE_EAX_SYNIC 2 +#define HYPERV_FEATURE_EAX_STIMER 3 +#define HYPERV_FEATURE_EAX_APIC 4 +#define HYPERV_FEATURE_EAX_HYPERCALL 5 +#define HYPERV_FEATURE_EAX_VP_INDEX 6 +#define HYPERV_FEATURE_EAX_MSR_RESET 7 +#define HYPERV_FEATURE_EAX_STATS_PAGES 8 +#define HYPERV_FEATURE_EAX_REF_TSC 9 +#define HYPERV_FEATURE_EAX_GUEST_IDLE 10 +#define HYPERV_FEATURE_EAX_TIMER_FREQ 11 +#define HYPERV_FEATURE_EAX_DEBUG 12 + +/* + * Xen + */ +#define CPUID_OFFSET_XEN_VERSION 0x1 +#define CPUID_OFFSET_XEN_HYPERCALL 0x2 + +#define XEN_VERSION_MAJOR_S 16 +#define XEN_VERSION_MINOR_M 0xffff + +#endif /* _DEV_PV_PVBUS_H_ */ From 74b5c3a42c3124139a34a7a5aec70c42406beca5 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 3 Jan 2024 06:38:23 +0100 Subject: [PATCH 040/114] sync thorpej@ mmio changes --- sys/dev/virtio/virtio_mmio.c | 1 + sys/dev/virtio/virtio_mmiovar.h | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index 1ec23fdfa0be7..9f14fdf8fafc2 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -377,3 +377,4 @@ virtio_mmio_setup_interrupts(struct virtio_softc *vsc __unused, return 0; } + diff --git a/sys/dev/virtio/virtio_mmiovar.h b/sys/dev/virtio/virtio_mmiovar.h index 8fb267cba7edb..896915d345baf 100644 --- a/sys/dev/virtio/virtio_mmiovar.h +++ b/sys/dev/virtio/virtio_mmiovar.h @@ -1,4 +1,4 @@ -/* $NetBSD: virtio_mmiovar.h,v 1.5 2021/10/22 02:57:23 yamaguchi Exp $ */ +/* $NetBSD: virtio_mmiovar.h,v 1.6 2024/01/02 07:24:50 thorpej Exp $ */ /* * Copyright (c) 2018 Jonathan A. Kollasch * All rights reserved. 
@@ -36,8 +36,7 @@ struct virtio_mmio_softc { bus_space_tag_t sc_iot; bus_space_handle_t sc_ioh; bus_size_t sc_iosize; - - int mmio_version; + bool sc_le_regs; void *sc_ih; From 35590010c0d2513886ae4aba6b92412d3882020a Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 3 Jan 2024 06:58:23 +0100 Subject: [PATCH 041/114] sync with GENPVH --- sys/dev/virtio/virtio_mmio.c | 301 ++++++++++++++++++++--------------- 1 file changed, 172 insertions(+), 129 deletions(-) diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index 9f14fdf8fafc2..c737b8f7cacd2 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -1,6 +1,35 @@ -/* $NetBSD: virtio_mmio.c,v 1.11 2023/07/07 07:19:36 rin Exp $ */ +/* $NetBSD: virtio_mmio.c,v 1.12 2024/01/02 07:24:50 thorpej Exp $ */ /* $OpenBSD: virtio_mmio.c,v 1.2 2017/02/24 17:12:31 patrick Exp $ */ +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Jason R. Thorpe. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /* * Copyright (c) 2014 Patrick Wildt * Copyright (c) 2012 Stefan Fritsch. @@ -29,7 +58,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.11 2023/07/07 07:19:36 rin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.12 2024/01/02 07:24:50 thorpej Exp $"); #include #include @@ -39,9 +68,6 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.11 2023/07/07 07:19:36 rin Exp $") #define VIRTIO_PRIVATE #include -#include - -#include #define VIRTIO_MMIO_MAGIC ('v' | 'i' << 8 | 'r' << 16 | 't' << 24) @@ -49,28 +75,28 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.11 2023/07/07 07:19:36 rin Exp $") #define VIRTIO_MMIO_VERSION 0x004 #define VIRTIO_MMIO_DEVICE_ID 0x008 #define VIRTIO_MMIO_VENDOR_ID 0x00c -#define VIRTIO_MMIO_HOST_FEATURES 0x010 -#define VIRTIO_MMIO_HOST_FEATURES_SEL 0x014 -#define VIRTIO_MMIO_GUEST_FEATURES 0x020 -#define VIRTIO_MMIO_GUEST_FEATURES_SEL 0x024 -#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 +#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 /* "HostFeatures" in v1 */ +#define VIRTIO_MMIO_DEVICE_FEATURES_SEL 0x014 /* "HostFeaturesSel" in v1 */ +#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 /* "GuestFeatures" in v1 */ +#define VIRTIO_MMIO_DRIVER_FEATURES_SEL 0x024 /* "GuestFeaturesSel" in v1 */ +#define VIRTIO_MMIO_V1_GUEST_PAGE_SIZE 0x028 #define VIRTIO_MMIO_QUEUE_SEL 0x030 #define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 #define VIRTIO_MMIO_QUEUE_NUM 0x038 -#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c -#define 
VIRTIO_MMIO_QUEUE_PFN 0x040 -#define VIRTIO_MMIO_QUEUE_READY 0x044 /* requires version 2 */ +#define VIRTIO_MMIO_V1_QUEUE_ALIGN 0x03c +#define VIRTIO_MMIO_V1_QUEUE_PFN 0x040 +#define VIRTIO_MMIO_QUEUE_READY 0x044 #define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 #define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 #define VIRTIO_MMIO_INTERRUPT_ACK 0x064 #define VIRTIO_MMIO_STATUS 0x070 -#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 /* requires version 2 */ -#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 /* requires version 2 */ -#define VIRTIO_MMIO_QUEUE_AVAIL_LOW 0x090 /* requires version 2 */ -#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH 0x094 /* requires version 2 */ -#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0 /* requires version 2 */ -#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4 /* requires version 2 */ -#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc /* requires version 2 */ +#define VIRTIO_MMIO_V2_QUEUE_DESC_LOW 0x080 +#define VIRTIO_MMIO_V2_QUEUE_DESC_HIGH 0x084 +#define VIRTIO_MMIO_V2_QUEUE_AVAIL_LOW 0x090 +#define VIRTIO_MMIO_V2_QUEUE_AVAIL_HIGH 0x094 +#define VIRTIO_MMIO_V2_QUEUE_USED_LOW 0x0a0 +#define VIRTIO_MMIO_V2_QUEUE_USED_HIGH 0x0a4 +#define VIRTIO_MMIO_V2_CONFIG_GEN 0x0fc #define VIRTIO_MMIO_CONFIG 0x100 #define VIRTIO_MMIO_INT_VRING (1 << 0) @@ -96,19 +122,59 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.11 2023/07/07 07:19:36 rin Exp $") static void virtio_mmio_kick(struct virtio_softc *, uint16_t); static uint16_t virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t); -static void virtio_mmio_setup_queue(struct virtio_softc *, uint16_t, uint64_t); -static int virtio_mmio_get_status(struct virtio_softc *); +static void virtio_mmio_v1_setup_queue(struct virtio_softc *, uint16_t, uint64_t); +static void virtio_mmio_v2_setup_queue(struct virtio_softc *, uint16_t, uint64_t); static void virtio_mmio_set_status(struct virtio_softc *, int); static void virtio_mmio_negotiate_features(struct virtio_softc *, uint64_t); static int virtio_mmio_alloc_interrupts(struct virtio_softc *); static 
void virtio_mmio_free_interrupts(struct virtio_softc *); static int virtio_mmio_setup_interrupts(struct virtio_softc *, int); -static const struct virtio_ops virtio_mmio_ops = { +static uint32_t +virtio_mmio_reg_read(struct virtio_mmio_softc *sc, bus_addr_t reg) +{ + uint32_t val; + + val = bus_space_read_4(sc->sc_iot, sc->sc_ioh, reg); + if (sc->sc_le_regs) { + val = le32toh(val); + } + return val; +} + +static void +virtio_mmio_reg_write(struct virtio_mmio_softc *sc, bus_addr_t reg, + uint32_t val) +{ + if (sc->sc_le_regs) { + val = htole32(val); + } + bus_space_write_4(sc->sc_iot, sc->sc_ioh, reg, val); +} + +static void +virtio_mmio_v2_set_addr(struct virtio_mmio_softc *sc, bus_addr_t reg, + uint64_t addr) +{ + virtio_mmio_reg_write(sc, reg, BUS_ADDR_LO32(addr)); + virtio_mmio_reg_write(sc, reg + 4, BUS_ADDR_HI32(addr)); +} + +static const struct virtio_ops virtio_mmio_v1_ops = { .kick = virtio_mmio_kick, .read_queue_size = virtio_mmio_read_queue_size, - .setup_queue = virtio_mmio_setup_queue, - .get_status = virtio_mmio_get_status, + .setup_queue = virtio_mmio_v1_setup_queue, + .set_status = virtio_mmio_set_status, + .neg_features = virtio_mmio_negotiate_features, + .alloc_interrupts = virtio_mmio_alloc_interrupts, + .free_interrupts = virtio_mmio_free_interrupts, + .setup_interrupts = virtio_mmio_setup_interrupts, +}; + +static const struct virtio_ops virtio_mmio_v2_ops = { + .kick = virtio_mmio_kick, + .read_queue_size = virtio_mmio_read_queue_size, + .setup_queue = virtio_mmio_v2_setup_queue, .set_status = virtio_mmio_set_status, .neg_features = virtio_mmio_negotiate_features, .alloc_interrupts = virtio_mmio_alloc_interrupts, @@ -120,57 +186,50 @@ static uint16_t virtio_mmio_read_queue_size(struct virtio_softc *vsc, uint16_t idx) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_SEL, idx); - return bus_space_read_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_QUEUE_NUM_MAX); + 
virtio_mmio_reg_write(sc, VIRTIO_MMIO_QUEUE_SEL, idx); + return virtio_mmio_reg_read(sc, VIRTIO_MMIO_QUEUE_NUM_MAX); } static void -virtio_mmio_setup_queue(struct virtio_softc *vsc, uint16_t idx, uint64_t addr) +virtio_mmio_v1_setup_queue(struct virtio_softc *vsc, uint16_t idx, + uint64_t addr) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - struct virtqueue *vqs = sc->sc_sc.sc_vqs; - paddr_t paddr; - - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NUM, - bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NUM_MAX)); - - if (sc->mmio_version == 1) { - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_SEL, idx); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_ALIGN, - VIRTIO_PAGE_SIZE); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_PFN, - addr / VIRTIO_PAGE_SIZE); - } else { - paddr = vtophys((vaddr_t)vqs->vq_desc); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_DESC_LOW, - paddr); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_DESC_HIGH, - ((uint64_t)paddr >> 32)); - - paddr = vtophys((vaddr_t)vqs->vq_avail); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_AVAIL_LOW, - paddr); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_AVAIL_HIGH, - ((uint64_t)(paddr) >> 32)); - - paddr = vtophys((vaddr_t)vqs->vq_used); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_LOW, - paddr); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_USED_HIGH, - ((uint64_t)(paddr) >> 32)); - - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_READY, 1); - } + + virtio_mmio_reg_write(sc, VIRTIO_MMIO_QUEUE_SEL, idx); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_QUEUE_NUM, + virtio_mmio_reg_read(sc, VIRTIO_MMIO_QUEUE_NUM_MAX)); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_V1_QUEUE_ALIGN, + VIRTIO_PAGE_SIZE); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_V1_QUEUE_PFN, + addr / VIRTIO_PAGE_SIZE); } -static int 
-virtio_mmio_get_status(struct virtio_softc *vsc) +static void +virtio_mmio_v2_setup_queue(struct virtio_softc *vsc, uint16_t idx, + uint64_t addr) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - - return bus_space_read_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_STATUS); + struct virtqueue *vq = &vsc->sc_vqs[idx]; + KASSERT(vq->vq_index == idx); + + virtio_mmio_reg_write(sc, VIRTIO_MMIO_QUEUE_SEL, idx); + if (addr == 0) { + virtio_mmio_reg_write(sc, VIRTIO_MMIO_QUEUE_READY, 0); + virtio_mmio_v2_set_addr(sc, VIRTIO_MMIO_V2_QUEUE_DESC_LOW, 0); + virtio_mmio_v2_set_addr(sc, VIRTIO_MMIO_V2_QUEUE_AVAIL_LOW, 0); + virtio_mmio_v2_set_addr(sc, VIRTIO_MMIO_V2_QUEUE_USED_LOW, 0); + } else { + virtio_mmio_reg_write(sc, VIRTIO_MMIO_QUEUE_NUM, + virtio_mmio_reg_read(sc, VIRTIO_MMIO_QUEUE_NUM_MAX)); + virtio_mmio_v2_set_addr(sc, VIRTIO_MMIO_V2_QUEUE_DESC_LOW, + addr); + virtio_mmio_v2_set_addr(sc, VIRTIO_MMIO_V2_QUEUE_AVAIL_LOW, + addr + vq->vq_availoffset); + virtio_mmio_v2_set_addr(sc, VIRTIO_MMIO_V2_QUEUE_USED_LOW, + addr + vq->vq_usedoffset); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_QUEUE_READY, 1); + } } static void @@ -180,10 +239,8 @@ virtio_mmio_set_status(struct virtio_softc *vsc, int status) int old = 0; if (status != 0) - old = bus_space_read_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_STATUS); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_STATUS, - status|old); + old = virtio_mmio_reg_read(sc, VIRTIO_MMIO_STATUS); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_STATUS, status|old); } bool @@ -191,11 +248,13 @@ virtio_mmio_common_probe_present(struct virtio_mmio_softc *sc) { uint32_t magic; + /* XXX */ magic = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_MAGIC_VALUE); return (magic == VIRTIO_MMIO_MAGIC); } + void virtio_mmio_common_attach(struct virtio_mmio_softc *sc) { @@ -206,30 +265,44 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) magic = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_MAGIC_VALUE); if (magic != 
VIRTIO_MMIO_MAGIC) { - aprint_error_dev(vsc->sc_dev, - "wrong magic value 0x%08x; giving up\n", magic); - return; + if (magic == le32toh(VIRTIO_MMIO_MAGIC)) { + sc->sc_le_regs = true; + } else { + aprint_error_dev(vsc->sc_dev, + "wrong magic value 0x%08x; giving up\n", magic); + return; + } } - ver = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_VERSION); - if (ver < 1 || ver > 2) { + vsc->sc_bus_endian = READ_ENDIAN; + vsc->sc_struct_endian = STRUCT_ENDIAN; + + ver = virtio_mmio_reg_read(sc, VIRTIO_MMIO_VERSION); + switch (ver) { + case 1: + /* we could use PAGE_SIZE, but virtio(4) assumes 4KiB for now */ + virtio_mmio_reg_write(sc, + VIRTIO_MMIO_V1_GUEST_PAGE_SIZE, VIRTIO_PAGE_SIZE); + vsc->sc_ops = &virtio_mmio_v1_ops; + break; + + case 2: + vsc->sc_ops = &virtio_mmio_v2_ops; + break; + + default: aprint_error_dev(vsc->sc_dev, - "unknown version 0x%02x; giving up\n", ver); + "unknown version 0x%08x; giving up\n", ver); return; } - sc->mmio_version = ver; - - id = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_DEVICE_ID); - /* we could use PAGE_SIZE, but virtio(4) assumes 4KiB for now */ - if (ver == 1) - bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_GUEST_PAGE_SIZE, VIRTIO_PAGE_SIZE); - if (id == 0) + aprint_normal_dev(self, "VirtIO-MMIO v%d\n", ver); + + id = virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_ID); + if (id == 0) { + /* no device connected. 
*/ return; + } virtio_print_device_type(self, id, ver); - vsc->sc_ops = &virtio_mmio_ops; - vsc->sc_bus_endian = READ_ENDIAN; - vsc->sc_struct_endian = STRUCT_ENDIAN; /* set up our device config tag */ vsc->sc_devcfg_iosize = sc->sc_iosize - VIRTIO_MMIO_CONFIG; @@ -277,44 +350,18 @@ virtio_mmio_common_detach(struct virtio_mmio_softc *sc, int flags) */ static void virtio_mmio_negotiate_features(struct virtio_softc *vsc, uint64_t - guest_features) + driver_features) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - uint64_t host, negotiated, device_status; - - guest_features |= VIRTIO_F_VERSION_1; -#ifdef __NEED_VIRTIO_F_ACCESS_PLATFORM - /* XXX This could use some work. */ - guest_features |= VIRTIO_F_ACCESS_PLATFORM; -#endif /* __NEED_VIRTIO_F_ACCESS_PLATFORM */ - /* notify on empty is 0.9 only */ - guest_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY; - vsc->sc_active_features = 0; - - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES_SEL, 0); - host = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES_SEL, 1); - host |= (uint64_t) - bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_HOST_FEATURES) << 32; - - negotiated = host & guest_features; - - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_GUEST_FEATURES_SEL, 0); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_GUEST_FEATURES, - negotiated & 0xffffffff); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_GUEST_FEATURES_SEL, 1); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_GUEST_FEATURES, - negotiated >> 32); - virtio_mmio_set_status(vsc, VIRTIO_CONFIG_S_FEATURES_OK); - - device_status = virtio_mmio_get_status(vsc); - if ((device_status & VIRTIO_CONFIG_S_FEATURES_OK) == 0) { - aprint_error_dev(vsc->sc_dev, - "desired features were not accepted\n"); - } + uint32_t r; - vsc->sc_active_features = negotiated; + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DEVICE_FEATURES_SEL, 
0); + r = virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_FEATURES); + r &= driver_features; + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES_SEL, 0); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES, r); + vsc->sc_active_features = r; } /* @@ -328,10 +375,8 @@ virtio_mmio_intr(void *arg) int isr, r = 0; /* check and ack the interrupt */ - isr = bus_space_read_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_INTERRUPT_STATUS); - bus_space_write_4(sc->sc_iot, sc->sc_ioh, - VIRTIO_MMIO_INTERRUPT_ACK, isr); + isr = virtio_mmio_reg_read(sc, VIRTIO_MMIO_INTERRUPT_STATUS); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_INTERRUPT_ACK, isr); if ((isr & VIRTIO_MMIO_INT_CONFIG) && (vsc->sc_config_change != NULL)) r = (vsc->sc_config_change)(vsc); @@ -350,8 +395,7 @@ static void virtio_mmio_kick(struct virtio_softc *vsc, uint16_t idx) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NOTIFY, - idx); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_QUEUE_NOTIFY, idx); } static int @@ -377,4 +421,3 @@ virtio_mmio_setup_interrupts(struct virtio_softc *vsc __unused, return 0; } - From 9645551572f8362557eea70ac787a5bb92bdadd1 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 4 Jan 2024 08:25:29 +0100 Subject: [PATCH 042/114] feat: multiple virtio devices support --- sys/arch/amd64/amd64/amd64_mainbus.c | 5 +- sys/dev/virtio/virtio_mmio_cmdline.c | 120 ++++++++++++++++----------- 2 files changed, 75 insertions(+), 50 deletions(-) diff --git a/sys/arch/amd64/amd64/amd64_mainbus.c b/sys/arch/amd64/amd64/amd64_mainbus.c index d118ef8bf16a8..384e808214589 100644 --- a/sys/arch/amd64/amd64/amd64_mainbus.c +++ b/sys/arch/amd64/amd64/amd64_mainbus.c @@ -238,10 +238,11 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) CFARGS(.iattr = "isabus")); } #endif - mba.mba_cmdline.memt = x86_bus_space_mem; mba.mba_cmdline.dmat = &pci_bus_dma_tag; - config_found(self, &mba.mba_cmdline, NULL, CFARGS(.iattr = 
"cmdlinebus")); + mba.mba_cmdline.dmat64 = &pci_bus_dma64_tag; + config_found(self, &mba.mba_cmdline, NULL, + CFARGS(.iattr = "cmdlinebus")); if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index 65be5895eeb06..a8734ce110927 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -43,13 +43,6 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio_cmdline.c"); #define VMMIOSTR "virtio_mmio.device=" -static int virtio_mmio_cmdline_match(device_t, cfdata_t, void *); -static void virtio_mmio_cmdline_attach(device_t, device_t, void *); -static int virtio_mmio_cmdline_detach(device_t, int); -static int virtio_mmio_cmdline_rescan(device_t, const char *, const int *); -static int virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *); -static void virtio_mmio_cmdline_free_interrupts(struct virtio_mmio_softc *); - struct mmio_args { uint64_t sz; uint64_t baseaddr; @@ -62,14 +55,33 @@ struct virtio_mmio_cmdline_softc { struct mmio_args margs; }; +static int virtio_mmio_cmdline_match(device_t, cfdata_t, void *); +static void virtio_mmio_cmdline_attach(device_t, device_t, void *); +static int virtio_mmio_cmdline_do_attach(device_t, + struct cmdline_attach_args *, + struct mmio_args *); +static int virtio_mmio_cmdline_detach(device_t, int); +static int virtio_mmio_cmdline_rescan(device_t, const char *, const int *); +static int virtio_mmio_cmdline_alloc_interrupts(struct virtio_mmio_softc *); +static void virtio_mmio_cmdline_free_interrupts(struct virtio_mmio_softc *); + CFATTACH_DECL3_NEW(mmio_cmdline, sizeof(struct virtio_mmio_cmdline_softc), virtio_mmio_cmdline_match, virtio_mmio_cmdline_attach, virtio_mmio_cmdline_detach, NULL, virtio_mmio_cmdline_rescan, (void *)voidop, DVF_DETACH_SHUTDOWN); +static int +virtio_mmio_cmdline_match(device_t parent, cfdata_t match, void *aux) +{ + if 
(strstr(xen_start_info.cmd_line, VMMIOSTR) == NULL) + return 0; + + return 1; +} + static void -parsearg(device_t self, struct mmio_args *margs, const char *arg) +parsearg(struct mmio_args *margs, const char *arg) { char *p; @@ -131,73 +143,73 @@ parsearg(device_t self, struct mmio_args *margs, const char *arg) return; bad: - printf("Error parsing virtio_mmio parameter: %s\n", arg); + aprint_error("Error parsing virtio_mmio parameter: %s\n", arg); } static void -virtio_mmio_cmdline_parse(device_t self, struct virtio_mmio_cmdline_softc *sc) +virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) { - struct virtio_mmio_softc *const msc = &sc->sc_msc; + struct virtio_mmio_cmdline_softc *sc = device_private(self); + struct cmdline_attach_args *caa = aux; struct mmio_args *margs = &sc->margs; - char *p, *v, cmdline[128]; + char *v, *n, cmdline[128]; int error; - - strcpy(cmdline, xen_start_info.cmd_line); - - aprint_normal("\nkernel parameters: %s", cmdline); - - if ((p = strstr(cmdline, VMMIOSTR)) == NULL) - return; + static char *p = NULL; + static int idx = 0; + bool hasnext; + + if (idx == 0) { + strcpy(cmdline, xen_start_info.cmd_line); + aprint_verbose("\nkernel parameters: %s", cmdline); + if ((p = strstr(cmdline, VMMIOSTR)) == NULL) + return; + } while (*p) { + hasnext = false; v = p; while (*p && *p != ' ') p++; - if (*p) + if (*p) { + n = p; *p = '\0'; + hasnext = true; + } p = v; while (*p && *p != '=') p++; if (*p) { p++; aprint_normal("\nviommio: %s", p); - parsearg(self, margs, p); - - error = bus_space_map( - msc->sc_iot, margs->baseaddr, - margs->sz, 0, &msc->sc_ioh - ); - if (error) { - aprint_error_dev(self, - "couldn't map %#" PRIx64 ": %d", - (uint64_t)margs->baseaddr, error - ); - return; - } + parsearg(margs, p); + + error = virtio_mmio_cmdline_do_attach(self, + caa, margs); + if (error) + return; + } + if (hasnext) { + p = n+1; + idx++; + config_found(parent, caa, NULL, + CFARGS(.iattr = "cmdlinebus")); } } } static int 
-virtio_mmio_cmdline_match(device_t parent, cfdata_t match, void *aux) +virtio_mmio_cmdline_do_attach(device_t self, + struct cmdline_attach_args *caa, + struct mmio_args *margs) { - if (strstr(xen_start_info.cmd_line, VMMIOSTR) == NULL) - return 0; - return 1; -} - -static void -virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) -{ - /* Attach function for device */ struct virtio_mmio_cmdline_softc *sc = device_private(self); struct virtio_mmio_softc *const msc = &sc->sc_msc; struct virtio_softc *const vsc = &msc->sc_sc; - struct cmdline_attach_args *caa = aux; + int error; msc->sc_iot = caa->memt; - msc->sc_iosize = sc->margs.sz; + msc->sc_iosize = margs->sz; vsc->sc_dev = self; if (BUS_DMA_TAG_VALID(caa->dmat64)) { @@ -208,7 +220,17 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) vsc->sc_dmat = caa->dmat; } - virtio_mmio_cmdline_parse(self, sc); + error = bus_space_map( + msc->sc_iot, margs->baseaddr, + margs->sz, 0, &msc->sc_ioh + ); + if (error) { + aprint_error_dev(self, + "couldn't map %#" PRIx64 ": %d", + (uint64_t)margs->baseaddr, error + ); + return error; + } aprint_normal("\n"); aprint_naive("\n"); @@ -218,13 +240,15 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) virtio_mmio_common_attach(msc); virtio_mmio_cmdline_rescan(self, "virtio", NULL); + + return 0; } static int virtio_mmio_cmdline_detach(device_t self, int flags) { - struct virtio_mmio_cmdline_softc * const fsc = device_private(self); - struct virtio_mmio_softc * const msc = &fsc->sc_msc; + struct virtio_mmio_cmdline_softc * const sc = device_private(self); + struct virtio_mmio_softc * const msc = &sc->sc_msc; return virtio_mmio_common_detach(msc, flags); } From 992936911af6f11f8b900b30e038c742b762abf3 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 4 Jan 2024 11:29:06 +0100 Subject: [PATCH 043/114] fix: features for Firecracker --- sys/dev/virtio/virtio_mmio.c | 46 +++++++++++++++++++++++++++------ 
sys/dev/virtio/virtio_mmiovar.h | 1 + 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index c737b8f7cacd2..b9abcc310c5c3 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -66,6 +66,8 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.12 2024/01/02 07:24:50 thorpej Exp #include #include +#include + #define VIRTIO_PRIVATE #include @@ -125,6 +127,7 @@ static uint16_t virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t); static void virtio_mmio_v1_setup_queue(struct virtio_softc *, uint16_t, uint64_t); static void virtio_mmio_v2_setup_queue(struct virtio_softc *, uint16_t, uint64_t); static void virtio_mmio_set_status(struct virtio_softc *, int); +static int virtio_mmio_get_status(struct virtio_softc *); static void virtio_mmio_negotiate_features(struct virtio_softc *, uint64_t); static int virtio_mmio_alloc_interrupts(struct virtio_softc *); static void virtio_mmio_free_interrupts(struct virtio_softc *); @@ -165,6 +168,7 @@ static const struct virtio_ops virtio_mmio_v1_ops = { .read_queue_size = virtio_mmio_read_queue_size, .setup_queue = virtio_mmio_v1_setup_queue, .set_status = virtio_mmio_set_status, + .get_status = virtio_mmio_get_status, .neg_features = virtio_mmio_negotiate_features, .alloc_interrupts = virtio_mmio_alloc_interrupts, .free_interrupts = virtio_mmio_free_interrupts, @@ -176,6 +180,7 @@ static const struct virtio_ops virtio_mmio_v2_ops = { .read_queue_size = virtio_mmio_read_queue_size, .setup_queue = virtio_mmio_v2_setup_queue, .set_status = virtio_mmio_set_status, + .get_status = virtio_mmio_get_status, .neg_features = virtio_mmio_negotiate_features, .alloc_interrupts = virtio_mmio_alloc_interrupts, .free_interrupts = virtio_mmio_free_interrupts, @@ -232,6 +237,14 @@ virtio_mmio_v2_setup_queue(struct virtio_softc *vsc, uint16_t idx, } } +static int +virtio_mmio_get_status(struct virtio_softc *vsc) +{ + struct virtio_mmio_softc *sc = 
(struct virtio_mmio_softc *)vsc; + + return virtio_mmio_reg_read(sc, VIRTIO_MMIO_STATUS); +} + static void virtio_mmio_set_status(struct virtio_softc *vsc, int status) { @@ -260,7 +273,7 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) { struct virtio_softc *vsc = &sc->sc_sc; device_t self = vsc->sc_dev; - uint32_t id, magic, ver; + uint32_t id, magic; magic = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_MAGIC_VALUE); @@ -276,8 +289,8 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) vsc->sc_bus_endian = READ_ENDIAN; vsc->sc_struct_endian = STRUCT_ENDIAN; - ver = virtio_mmio_reg_read(sc, VIRTIO_MMIO_VERSION); - switch (ver) { + sc->sc_ver = virtio_mmio_reg_read(sc, VIRTIO_MMIO_VERSION); + switch (sc->sc_ver) { case 1: /* we could use PAGE_SIZE, but virtio(4) assumes 4KiB for now */ virtio_mmio_reg_write(sc, @@ -291,10 +304,10 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) default: aprint_error_dev(vsc->sc_dev, - "unknown version 0x%08x; giving up\n", ver); + "unknown version 0x%08x; giving up\n", sc->sc_ver); return; } - aprint_normal_dev(self, "VirtIO-MMIO v%d\n", ver); + aprint_normal_dev(self, "VirtIO-MMIO v%d\n", sc->sc_ver); id = virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_ID); if (id == 0) { @@ -302,7 +315,7 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) return; } - virtio_print_device_type(self, id, ver); + virtio_print_device_type(self, id, sc->sc_ver); /* set up our device config tag */ vsc->sc_devcfg_iosize = sc->sc_iosize - VIRTIO_MMIO_CONFIG; @@ -353,13 +366,30 @@ virtio_mmio_negotiate_features(struct virtio_softc *vsc, uint64_t driver_features) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - uint32_t r; + uint64_t r, device_status; + + if (sc->sc_ver > 1) + driver_features |= VIRTIO_F_VERSION_1; virtio_mmio_reg_write(sc, VIRTIO_MMIO_DEVICE_FEATURES_SEL, 0); r = virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_FEATURES); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DEVICE_FEATURES_SEL, 1); + r 
|= (uint64_t)virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_FEATURES) << 32; + r &= driver_features; + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES_SEL, 0); - virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES, r); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES, r & 0xffffffff); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES_SEL, 1); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES, r >> 32); + + virtio_mmio_set_status(vsc, VIRTIO_CONFIG_S_FEATURES_OK); + + device_status = virtio_mmio_get_status(vsc); + if ((device_status & VIRTIO_CONFIG_S_FEATURES_OK) == 0) { + aprint_error_dev(vsc->sc_dev, + "desired features were not accepted\n"); + } vsc->sc_active_features = r; } diff --git a/sys/dev/virtio/virtio_mmiovar.h b/sys/dev/virtio/virtio_mmiovar.h index 896915d345baf..a862dd4ae3dd9 100644 --- a/sys/dev/virtio/virtio_mmiovar.h +++ b/sys/dev/virtio/virtio_mmiovar.h @@ -36,6 +36,7 @@ struct virtio_mmio_softc { bus_space_tag_t sc_iot; bus_space_handle_t sc_ioh; bus_size_t sc_iosize; + uint8_t sc_ver; bool sc_le_regs; void *sc_ih; From 814b955a8a5a570e0f7665ca1af298b15a3475b3 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 5 Jan 2024 05:50:13 +0100 Subject: [PATCH 044/114] thorpej MMIOv2 fix --- sys/dev/virtio/virtio_mmio.c | 89 ++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 30 deletions(-) diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index b9abcc310c5c3..c5342e5f9e7d7 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -66,8 +66,6 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.12 2024/01/02 07:24:50 thorpej Exp #include #include -#include - #define VIRTIO_PRIVATE #include @@ -101,9 +99,6 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.12 2024/01/02 07:24:50 thorpej Exp #define VIRTIO_MMIO_V2_CONFIG_GEN 0x0fc #define VIRTIO_MMIO_CONFIG 0x100 -#define VIRTIO_MMIO_INT_VRING (1 << 0) -#define VIRTIO_MMIO_INT_CONFIG (1 << 1) - /* * MMIO 
configuration space for virtio-mmio v1 is in guest byte order. * @@ -126,8 +121,8 @@ static void virtio_mmio_kick(struct virtio_softc *, uint16_t); static uint16_t virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t); static void virtio_mmio_v1_setup_queue(struct virtio_softc *, uint16_t, uint64_t); static void virtio_mmio_v2_setup_queue(struct virtio_softc *, uint16_t, uint64_t); -static void virtio_mmio_set_status(struct virtio_softc *, int); static int virtio_mmio_get_status(struct virtio_softc *); +static void virtio_mmio_set_status(struct virtio_softc *, int); static void virtio_mmio_negotiate_features(struct virtio_softc *, uint64_t); static int virtio_mmio_alloc_interrupts(struct virtio_softc *); static void virtio_mmio_free_interrupts(struct virtio_softc *); @@ -168,7 +163,6 @@ static const struct virtio_ops virtio_mmio_v1_ops = { .read_queue_size = virtio_mmio_read_queue_size, .setup_queue = virtio_mmio_v1_setup_queue, .set_status = virtio_mmio_set_status, - .get_status = virtio_mmio_get_status, .neg_features = virtio_mmio_negotiate_features, .alloc_interrupts = virtio_mmio_alloc_interrupts, .free_interrupts = virtio_mmio_free_interrupts, @@ -180,7 +174,6 @@ static const struct virtio_ops virtio_mmio_v2_ops = { .read_queue_size = virtio_mmio_read_queue_size, .setup_queue = virtio_mmio_v2_setup_queue, .set_status = virtio_mmio_set_status, - .get_status = virtio_mmio_get_status, .neg_features = virtio_mmio_negotiate_features, .alloc_interrupts = virtio_mmio_alloc_interrupts, .free_interrupts = virtio_mmio_free_interrupts, @@ -273,7 +266,8 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) { struct virtio_softc *vsc = &sc->sc_sc; device_t self = vsc->sc_dev; - uint32_t id, magic; + uint32_t id, magic, ver; + int virtio_vers; magic = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_MAGIC_VALUE); @@ -289,25 +283,34 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) vsc->sc_bus_endian = READ_ENDIAN; vsc->sc_struct_endian = 
STRUCT_ENDIAN; - sc->sc_ver = virtio_mmio_reg_read(sc, VIRTIO_MMIO_VERSION); - switch (sc->sc_ver) { + ver = virtio_mmio_reg_read(sc, VIRTIO_MMIO_VERSION); + switch (ver) { case 1: /* we could use PAGE_SIZE, but virtio(4) assumes 4KiB for now */ virtio_mmio_reg_write(sc, VIRTIO_MMIO_V1_GUEST_PAGE_SIZE, VIRTIO_PAGE_SIZE); vsc->sc_ops = &virtio_mmio_v1_ops; + /* + * MMIO v1 ("legacy") sets up the queue like VirtIO 0.9, + * so that's what we'll report as the VirtIO version. + */ + virtio_vers = 0; break; case 2: vsc->sc_ops = &virtio_mmio_v2_ops; + /* + * MMIO v2 is documented in the VirtIO 1.0 spec. + */ + virtio_vers = 1; break; default: aprint_error_dev(vsc->sc_dev, - "unknown version 0x%08x; giving up\n", sc->sc_ver); + "unknown version 0x%08x; giving up\n", ver); return; } - aprint_normal_dev(self, "VirtIO-MMIO v%d\n", sc->sc_ver); + aprint_normal_dev(self, "VirtIO-MMIO v%d\n", ver); id = virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_ID); if (id == 0) { @@ -315,7 +318,7 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) return; } - virtio_print_device_type(self, id, sc->sc_ver); + virtio_print_device_type(self, id, virtio_vers); /* set up our device config tag */ vsc->sc_devcfg_iosize = sc->sc_iosize - VIRTIO_MMIO_CONFIG; @@ -360,38 +363,64 @@ virtio_mmio_common_detach(struct virtio_mmio_softc *sc, int flags) /* * Feature negotiation. + * + * We fold pre-VirtIO-1.0 feature negotiation into this single routine + * because the "legacy" (MMIO-v1) also had the feature sel registers. 
*/ static void virtio_mmio_negotiate_features(struct virtio_softc *vsc, uint64_t driver_features) { struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; - uint64_t r, device_status; + device_t self = vsc->sc_dev; + uint64_t saved_driver_features = driver_features; + uint64_t device_features, negotiated; + uint32_t device_status; - if (sc->sc_ver > 1) - driver_features |= VIRTIO_F_VERSION_1; + driver_features |= VIRTIO_F_VERSION_1; + vsc->sc_active_features = 0; virtio_mmio_reg_write(sc, VIRTIO_MMIO_DEVICE_FEATURES_SEL, 0); - r = virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_FEATURES); + device_features = virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_FEATURES); virtio_mmio_reg_write(sc, VIRTIO_MMIO_DEVICE_FEATURES_SEL, 1); - r |= (uint64_t)virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_FEATURES) << 32; + device_features |= (uint64_t) + virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_FEATURES) << 32; + + /* notify on empty is 0.9 only */ + if (device_features & VIRTIO_F_VERSION_1) { + driver_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY; + } else { + /* + * If the driver requires version 1, but the device doesn't + * support it, fail now. 
+ */ + if (saved_driver_features & VIRTIO_F_VERSION_1) { + aprint_error_dev(self, "device rejected version 1\n"); + virtio_mmio_set_status(vsc, + VIRTIO_CONFIG_DEVICE_STATUS_FAILED); + return; + } + } - r &= driver_features; + negotiated = device_features & driver_features; virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES_SEL, 0); - virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES, r & 0xffffffff); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES, + (uint32_t)negotiated); virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES_SEL, 1); - virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES, r >> 32); - - virtio_mmio_set_status(vsc, VIRTIO_CONFIG_S_FEATURES_OK); + virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES, + (uint32_t)(negotiated >> 32)); + virtio_mmio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK); device_status = virtio_mmio_get_status(vsc); - if ((device_status & VIRTIO_CONFIG_S_FEATURES_OK) == 0) { - aprint_error_dev(vsc->sc_dev, - "desired features were not accepted\n"); + if ((device_status & VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) { + aprint_error_dev(self, "feature negotiation failed\n"); + virtio_mmio_set_status(vsc, + VIRTIO_CONFIG_DEVICE_STATUS_FAILED); + return; } - vsc->sc_active_features = r; + vsc->sc_active_features = negotiated; } /* @@ -407,10 +436,10 @@ virtio_mmio_intr(void *arg) /* check and ack the interrupt */ isr = virtio_mmio_reg_read(sc, VIRTIO_MMIO_INTERRUPT_STATUS); virtio_mmio_reg_write(sc, VIRTIO_MMIO_INTERRUPT_ACK, isr); - if ((isr & VIRTIO_MMIO_INT_CONFIG) && + if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) && (vsc->sc_config_change != NULL)) r = (vsc->sc_config_change)(vsc); - if ((isr & VIRTIO_MMIO_INT_VRING) && + if ((isr & VIRTIO_CONFIG_ISR_QUEUE_INTERRUPT) && (vsc->sc_intrhand != NULL)) { if (vsc->sc_soft_ih != NULL) softint_schedule(vsc->sc_soft_ih); From 1189c84a017e3b1073f2a86d9849d45274448c0b Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sat, 6 Jan 2024 08:07:55 +0100 
Subject: [PATCH 045/114] feat: support for Firecracker --- sys/arch/x86/x86/bus_dma.c | 2 +- sys/dev/ldvar.h | 1 + sys/dev/pci/ld_virtio.c | 33 +++++++++++++++++++++++---------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/sys/arch/x86/x86/bus_dma.c b/sys/arch/x86/x86/bus_dma.c index b47b4b1bc2e4a..7d2753bb7c203 100644 --- a/sys/arch/x86/x86/bus_dma.c +++ b/sys/arch/x86/x86/bus_dma.c @@ -319,7 +319,7 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments, goto out; } - if (map->_dm_bounce_thresh != 0) + if (map->_dm_bounce_thresh != 0 || map->_dm_segcnt == 1) cookieflags |= X86_DMA_MIGHT_NEED_BOUNCE; if ((cookieflags & X86_DMA_MIGHT_NEED_BOUNCE) == 0) { diff --git a/sys/dev/ldvar.h b/sys/dev/ldvar.h index 0aa0c097a742e..5b9d8bc207561 100644 --- a/sys/dev/ldvar.h +++ b/sys/dev/ldvar.h @@ -56,6 +56,7 @@ struct ld_softc { uint64_t sc_secperunit; /* # sectors in total */ int sc_secsize; /* sector size in bytes */ int sc_maxxfer; /* max xfer size in bytes */ + int sc_maxnsegs; /* maximum number of segments */ int sc_maxqueuecnt; /* maximum h/w queue depth */ char *sc_typename; /* inquiry data */ diff --git a/sys/dev/pci/ld_virtio.c b/sys/dev/pci/ld_virtio.c index b8d405f5cc4cf..afdc1e48aa8c0 100644 --- a/sys/dev/pci/ld_virtio.c +++ b/sys/dev/pci/ld_virtio.c @@ -196,6 +196,7 @@ ld_virtio_alloc_reqs(struct ld_virtio_softc *sc, int qsize) memset(vaddr, 0, allocsize); for (i = 0; i < qsize; i++) { struct virtio_blk_req *vr = &sc->sc_reqs[i]; + int nsegs; r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), offsetof(struct virtio_blk_req, vr_bp), 1, @@ -219,10 +220,18 @@ ld_virtio_alloc_reqs(struct ld_virtio_softc *sc, int qsize) "error code %d\n", r); goto err_reqs; } + /* + * if d->sc_maxnsegs == VIRTIO_BLK_MIN_SEGMENTS + 1, the + * device only supports a single data segment. 
+ */ + if (ld->sc_maxnsegs == VIRTIO_BLK_MIN_SEGMENTS + 1) + nsegs = ld->sc_maxnsegs - VIRTIO_BLK_MIN_SEGMENTS; + else + nsegs = (ld->sc_maxxfer / NBPG) + VIRTIO_BLK_MIN_SEGMENTS; + r = bus_dmamap_create(virtio_dmat(sc->sc_virtio), ld->sc_maxxfer, - (ld->sc_maxxfer / NBPG) + - VIRTIO_BLK_MIN_SEGMENTS, + nsegs, ld->sc_maxxfer, 0, BUS_DMA_WAITOK|BUS_DMA_ALLOCNOW, @@ -264,7 +273,7 @@ ld_virtio_attach(device_t parent, device_t self, void *aux) struct ld_softc *ld = &sc->sc_ld; struct virtio_softc *vsc = device_private(parent); uint64_t features; - int qsize, maxxfersize, maxnsegs; + int qsize, maxxfersize; if (virtio_child(vsc) != NULL) { aprint_normal(": child already attached for %s; " @@ -317,25 +326,29 @@ ld_virtio_attach(device_t parent, device_t self, void *aux) maxxfersize = MAXPHYS; if (features & VIRTIO_BLK_F_SEG_MAX) { - maxnsegs = virtio_read_device_config_4(vsc, + ld->sc_maxnsegs = virtio_read_device_config_4(vsc, VIRTIO_BLK_CONFIG_SEG_MAX); - if (maxnsegs < VIRTIO_BLK_MIN_SEGMENTS) { + if (ld->sc_maxnsegs < VIRTIO_BLK_MIN_SEGMENTS) { aprint_error_dev(sc->sc_dev, "Too small SEG_MAX %d minimum is %d\n", - maxnsegs, VIRTIO_BLK_MIN_SEGMENTS); - maxnsegs = maxxfersize / NBPG; + ld->sc_maxnsegs, VIRTIO_BLK_MIN_SEGMENTS); + ld->sc_maxnsegs = maxxfersize / NBPG; // goto err; } } else - maxnsegs = maxxfersize / NBPG; + /* + * if there is no VIRTIO_BLK_F_SEG_MAX feature advertised, the + * number of segments can be as low as 1 (i.e. 
Firecracker) + */ + ld->sc_maxnsegs = 1; /* 2 for the minimum size */ - maxnsegs += VIRTIO_BLK_MIN_SEGMENTS; + ld->sc_maxnsegs += VIRTIO_BLK_MIN_SEGMENTS; virtio_init_vq_vqdone(vsc, &sc->sc_vq, 0, ld_virtio_vq_done); - if (virtio_alloc_vq(vsc, &sc->sc_vq, maxxfersize, maxnsegs, + if (virtio_alloc_vq(vsc, &sc->sc_vq, maxxfersize, ld->sc_maxnsegs, "I/O request") != 0) { goto err; } From efe198a92b51057af64be7ac183466ea14718f14 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sat, 6 Jan 2024 08:21:16 +0100 Subject: [PATCH 046/114] fix: wipe pvclock for now --- sys/arch/x86/conf/files.x86 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sys/arch/x86/conf/files.x86 b/sys/arch/x86/conf/files.x86 index dd7e85f1aad8a..e6c48c1e48b20 100644 --- a/sys/arch/x86/conf/files.x86 +++ b/sys/arch/x86/conf/files.x86 @@ -81,10 +81,6 @@ device hyperv attach hyperv at cpufeaturebus file arch/x86/x86/hyperv.c hyperv needs-flag -device pvclock -attach pvclock at cpufeaturebus -file arch/x86/x86/pvclock.c pvclock - file arch/x86/x86/apic.c ioapic | lapic file arch/x86/x86/bus_dma.c machdep file arch/x86/x86/bus_space.c machdep From 4434b2e3535ec00bce2423ca10c5dbc913bf8cfa Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sat, 6 Jan 2024 08:21:36 +0100 Subject: [PATCH 047/114] fix: wipe pvclock for now --- sys/arch/x86/x86/pvclock.c | 235 ------------------------------------- sys/arch/x86/x86/pvreg.h | 118 ------------------- 2 files changed, 353 deletions(-) delete mode 100644 sys/arch/x86/x86/pvclock.c delete mode 100644 sys/arch/x86/x86/pvreg.h diff --git a/sys/arch/x86/x86/pvclock.c b/sys/arch/x86/x86/pvclock.c deleted file mode 100644 index 3dae196044a4b..0000000000000 --- a/sys/arch/x86/x86/pvclock.c +++ /dev/null @@ -1,235 +0,0 @@ -/* $OpenBSD: pvclock.c,v 1.9 2023/02/04 19:19:37 cheloha Exp $ */ - -/* - * Copyright (c) 2018 Reyk Floeter - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, 
provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#if !defined(__i386__) && !defined(__amd64__) -#error pvclock(4) is only supported on i386 and amd64 -#endif - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -uint pvclock_lastcount; - -struct pvclock_softc { - device_t sc_dev; - void *sc_time; - paddr_t sc_paddr; - struct timecounter *sc_tc; -}; - -static int pvclock_match(device_t, cfdata_t, void *); -static void pvclock_attach(device_t, device_t, void *); -static int pvclock_activate(device_t, devact_t); - -/* -void pvclock_read_time_info(struct pvclock_softc *, - struct pvclock_time_info *); -*/ -static inline uint32_t - pvclock_read_begin(const struct pvclock_time_info *); -static inline int - pvclock_read_done(const struct pvclock_time_info *, uint32_t); -static uint - pvclock_get_timecount(struct timecounter *); - -struct timecounter pvclock_timecounter = { - .tc_get_timecount = pvclock_get_timecount, - .tc_counter_mask = ~0u, - .tc_frequency = 0, - .tc_name = NULL, - .tc_quality = -2000, - .tc_priv = NULL, -}; - -CFATTACH_DECL_NEW(pvclock, sizeof(struct pvclock_softc), - pvclock_match, - pvclock_attach, - NULL /* detach */, - pvclock_activate); - - -static int -pvclock_match(device_t parent, cfdata_t cf, void *aux) -{ - u_int regs[6]; - /* - * pvclock is provided by different hypervisors, we currently - * only support the 
"kvmclock". - */ - x86_cpuid(0x40000000 + CPUID_OFFSET_KVM_FEATURES, regs); - /* - * We only implement support for the 2nd version of pvclock. - * The first version is basically the same but with different - * non-standard MSRs and it is deprecated. - */ - if ((regs[0] & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0) - return (0); - - /* - * Only the "stable" clock with a sync'ed TSC is supported. - * In this case the host guarantees that the TSC is constant - * and invariant, either by the underlying TSC or by passing - * on a synchronized value. - */ - if ((regs[0] & - (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0) - return (0); - - return (1); -} - -static inline uint32_t -pvclock_read_begin(const struct pvclock_time_info *ti) -{ - uint32_t ti_version = ti->ti_version & ~0x1; - virtio_membar_sync(); - return (ti_version); -} - -static inline int -pvclock_read_done(const struct pvclock_time_info *ti, - uint32_t ti_version) -{ - virtio_membar_sync(); - return (ti->ti_version == ti_version); -} - -static uint -pvclock_get_timecount(struct timecounter *tc) -{ - struct pvclock_softc *sc = tc->tc_priv; - struct pvclock_time_info *ti; - uint64_t tsc_timestamp, system_time, delta, ctr; - uint32_t ti_version, mul_frac; - int8_t shift; - uint8_t flags; - - ti = sc->sc_time; - do { - ti_version = pvclock_read_begin(ti); - system_time = ti->ti_system_time; - tsc_timestamp = ti->ti_tsc_timestamp; - mul_frac = ti->ti_tsc_to_system_mul; - shift = ti->ti_tsc_shift; - flags = ti->ti_flags; - } while (!pvclock_read_done(ti, ti_version)); - - /* - * The algorithm is described in - * linux/Documentation/virtual/kvm/msr.txt - */ - delta = rdtsc() - tsc_timestamp; - if (shift < 0) - delta >>= -shift; - else - delta <<= shift; - ctr = ((delta * mul_frac) >> 32) + system_time; - - if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0) - return (ctr); - - if (ctr < pvclock_lastcount) - return (pvclock_lastcount); - - atomic_swap_uint(&pvclock_lastcount, ctr); - - return (ctr); -} - -static void 
-pvclock_attach(device_t parent, device_t self, void *aux) -{ - struct pvclock_softc *sc = device_private(self); - struct pvclock_time_info *ti; - paddr_t pa; - uint32_t ti_version; - uint8_t flags; - - if ((sc->sc_time = (void *)uvm_km_alloc(kernel_map, - PAGE_SIZE, PAGE_SIZE, - UVM_KMF_WIRED | UVM_KMF_ZERO)) == NULL) { - aprint_error("time page allocation failed\n"); - return; - } - - if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) { - aprint_error("time page PA extraction failed\n"); - uvm_km_free(kernel_map, (vaddr_t)sc->sc_time, - PAGE_SIZE, UVM_KMF_WIRED); - sc->sc_time = NULL; - return; - } - - wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); - sc->sc_paddr = pa; - - sc->sc_dev = self; - - ti = sc->sc_time; - do { - ti_version = pvclock_read_begin(ti); - flags = ti->ti_flags; - } while (!pvclock_read_done(ti, ti_version)); - - sc->sc_tc = &pvclock_timecounter; - sc->sc_tc->tc_name = device_xname(sc->sc_dev); - - sc->sc_tc->tc_frequency = 1000000000ULL; - sc->sc_tc->tc_priv = sc; - - pvclock_lastcount = 0; - - /* Better than HPET but below TSC */ - sc->sc_tc->tc_quality = 1500; - - if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) { - /* if tsc is not stable, set a lower priority */ - /* Better than i8254 but below HPET */ - sc->sc_tc->tc_quality = 500; - } - - tc_init(sc->sc_tc); -} - -int -pvclock_activate(device_t self, devact_t act) -{ - struct pvclock_softc *sc = (struct pvclock_softc *)self; - int rv = 0; - paddr_t pa = sc->sc_paddr; - - switch (act) { - case DVACT_DEACTIVATE: - wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE); - break; - } - - return (rv); -} diff --git a/sys/arch/x86/x86/pvreg.h b/sys/arch/x86/x86/pvreg.h deleted file mode 100644 index c5c60ce0e8f85..0000000000000 --- a/sys/arch/x86/x86/pvreg.h +++ /dev/null @@ -1,118 +0,0 @@ -/* $OpenBSD: pvreg.h,v 1.6 2019/05/13 15:40:34 pd Exp $ */ - -/* - * Copyright (c) 2015 Reyk Floeter - * Copyright (c) 2015 Stefan Fritsch - * - * Permission to use, copy, modify, 
and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef _DEV_PV_PVBUS_H_ -#define _DEV_PV_PVBUS_H_ - -#define CPUID_HV_SIGNATURE_START 0x40000000 -#define CPUID_HV_SIGNATURE_END 0x40010000 -#define CPUID_HV_SIGNATURE_STEP 0x100 -#define CPUID_HV_SIGNATURE_STRLEN 12 - -/* - * KVM - */ -#define CPUID_OFFSET_KVM_FEATURES 0x1 - -#define KVM_FEATURE_CLOCKSOURCE 0 /* deprecated */ -#define KVM_FEATURE_NOP_IO_DELAY 1 -#define KVM_FEATURE_MMU_OP 2 /* deprecated */ -#define KVM_FEATURE_CLOCKSOURCE2 3 -#define KVM_FEATURE_ASYNC_PF 4 -#define KVM_FEATURE_STEAL_TIME 5 -#define KVM_FEATURE_PV_EOI 6 -#define KVM_FEATURE_PV_UNHALT 7 -#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 - -#define KVM_MSR_EOI_EN 0x4b564d04 -#define KVM_PV_EOI_BIT 0 - -#define KVM_MSR_WALL_CLOCK 0x4b564d00 -#define KVM_MSR_SYSTEM_TIME 0x4b564d01 - -struct pvclock_wall_clock { - uint32_t wc_version; - uint32_t wc_sec; - uint32_t wc_nsec; -} __packed; - -struct pvclock_time_info { - uint32_t ti_version; - uint32_t ti_pad0; - uint64_t ti_tsc_timestamp; - uint64_t ti_system_time; - uint32_t ti_tsc_to_system_mul; - int8_t ti_tsc_shift; - uint8_t ti_flags; - uint8_t ti_pad[2]; -} __packed; - -#define PVCLOCK_FLAG_TSC_STABLE 0x01 -#define PVCLOCK_SYSTEM_TIME_ENABLE 0x01 - -/* - * Hyper-V - */ -#define CPUID_OFFSET_HYPERV_INTERFACE 0x1 -#define 
CPUID_OFFSET_HYPERV_VERSION 0x2 -#define CPUID_OFFSET_HYPERV_FEATURES 0x3 -#define CPUID_OFFSET_HYPERV_ENLIGHTENMENT_INFO 0x4 -#define CPUID_OFFSET_HYPERV_IMPL_LIMITS 0x5 - -#define HYPERV_VERSION_EAX_BUILD_NUMBER 0 -#define HYPERV_VERSION_EBX_MAJOR_M 0xffff0000 -#define HYPERV_VERSION_EBX_MAJOR_S 16 -#define HYPERV_VERSION_EBX_MINOR_M 0x0000ffff -#define HYPERV_VERSION_EBX_MINOR_S 0 -#define HYPERV_VERSION_ECX_SERVICE_PACK 0 -#define HYPERV_VERSION_EDX_SERVICE_BRANCH_M 0xff000000 -#define HYPERV_VERSION_EDX_SERVICE_BRANCH_S 24 -#define HYPERV_VERSION_EDX_SERVICE_NUMBER_M 0x00ffffff -#define HYPERV_VERSION_EDX_SERVICE_NUMBER_S 0 - -#define HYPERV_VERSION_WS2008 0x00060000 -#define HYPERV_VERSION_WIN7 0x00060001 -#define HYPERV_VERSION_WIN8 0x00060002 -#define HYPERV_VERSION_WIN8_1 0x00060003 -#define HYPERV_VERSION_WIN10 0x00100000 - -#define HYPERV_FEATURE_EAX_VP_RUNTIME 0 -#define HYPERV_FEATURE_EAX_TIME_REF_COUNT 1 -#define HYPERV_FEATURE_EAX_SYNIC 2 -#define HYPERV_FEATURE_EAX_STIMER 3 -#define HYPERV_FEATURE_EAX_APIC 4 -#define HYPERV_FEATURE_EAX_HYPERCALL 5 -#define HYPERV_FEATURE_EAX_VP_INDEX 6 -#define HYPERV_FEATURE_EAX_MSR_RESET 7 -#define HYPERV_FEATURE_EAX_STATS_PAGES 8 -#define HYPERV_FEATURE_EAX_REF_TSC 9 -#define HYPERV_FEATURE_EAX_GUEST_IDLE 10 -#define HYPERV_FEATURE_EAX_TIMER_FREQ 11 -#define HYPERV_FEATURE_EAX_DEBUG 12 - -/* - * Xen - */ -#define CPUID_OFFSET_XEN_VERSION 0x1 -#define CPUID_OFFSET_XEN_HYPERCALL 0x2 - -#define XEN_VERSION_MAJOR_S 16 -#define XEN_VERSION_MINOR_M 0xffff - -#endif /* _DEV_PV_PVBUS_H_ */ From 0305c7ba8dda90fc090bca72d51d9869bfd5d165 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sat, 6 Jan 2024 08:24:01 +0100 Subject: [PATCH 048/114] fix: no need for this anymore --- sys/dev/virtio/virtio_mmiovar.h | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/dev/virtio/virtio_mmiovar.h b/sys/dev/virtio/virtio_mmiovar.h index a862dd4ae3dd9..896915d345baf 100644 --- a/sys/dev/virtio/virtio_mmiovar.h +++ 
b/sys/dev/virtio/virtio_mmiovar.h @@ -36,7 +36,6 @@ struct virtio_mmio_softc { bus_space_tag_t sc_iot; bus_space_handle_t sc_ioh; bus_size_t sc_iosize; - uint8_t sc_ver; bool sc_le_regs; void *sc_ih; From ac4b48e2558753e2820060f1aa7d63d0aa8810a7 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 7 Jan 2024 21:16:15 +0100 Subject: [PATCH 049/114] feat: dropped PCI bus --- sys/arch/amd64/amd64/amd64_mainbus.c | 5 +++-- sys/dev/virtio/cmdlinevar.h | 4 +++- sys/dev/virtio/files.virtio | 2 +- sys/dev/virtio/virtio_mmio_cmdline.c | 19 +++++++++++-------- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/sys/arch/amd64/amd64/amd64_mainbus.c b/sys/arch/amd64/amd64/amd64_mainbus.c index 384e808214589..3af0025ce1cba 100644 --- a/sys/arch/amd64/amd64/amd64_mainbus.c +++ b/sys/arch/amd64/amd64/amd64_mainbus.c @@ -130,6 +130,8 @@ int mp_eisa_bus = -1; extern bool acpi_present; extern bool mpacpi_active; +extern struct x86_bus_dma_tag cmdline_bus_dma_tag; + # ifdef MPVERBOSE # if MPVERBOSE > 0 int mp_verbose = MPVERBOSE; @@ -239,8 +241,7 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) } #endif mba.mba_cmdline.memt = x86_bus_space_mem; - mba.mba_cmdline.dmat = &pci_bus_dma_tag; - mba.mba_cmdline.dmat64 = &pci_bus_dma64_tag; + mba.mba_cmdline.dmat = &cmdline_bus_dma_tag; config_found(self, &mba.mba_cmdline, NULL, CFARGS(.iattr = "cmdlinebus")); diff --git a/sys/dev/virtio/cmdlinevar.h b/sys/dev/virtio/cmdlinevar.h index 443a67790e62e..81ec2a843aee4 100644 --- a/sys/dev/virtio/cmdlinevar.h +++ b/sys/dev/virtio/cmdlinevar.h @@ -1,7 +1,9 @@ +#ifndef _VIRTIO_CMDLINEBUSVAR_H_ +#define _VIRTIO_CMDLINEBUSVAR_H_ struct cmdline_attach_args { bus_space_tag_t memt; bus_dma_tag_t dmat; - bus_dma_tag_t dmat64; }; +#endif diff --git a/sys/dev/virtio/files.virtio b/sys/dev/virtio/files.virtio index b8e513890fa6a..2c1ccf6ff7f81 100644 --- a/sys/dev/virtio/files.virtio +++ b/sys/dev/virtio/files.virtio @@ -3,7 +3,7 @@ # XXX the contents of the following 
included file should be moved here include "dev/pci/files.virtio" -define cmdlinebus { } +define cmdlinebus {} file dev/virtio/virtio_mmio.c virtio_mmio diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index a8734ce110927..6ddc25f33195e 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -38,11 +38,21 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio_cmdline.c"); #include #include +#include + #include #include "ioapic.h" #define VMMIOSTR "virtio_mmio.device=" +struct x86_bus_dma_tag cmdline_bus_dma_tag = { + ._tag_needs_free = 0, + ._bounce_thresh = 0, + ._bounce_alloc_lo = 0, + ._bounce_alloc_hi = 0, + ._may_bounce = NULL, +}; + struct mmio_args { uint64_t sz; uint64_t baseaddr; @@ -209,17 +219,10 @@ virtio_mmio_cmdline_do_attach(device_t self, int error; msc->sc_iot = caa->memt; + vsc->sc_dmat = caa->dmat; msc->sc_iosize = margs->sz; vsc->sc_dev = self; - if (BUS_DMA_TAG_VALID(caa->dmat64)) { - aprint_verbose(": using 64-bit DMA"); - vsc->sc_dmat = caa->dmat64; - } else { - aprint_verbose(": using 32-bit DMA"); - vsc->sc_dmat = caa->dmat; - } - error = bus_space_map( msc->sc_iot, margs->baseaddr, margs->sz, 0, &msc->sc_ioh From 9783ca31b4202cee10defd1bcd596847a18fdd9b Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 8 Jan 2024 08:38:50 +0100 Subject: [PATCH 050/114] feat: introduced pvbus --- sys/arch/amd64/amd64/amd64_mainbus.c | 21 ++++++++++----- sys/arch/amd64/conf/files.amd64 | 5 +++- sys/dev/pv/files.pv | 8 ++++++ sys/dev/pv/pvbus.c | 39 ++++++++++++++++++++++++++++ sys/dev/pv/pvvar.h | 17 ++++++++++++ sys/dev/virtio/files.virtio | 4 +-- sys/dev/virtio/virtio_mmio_cmdline.c | 16 ++++++------ 7 files changed, 92 insertions(+), 18 deletions(-) create mode 100644 sys/dev/pv/files.pv create mode 100644 sys/dev/pv/pvbus.c create mode 100644 sys/dev/pv/pvvar.h diff --git a/sys/arch/amd64/amd64/amd64_mainbus.c b/sys/arch/amd64/amd64/amd64_mainbus.c index 
3af0025ce1cba..e1e62b53486fa 100644 --- a/sys/arch/amd64/amd64/amd64_mainbus.c +++ b/sys/arch/amd64/amd64/amd64_mainbus.c @@ -50,6 +50,7 @@ __KERNEL_RCSID(0, "$NetBSD: amd64_mainbus.c,v 1.7 2021/08/07 16:18:41 thorpej Ex #include "isadma.h" #include "acpica.h" #include "ipmi.h" +#include "pvbus.h" #include "opt_acpi.h" #include "opt_mpbios.h" @@ -79,7 +80,9 @@ __KERNEL_RCSID(0, "$NetBSD: amd64_mainbus.c,v 1.7 2021/08/07 16:18:41 thorpej Ex #include #endif /* __HAVE_PCI_MSI_MSIX */ #endif -#include +#if NPVBUS > 0 +#include +#endif /* * XXXfvdl ACPI @@ -101,7 +104,9 @@ union amd64_mainbus_attach_args { #if NIPMI > 0 struct ipmi_attach_args mba_ipmi; #endif - struct cmdline_attach_args mba_cmdline; +#if NPVBUS > 0 + struct pvbus_attach_args mba_pvba; +#endif }; /* @@ -159,7 +164,7 @@ amd64_mainbus_match(device_t parent, cfdata_t match, void *aux) void amd64_mainbus_attach(device_t parent, device_t self, void *aux) { -#if NISA > 0 || NPCI > 0 || NACPICA > 0 || NIPMI > 0 +#if NISA > 0 || NPCI > 0 || NACPICA > 0 || NIPMI > 0 || NPVBUS > 0 union amd64_mainbus_attach_args mba; #endif #if NISADMA > 0 && NACPICA > 0 @@ -240,10 +245,12 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) CFARGS(.iattr = "isabus")); } #endif - mba.mba_cmdline.memt = x86_bus_space_mem; - mba.mba_cmdline.dmat = &cmdline_bus_dma_tag; - config_found(self, &mba.mba_cmdline, NULL, - CFARGS(.iattr = "cmdlinebus")); + +#if NPVBUS > 0 + mba.mba_pvba.pvba_busname = "pvbus"; + config_found(self, &mba.mba_pvba.pvba_busname, NULL, + CFARGS(.iattr = "pvbus")); +#endif if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64 index 3497439097709..14cdec8350606 100644 --- a/sys/arch/amd64/conf/files.amd64 +++ b/sys/arch/amd64/conf/files.amd64 @@ -94,7 +94,7 @@ include "dev/i2o/files.i2o" # XXX BIOS32 only if something that uses it is configured! 
device mainbus: isabus, pcibus, bios32, acpibus, cpubus, ioapicbus, - ipmibus, hypervisorbus, cmdlinebus + ipmibus, hypervisorbus, cmdlinebus, pvbus attach mainbus at root file arch/amd64/amd64/amd64_mainbus.c mainbus & !xenpv file arch/x86/x86/mainbus.c mainbus @@ -200,4 +200,7 @@ file dev/acpi/vmbus_acpi.c vmbus_acpi # VMEbus support include "dev/vme/files.vme" +# PVbus support +include "dev/pv/files.pv" + include "arch/amd64/conf/majors.amd64" diff --git a/sys/dev/pv/files.pv b/sys/dev/pv/files.pv new file mode 100644 index 0000000000000..1775b9edf5af5 --- /dev/null +++ b/sys/dev/pv/files.pv @@ -0,0 +1,8 @@ +define pvbus {} + +device pv {} +attach pv at pvbus +file dev/pv/pvbus.c pvbus needs-flag + +attach virtio at pv with mmio_cmdline: virtio_mmio +file dev/virtio/virtio_mmio_cmdline.c mmio_cmdline diff --git a/sys/dev/pv/pvbus.c b/sys/dev/pv/pvbus.c new file mode 100644 index 0000000000000..a10b50d32a0cf --- /dev/null +++ b/sys/dev/pv/pvbus.c @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +#include + +#include + +struct x86_bus_dma_tag pvbus_bus_dma_tag = { + ._tag_needs_free = 0, + ._bounce_thresh = 0, + ._bounce_alloc_lo = 0, + ._bounce_alloc_hi = 0, + ._may_bounce = NULL, +}; + +static int +pv_match(device_t parent, cfdata_t match, void *aux) +{ + return 1; +} + +static void +pv_attach(device_t parent, device_t self, void *aux) { + struct pv_attach_args pvaa; + + pvaa.pvaa_memt = x86_bus_space_mem; + pvaa.pvaa_dmat = &pvbus_bus_dma_tag; + + aprint_naive("\n"); + + config_found(self, &pvaa, NULL, CFARGS_NONE); +} + +CFATTACH_DECL_NEW(pv, sizeof(struct pv_softc), + pv_match, pv_attach, NULL, NULL); diff --git a/sys/dev/pv/pvvar.h b/sys/dev/pv/pvvar.h new file mode 100644 index 0000000000000..222f7d23aa5eb --- /dev/null +++ b/sys/dev/pv/pvvar.h @@ -0,0 +1,17 @@ +#ifndef _PVBUS_PVVAR_H_ +#define _PVBUS_PVVAR_H_ + +struct pv_softc { + device_t sc_dev; +}; + +struct pvbus_attach_args { + const char *pvba_busname; +}; + +struct 
pv_attach_args { + bus_space_tag_t pvaa_memt; + bus_dma_tag_t pvaa_dmat; +}; + +#endif diff --git a/sys/dev/virtio/files.virtio b/sys/dev/virtio/files.virtio index 2c1ccf6ff7f81..0584a57e1c4e6 100644 --- a/sys/dev/virtio/files.virtio +++ b/sys/dev/virtio/files.virtio @@ -11,5 +11,5 @@ device viocon attach viocon at virtio file dev/virtio/viocon.c viocon -attach virtio at cmdlinebus with mmio_cmdline: virtio_mmio -file dev/virtio/virtio_mmio_cmdline.c mmio_cmdline +#attach virtio at cmdlinebus with mmio_cmdline: virtio_mmio +#file dev/virtio/virtio_mmio_cmdline.c mmio_cmdline diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index 6ddc25f33195e..1ee76c7a7c415 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -36,6 +36,7 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio_cmdline.c"); #define VIRTIO_PRIVATE #include #include +#include #include #include @@ -68,7 +69,7 @@ struct virtio_mmio_cmdline_softc { static int virtio_mmio_cmdline_match(device_t, cfdata_t, void *); static void virtio_mmio_cmdline_attach(device_t, device_t, void *); static int virtio_mmio_cmdline_do_attach(device_t, - struct cmdline_attach_args *, + struct pv_attach_args *, struct mmio_args *); static int virtio_mmio_cmdline_detach(device_t, int); static int virtio_mmio_cmdline_rescan(device_t, const char *, const int *); @@ -160,7 +161,7 @@ static void virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) { struct virtio_mmio_cmdline_softc *sc = device_private(self); - struct cmdline_attach_args *caa = aux; + struct pv_attach_args *pvaa = aux; struct mmio_args *margs = &sc->margs; char *v, *n, cmdline[128]; int error; @@ -194,7 +195,7 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) parsearg(margs, p); error = virtio_mmio_cmdline_do_attach(self, - caa, margs); + pvaa, margs); if (error) return; @@ -202,15 +203,14 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void 
*aux) if (hasnext) { p = n+1; idx++; - config_found(parent, caa, NULL, - CFARGS(.iattr = "cmdlinebus")); + config_found(parent, pvaa, NULL, CFARGS_NONE); } } } static int virtio_mmio_cmdline_do_attach(device_t self, - struct cmdline_attach_args *caa, + struct pv_attach_args *pvaa, struct mmio_args *margs) { struct virtio_mmio_cmdline_softc *sc = device_private(self); @@ -218,8 +218,8 @@ virtio_mmio_cmdline_do_attach(device_t self, struct virtio_softc *const vsc = &msc->sc_sc; int error; - msc->sc_iot = caa->memt; - vsc->sc_dmat = caa->dmat; + msc->sc_iot = pvaa->pvaa_memt; + vsc->sc_dmat = pvaa->pvaa_dmat; msc->sc_iosize = margs->sz; vsc->sc_dev = self; From 541ecfdd3bb71e18a54b8ac3b02eddbaba3be736 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 8 Jan 2024 08:42:17 +0100 Subject: [PATCH 051/114] fix: aprint_normal --- sys/dev/pv/pvbus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/dev/pv/pvbus.c b/sys/dev/pv/pvbus.c index a10b50d32a0cf..d76910ddd3921 100644 --- a/sys/dev/pv/pvbus.c +++ b/sys/dev/pv/pvbus.c @@ -31,6 +31,7 @@ pv_attach(device_t parent, device_t self, void *aux) { pvaa.pvaa_dmat = &pvbus_bus_dma_tag; aprint_naive("\n"); + aprint_normal("\n"); config_found(self, &pvaa, NULL, CFARGS_NONE); } From 98662790ff7ec61433f4c67c4e009c8097d7bb38 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 8 Jan 2024 08:46:33 +0100 Subject: [PATCH 052/114] fix: no need for cmdlinebus anymore --- sys/arch/amd64/amd64/amd64_mainbus.c | 2 -- sys/arch/amd64/conf/files.amd64 | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sys/arch/amd64/amd64/amd64_mainbus.c b/sys/arch/amd64/amd64/amd64_mainbus.c index e1e62b53486fa..574583d9fc3c1 100644 --- a/sys/arch/amd64/amd64/amd64_mainbus.c +++ b/sys/arch/amd64/amd64/amd64_mainbus.c @@ -135,8 +135,6 @@ int mp_eisa_bus = -1; extern bool acpi_present; extern bool mpacpi_active; -extern struct x86_bus_dma_tag cmdline_bus_dma_tag; - # ifdef MPVERBOSE # if MPVERBOSE > 0 int mp_verbose = 
MPVERBOSE; diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64 index 14cdec8350606..d4e2fc5f38b6c 100644 --- a/sys/arch/amd64/conf/files.amd64 +++ b/sys/arch/amd64/conf/files.amd64 @@ -94,7 +94,7 @@ include "dev/i2o/files.i2o" # XXX BIOS32 only if something that uses it is configured! device mainbus: isabus, pcibus, bios32, acpibus, cpubus, ioapicbus, - ipmibus, hypervisorbus, cmdlinebus, pvbus + ipmibus, hypervisorbus, pvbus attach mainbus at root file arch/amd64/amd64/amd64_mainbus.c mainbus & !xenpv file arch/x86/x86/mainbus.c mainbus From 822ea512e467a9a1713eba4820ba43319b303a25 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 8 Jan 2024 08:48:47 +0100 Subject: [PATCH 053/114] fix: no need for cmdlinebus anymore --- sys/dev/virtio/files.virtio | 2 -- 1 file changed, 2 deletions(-) diff --git a/sys/dev/virtio/files.virtio b/sys/dev/virtio/files.virtio index 0584a57e1c4e6..8008b59ce14eb 100644 --- a/sys/dev/virtio/files.virtio +++ b/sys/dev/virtio/files.virtio @@ -3,8 +3,6 @@ # XXX the contents of the following included file should be moved here include "dev/pci/files.virtio" -define cmdlinebus {} - file dev/virtio/virtio_mmio.c virtio_mmio device viocon From 44c287de0f541e748a448ac1d1387d11bfcf8f58 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 8 Jan 2024 08:57:01 +0100 Subject: [PATCH 054/114] fix: sync with trunk --- sys/dev/pci/virtio.c | 5 ++--- sys/dev/pci/virtioreg.h | 29 ----------------------------- 2 files changed, 2 insertions(+), 32 deletions(-) diff --git a/sys/dev/pci/virtio.c b/sys/dev/pci/virtio.c index 8e55c64f2e0ea..7c26edc9384e5 100644 --- a/sys/dev/pci/virtio.c +++ b/sys/dev/pci/virtio.c @@ -74,8 +74,7 @@ static void virtio_reset_vq(struct virtio_softc *, void virtio_set_status(struct virtio_softc *sc, int status) { - if (sc->sc_ops->set_status) - sc->sc_ops->set_status(sc, status); + sc->sc_ops->set_status(sc, status); } /* @@ -817,7 +816,6 @@ virtio_alloc_vq(struct virtio_softc *sc, 
struct virtqueue *vq, /* alloc and map the memory */ r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0, &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK); - if (r != 0) { aprint_error_dev(sc->sc_dev, "virtqueue %d for %s allocation failed, " @@ -893,6 +891,7 @@ virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, "descriptors\n", size_indirect, maxnsegs * vq_num); return 0; + err: sc->sc_ops->setup_queue(sc, vq->vq_index, 0); if (vq->vq_dmamap) diff --git a/sys/dev/pci/virtioreg.h b/sys/dev/pci/virtioreg.h index a1f5d78853d5c..bb9e186d24196 100644 --- a/sys/dev/pci/virtioreg.h +++ b/sys/dev/pci/virtioreg.h @@ -98,21 +98,6 @@ #define VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET 64 #define VIRTIO_CONFIG_DEVICE_STATUS_FAILED 128 -/* Status byte for guest to report progress. */ -#define VIRTIO_CONFIG_STATUS_RESET 0x00 -/* We have seen device and processed generic fields. */ -#define VIRTIO_CONFIG_STATUS_ACK 0x01 -/* We have found a driver for the device. */ -#define VIRTIO_CONFIG_STATUS_DRIVER 0x02 -/* Driver has used its parts of the config, and is happy. */ -#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04 -/* Driver has finished configuring features (modern only). */ -#define VIRTIO_CONFIG_S_FEATURES_OK 0x08 -/* Device entered invalid state, driver must reset it. */ -#define VIRTIO_CONFIG_S_NEEDS_RESET 0x40 -/* We've given up on this device. */ -#define VIRTIO_CONFIG_STATUS_FAILED 0x80 - /* common ISR status flags */ #define VIRTIO_CONFIG_ISR_QUEUE_INTERRUPT 1 #define VIRTIO_CONFIG_ISR_CONFIG_CHANGE 2 @@ -159,20 +144,6 @@ */ #define VRING_AVAIL_F_NO_INTERRUPT 1 -/* - * Some VirtIO feature bits (currently bits 28 through 34) are - * reserved for the transport being used (eg. virtio_ring), the - * rest are per-device feature bits. - */ -#define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 34 - -/* Support for indirect buffer descriptors. 
*/ -#define VIRTIO_RING_F_INDIRECT_DESC (1UL << 28) - -/* Support to suppress interrupt until specific index is reached. */ -#define VIRTIO_RING_F_EVENT_IDX (1UL << 29) - /* Virtio ring descriptors: 16 bytes. * These can chain together via "next". */ struct vring_desc { From 71d1d98cf08f532844040e6149e9f9741047de72 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 8 Jan 2024 09:05:01 +0100 Subject: [PATCH 055/114] fix: sync with trunk --- sys/dev/virtio/virtio_mmio.c | 54 ++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/sys/dev/virtio/virtio_mmio.c b/sys/dev/virtio/virtio_mmio.c index c5342e5f9e7d7..f9824573f006e 100644 --- a/sys/dev/virtio/virtio_mmio.c +++ b/sys/dev/virtio/virtio_mmio.c @@ -1,4 +1,4 @@ -/* $NetBSD: virtio_mmio.c,v 1.12 2024/01/02 07:24:50 thorpej Exp $ */ +/* $NetBSD: virtio_mmio.c,v 1.13 2024/01/06 06:59:33 thorpej Exp $ */ /* $OpenBSD: virtio_mmio.c,v 1.2 2017/02/24 17:12:31 patrick Exp $ */ /*- @@ -58,7 +58,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.12 2024/01/02 07:24:50 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: virtio_mmio.c,v 1.13 2024/01/06 06:59:33 thorpej Exp $"); #include #include @@ -266,7 +266,7 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) { struct virtio_softc *vsc = &sc->sc_sc; device_t self = vsc->sc_dev; - uint32_t id, magic, ver; + uint32_t id, magic; int virtio_vers; magic = bus_space_read_4(sc->sc_iot, sc->sc_ioh, @@ -283,15 +283,16 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) vsc->sc_bus_endian = READ_ENDIAN; vsc->sc_struct_endian = STRUCT_ENDIAN; - ver = virtio_mmio_reg_read(sc, VIRTIO_MMIO_VERSION); - switch (ver) { + sc->sc_mmio_vers = virtio_mmio_reg_read(sc, VIRTIO_MMIO_VERSION); + switch (sc->sc_mmio_vers) { case 1: /* we could use PAGE_SIZE, but virtio(4) assumes 4KiB for now */ virtio_mmio_reg_write(sc, VIRTIO_MMIO_V1_GUEST_PAGE_SIZE, VIRTIO_PAGE_SIZE); vsc->sc_ops = &virtio_mmio_v1_ops; /* - * MMIO v1 
("legacy") sets up the queue like VirtIO 0.9, + * MMIO v1 ("legacy") is documented in the VirtIO 0.9.x + * draft(s) and uses the same page-oriented queue setup, * so that's what we'll report as the VirtIO version. */ virtio_vers = 0; @@ -307,10 +308,10 @@ virtio_mmio_common_attach(struct virtio_mmio_softc *sc) default: aprint_error_dev(vsc->sc_dev, - "unknown version 0x%08x; giving up\n", ver); + "unknown version 0x%08x; giving up\n", sc->sc_mmio_vers); return; } - aprint_normal_dev(self, "VirtIO-MMIO v%d\n", ver); + aprint_normal_dev(self, "VirtIO-MMIO-v%u\n", sc->sc_mmio_vers); id = virtio_mmio_reg_read(sc, VIRTIO_MMIO_DEVICE_ID); if (id == 0) { @@ -390,6 +391,16 @@ virtio_mmio_negotiate_features(struct virtio_softc *vsc, uint64_t if (device_features & VIRTIO_F_VERSION_1) { driver_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY; } else { + /* + * Require version 1 for MMIO-v2 transport. + */ + if (sc->sc_mmio_vers >= 2) { + aprint_error_dev(self, "MMIO-v%u requires version 1\n", + sc->sc_mmio_vers); + virtio_mmio_set_status(vsc, + VIRTIO_CONFIG_DEVICE_STATUS_FAILED); + return; + } /* * If the driver requires version 1, but the device doesn't * support it, fail now. @@ -410,14 +421,29 @@ virtio_mmio_negotiate_features(struct virtio_softc *vsc, uint64_t virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES_SEL, 1); virtio_mmio_reg_write(sc, VIRTIO_MMIO_DRIVER_FEATURES, (uint32_t)(negotiated >> 32)); - virtio_mmio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK); - device_status = virtio_mmio_get_status(vsc); - if ((device_status & VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) { - aprint_error_dev(self, "feature negotiation failed\n"); + /* + * FEATURES_OK status is not present pre-1.0. 
+ */ + if (device_features & VIRTIO_F_VERSION_1) { virtio_mmio_set_status(vsc, - VIRTIO_CONFIG_DEVICE_STATUS_FAILED); - return; + VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK); + device_status = virtio_mmio_get_status(vsc); + if ((device_status & + VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) { + aprint_error_dev(self, "feature negotiation failed\n"); + virtio_mmio_set_status(vsc, + VIRTIO_CONFIG_DEVICE_STATUS_FAILED); + return; + } + } + + if (negotiated & VIRTIO_F_VERSION_1) { + /* + * All VirtIO 1.0 access is little-endian. + */ + vsc->sc_bus_endian = LITTLE_ENDIAN; + vsc->sc_struct_endian = LITTLE_ENDIAN; } vsc->sc_active_features = negotiated; From c441291d6070e7e59426f637916025cf87fb08de Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 8 Jan 2024 21:46:42 +0100 Subject: [PATCH 056/114] fix: licenses --- sys/dev/pv/pvbus.c | 27 +++++++++++++++++++++++++ sys/dev/pv/pvvar.h | 27 +++++++++++++++++++++++++ sys/dev/virtio/virtio_mmio_cmdline.c | 30 ++++++++++++++++++++++++++-- 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/sys/dev/pv/pvbus.c b/sys/dev/pv/pvbus.c index d76910ddd3921..f82763bc243d7 100644 --- a/sys/dev/pv/pvbus.c +++ b/sys/dev/pv/pvbus.c @@ -1,3 +1,30 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 2024 Emile 'iMil' Heitor. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + #include #include #include diff --git a/sys/dev/pv/pvvar.h b/sys/dev/pv/pvvar.h index 222f7d23aa5eb..1a59fd09719b0 100644 --- a/sys/dev/pv/pvvar.h +++ b/sys/dev/pv/pvvar.h @@ -1,3 +1,30 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 2024 Emile 'iMil' Heitor. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + #ifndef _PVBUS_PVVAR_H_ #define _PVBUS_PVVAR_H_ diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index 1ee76c7a7c415..8bd0654025858 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -1,4 +1,31 @@ -/*- +/* $NetBSD$ */ + +/* + * Copyright (c) 2024 Emile 'iMil' Heitor. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* * Copyright (c) 2022 Colin Percival * * Redistribution and use in source and binary forms, with or without @@ -35,7 +62,6 @@ __KERNEL_RCSID(0, "$NetBSD: virtio_mmio_cmdline.c"); #define VIRTIO_PRIVATE #include -#include #include #include From d10326fce54e8dfd2faeb6a428ce25a56498a2ab Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 9 Jan 2024 07:06:44 +0100 Subject: [PATCH 057/114] fix: NetBSD license --- sys/dev/pv/pvbus.c | 30 +++++++++++++++----------- sys/dev/pv/pvvar.h | 30 +++++++++++++++----------- sys/dev/virtio/cmdlinevar.h | 9 -------- sys/dev/virtio/virtio_mmio_cmdline.c | 32 ++++++++++++++++------------ 4 files changed, 52 insertions(+), 49 deletions(-) delete mode 100644 sys/dev/virtio/cmdlinevar.h diff --git a/sys/dev/pv/pvbus.c b/sys/dev/pv/pvbus.c index f82763bc243d7..f1101d623051a 100644 --- a/sys/dev/pv/pvbus.c +++ b/sys/dev/pv/pvbus.c @@ -1,7 +1,11 @@ /* $NetBSD$ */ -/* - * Copyright (c) 2024 Emile 'iMil' Heitor. +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Emile 'iMil' Heitor. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -12,17 +16,17 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ #include diff --git a/sys/dev/pv/pvvar.h b/sys/dev/pv/pvvar.h index 1a59fd09719b0..bba44b926dc0b 100644 --- a/sys/dev/pv/pvvar.h +++ b/sys/dev/pv/pvvar.h @@ -1,7 +1,11 @@ /* $NetBSD$ */ -/* - * Copyright (c) 2024 Emile 'iMil' Heitor. +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Emile 'iMil' Heitor. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -12,17 +16,17 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef _PVBUS_PVVAR_H_ diff --git a/sys/dev/virtio/cmdlinevar.h b/sys/dev/virtio/cmdlinevar.h deleted file mode 100644 index 81ec2a843aee4..0000000000000 --- a/sys/dev/virtio/cmdlinevar.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _VIRTIO_CMDLINEBUSVAR_H_ -#define _VIRTIO_CMDLINEBUSVAR_H_ - -struct cmdline_attach_args { - bus_space_tag_t memt; - bus_dma_tag_t dmat; -}; - -#endif diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index 8bd0654025858..f97773a206e50 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -1,7 +1,11 @@ /* $NetBSD$ */ -/* - * Copyright (c) 2024 Emile 'iMil' Heitor. +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Emile 'iMil' Heitor. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -12,20 +16,20 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ -/* +/*- * Copyright (c) 2022 Colin Percival * * Redistribution and use in source and binary forms, with or without From 53af54c3f85436ae92b2ff92f766bfe58ab06771 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 9 Jan 2024 16:46:36 +0100 Subject: [PATCH 058/114] fix: not needed anymore --- sys/dev/virtio/files.virtio | 3 --- 1 file changed, 3 deletions(-) diff --git a/sys/dev/virtio/files.virtio b/sys/dev/virtio/files.virtio index 8008b59ce14eb..4e399624ab0bf 100644 --- a/sys/dev/virtio/files.virtio +++ b/sys/dev/virtio/files.virtio @@ -8,6 +8,3 @@ file dev/virtio/virtio_mmio.c virtio_mmio device viocon attach viocon at virtio file dev/virtio/viocon.c viocon - -#attach virtio at cmdlinebus with mmio_cmdline: virtio_mmio -#file dev/virtio/virtio_mmio_cmdline.c mmio_cmdline From b61f6eb24d5ddf52e7218d1bdc61bc8396007ea1 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 9 Jan 2024 17:20:26 +0100 Subject: [PATCH 059/114] fix: cleanup --- sys/dev/pci/virtioreg.h | 1 + sys/dev/pci/virtiovar.h | 2 -- sys/dev/virtio/virtio_mmio_cmdline.c | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/sys/dev/pci/virtioreg.h b/sys/dev/pci/virtioreg.h index bb9e186d24196..2d10af97dc1c3 100644 --- 
a/sys/dev/pci/virtioreg.h +++ b/sys/dev/pci/virtioreg.h @@ -144,6 +144,7 @@ */ #define VRING_AVAIL_F_NO_INTERRUPT 1 + /* Virtio ring descriptors: 16 bytes. * These can chain together via "next". */ struct vring_desc { diff --git a/sys/dev/pci/virtiovar.h b/sys/dev/pci/virtiovar.h index 704e0c29167aa..fffa9c698b6a7 100644 --- a/sys/dev/pci/virtiovar.h +++ b/sys/dev/pci/virtiovar.h @@ -132,9 +132,7 @@ struct virtio_ops { uint16_t (*read_queue_size)(struct virtio_softc *, uint16_t); void (*setup_queue)(struct virtio_softc *, uint16_t, uint64_t); void (*set_status)(struct virtio_softc *, int); - int (*get_status)(struct virtio_softc *); void (*neg_features)(struct virtio_softc *, uint64_t); - int (*finalize_features)(struct virtio_softc *); int (*alloc_interrupts)(struct virtio_softc *); void (*free_interrupts)(struct virtio_softc *); int (*setup_interrupts)(struct virtio_softc *, int); diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index f97773a206e50..213f8bc7a429a 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -54,9 +54,6 @@ * SUCH DAMAGE. */ -#include -__KERNEL_RCSID(0, "$NetBSD: virtio_mmio_cmdline.c"); - #include #include #include From c66574abd9089cde54a8aa5d247973861dd69535 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 9 Jan 2024 17:28:34 +0100 Subject: [PATCH 060/114] fix: cleanup --- sys/arch/x86/x86/mpbios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arch/x86/x86/mpbios.c b/sys/arch/x86/x86/mpbios.c index fa576513258db..85b3e38e3e256 100644 --- a/sys/arch/x86/x86/mpbios.c +++ b/sys/arch/x86/x86/mpbios.c @@ -281,6 +281,7 @@ mpbios_unmap(struct mp_map *handle) pmap_update(pmap_kernel()); uvm_km_free(kernel_map, handle->baseva, handle->vsize, UVM_KMF_VAONLY); } + /* * Look for an Intel MP spec table, indicating SMP capable hardware. 
*/ @@ -389,7 +390,6 @@ mpbios_probe(device_t self) "MP Configuration Table checksum mismatch\n"); goto err; } - return 10; err: if (mp_fps) { From b146177e2c29f977c65811b10203dbd1906a340e Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 9 Jan 2024 17:31:15 +0100 Subject: [PATCH 061/114] fix: cleanup --- sys/arch/amd64/amd64/amd64_mainbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arch/amd64/amd64/amd64_mainbus.c b/sys/arch/amd64/amd64/amd64_mainbus.c index 574583d9fc3c1..f9d1118a54fc8 100644 --- a/sys/arch/amd64/amd64/amd64_mainbus.c +++ b/sys/arch/amd64/amd64/amd64_mainbus.c @@ -165,6 +165,7 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) #if NISA > 0 || NPCI > 0 || NACPICA > 0 || NIPMI > 0 || NPVBUS > 0 union amd64_mainbus_attach_args mba; #endif + #if NISADMA > 0 && NACPICA > 0 /* * ACPI needs ISA DMA initialized before they start probing. @@ -218,7 +219,6 @@ amd64_mainbus_attach(device_t parent, device_t self, void *aux) if (npcibus == 0 && mpacpi_active) npcibus = mp_pci_scan(self, &mba.mba_pba, pcibusprint); #endif - #if defined(MPBIOS) && defined(MPBIOS_SCANPCI) if (npcibus == 0 && mpbios_scanned != 0) npcibus = mp_pci_scan(self, &mba.mba_pba, pcibusprint); From 5129f1b4260724d5d54f0e72c1cd9186956dbc67 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 10 Jan 2024 11:26:45 +0100 Subject: [PATCH 062/114] fix: cleanup and comment --- sys/dev/virtio/virtio_mmio_cmdline.c | 40 +++++++++++----------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index 213f8bc7a429a..9cfa4ee4cc107 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -66,21 +66,11 @@ #include #include -#include - #include #include "ioapic.h" #define VMMIOSTR "virtio_mmio.device=" -struct x86_bus_dma_tag cmdline_bus_dma_tag = { - ._tag_needs_free = 0, - ._bounce_thresh = 0, - 
._bounce_alloc_lo = 0, - ._bounce_alloc_hi = 0, - ._may_bounce = NULL, -}; - struct mmio_args { uint64_t sz; uint64_t baseaddr; @@ -196,29 +186,34 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) static int idx = 0; bool hasnext; + aprint_normal("\n"); + aprint_naive("\n"); + if (idx == 0) { strcpy(cmdline, xen_start_info.cmd_line); - aprint_verbose("\nkernel parameters: %s", cmdline); + aprint_verbose("kernel parameters: %s\n", cmdline); if ((p = strstr(cmdline, VMMIOSTR)) == NULL) return; } - while (*p) { + while (*p) { /* manual strtok() */ hasnext = false; - v = p; - while (*p && *p != ' ') + v = p; /* start of VMMIOSTR or next param */ + while (*p && *p != ' ') /* find end of param */ p++; if (*p) { - n = p; - *p = '\0'; - hasnext = true; + n = p; /* record end of param */ + *p = '\0'; /* end it */ + hasnext = true; /* more params to come */ } - p = v; - while (*p && *p != '=') + if (strncmp(v, VMMIOSTR, strlen(VMMIOSTR)) != 0) + continue; /* this was not an MMIO parameter */ + p = v; /* start of VMMIOSTR */ + while (*p && *p != '=') /* point to the value */ p++; if (*p) { p++; - aprint_normal("\nviommio: %s", p); + aprint_normal("viommio: %s\n", p); parsearg(margs, p); error = virtio_mmio_cmdline_do_attach(self, @@ -228,7 +223,7 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) return; } if (hasnext) { - p = n+1; + p = n+1; /* previously recorded end of param */ idx++; config_found(parent, pvaa, NULL, CFARGS_NONE); } @@ -262,9 +257,6 @@ virtio_mmio_cmdline_do_attach(device_t self, return error; } - aprint_normal("\n"); - aprint_naive("\n"); - msc->sc_alloc_interrupts = virtio_mmio_cmdline_alloc_interrupts; msc->sc_free_interrupts = virtio_mmio_cmdline_free_interrupts; From 18ae00b387a991d8b5b1a5b6cc05a4e4a3ae37a1 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 10 Jan 2024 14:16:46 +0100 Subject: [PATCH 063/114] fix: let's check sizes shall we --- sys/dev/virtio/virtio_mmio_cmdline.c | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index 9cfa4ee4cc107..ec2fadc806634 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -190,7 +190,7 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) aprint_naive("\n"); if (idx == 0) { - strcpy(cmdline, xen_start_info.cmd_line); + strncpy(cmdline, xen_start_info.cmd_line, sizeof(cmdline)); aprint_verbose("kernel parameters: %s\n", cmdline); if ((p = strstr(cmdline, VMMIOSTR)) == NULL) return; From 478920783e13fcd9b24d1d1ff5141da1b5ebb431 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 11 Jan 2024 06:48:25 +0100 Subject: [PATCH 064/114] doc: added pv man --- share/man/man4/pv.4 | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 share/man/man4/pv.4 diff --git a/share/man/man4/pv.4 b/share/man/man4/pv.4 new file mode 100644 index 0000000000000..f25ba4a247549 --- /dev/null +++ b/share/man/man4/pv.4 @@ -0,0 +1,44 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.\" ported from OpenBSD +.Dd January 2024 +.Dt PV 4 +.Os +.Sh NAME +.Nm pv +.Nd paravirtual device tree root +.Sh SYNOPSIS +.Cd "pv* at pvbus?" +.Sh DESCRIPTION +.Nm +driver is used on virtual machines that are running on hypervisors. +It provides a pseudo-bus for all paravirtual devices that do not +attach to a well-known bus like +.Xr pci 4 . +.Sh SEE ALSO +.Xr virtio 4 , +.Xr virtio_mmio 4 From e1255e49d54e90f2f56380f50d44db25ecd49367 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 11 Jan 2024 07:24:11 +0100 Subject: [PATCH 065/114] doc: added virtio_mmio man --- share/man/man4/virtio_mmio.4 | 75 ++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 share/man/man4/virtio_mmio.4 diff --git a/share/man/man4/virtio_mmio.4 b/share/man/man4/virtio_mmio.4 new file mode 100644 index 0000000000000..6f92cb3fdc4a8 --- /dev/null +++ b/share/man/man4/virtio_mmio.4 @@ -0,0 +1,75 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2.
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd January 2024 +.Dt VIRTIO_MMIO 4 +.Os NetBSD +.Sh NAME +.Nm virtio_mmio +.Nd VirtIO over memory mapped device +.Sh SYNOPSIS +.Cd "pv* at pvbus?" +.Cd "virtio* at pv?" +.Pp +.Cd "acpi0 at mainbus0" +.Cd "virtio* at acpi?" +.Sh DESCRIPTION +.Nm +can be used in virtual environments without +.Xr pci 4 +support (a common situation in embedded device models), which can use a simple +memory mapped device ( +.Nm +) instead of a +.Xr pci 4 +device. +.Pp +The memory mapped +.Xr virtio 4 +device behaviour is based on the +.Xr pci 4 +device specification. Therefore most operations, including device initialization, +queue configuration and buffer transfers, are nearly identical. +.Pp +Unlike +.Xr pci 4 +, +.Nm +provides no generic device discovery mechanism. For each device, the guest OS will +need to know the location of the registers and interrupt(s) used.
+.Pp +Device location can be read from either +.Xr acpi 4 +or via kernel command line parameters, implemented as a +.Xr pv 4 +virtual device. +.Sh SEE ALSO +.Xr virtio 4 +.Pp +.Rs +.%T Virtual I/O Device (VIRTIO) Version 1.2 +.%U https://docs.oasis-open.org/virtio/virtio/v1.2/virtio-v1.2.html +.Re From 9361b4ca1f368210ec681436725332cf742f54f9 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sun, 14 Jan 2024 21:15:46 +0100 Subject: [PATCH 066/114] fix: performance analysis and fixes --- sys/arch/amd64/amd64/locore.S | 9 +++++ sys/arch/x86/x86/cpu.c | 6 ++- sys/arch/x86/x86/identcpu_subr.c | 6 +++ sys/arch/x86/x86/x86_machdep.c | 6 +++ sys/kern/init_main.c | 63 ++++++++++++++++++++++++++++++++ sys/kern/subr_autoconf.c | 4 +- 6 files changed, 91 insertions(+), 3 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 97519bcb6a1b0..e0e7997324796 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -386,6 +386,12 @@ END(biosextmem) .type _C_LABEL(lwp0uarea), @object LABEL(lwp0uarea) .quad 0 END(lwp0uarea) + .type _C_LABEL(boottime_low), @object +LABEL(boottime_low) .long 0 /* low part of rdtsc */ +END(boottime_low) + .type _C_LABEL(boottime_high), @object +LABEL(boottime_high) .long 0 /* high part of rdtsc */ +END(boottime_high) #ifndef XENPV .globl gdt64_lo @@ -1036,6 +1042,9 @@ END(start) /* entry point for Xen PVH */ .code32 ENTRY(start_genpvh) + rdtsc + movl %eax, RELOC(boottime_low) + movl %edx, RELOC(boottime_high) /* Xen doesn't start us with a valid gdt */ movl $RELOC(gdtdesc32), %eax lgdt (%eax) diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index 3d2feee61ec5c..2bb03bde427d2 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -1418,6 +1418,8 @@ cpu_get_tsc_freq(struct cpu_info *ci) int64_t overhead; if (CPU_IS_PRIMARY(ci) && cpu_hascounter()) { + if (ci->ci_data.cpu_cc_freq != 0) + return; /* * If it's the first call of this function, try to get TSC * freq
from CPUID by calling cpu_tsc_freq_cpuid(). @@ -1425,8 +1427,8 @@ cpu_get_tsc_freq(struct cpu_info *ci) * known. This is required for Intel's Comet Lake and newer * processors to set LAPIC timer correctly. */ - if (ci->ci_data.cpu_cc_freq == 0) - freq = freq_from_cpuid = cpu_tsc_freq_cpuid(ci); + freq = freq_from_cpuid = cpu_tsc_freq_cpuid(ci); + if (freq != 0) aprint_debug_dev(ci->ci_dev, "TSC freq " "from CPUID %" PRIu64 " Hz\n", freq); diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index 52269505d36cc..b635c9fa5c0f1 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -69,6 +69,12 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) uint32_t descs[4]; uint32_t denominator, numerator; + if (vm_guest != VM_GUEST_NO) { + x86_cpuid(0x40000010, descs); + if (descs[0] > 0) + return descs[0] * 1000; /* TSC freq in khz */ + } + if (!((ci->ci_max_cpuid >= 0x15) && (cpu_vendor == CPUVENDOR_INTEL))) return 0; diff --git a/sys/arch/x86/x86/x86_machdep.c b/sys/arch/x86/x86/x86_machdep.c index 8a2364b6eccc7..a753c1088a46a 100644 --- a/sys/arch/x86/x86/x86_machdep.c +++ b/sys/arch/x86/x86/x86_machdep.c @@ -74,6 +74,8 @@ __KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.154 2023/10/04 20:28:06 ad Exp $") #include +#include + #include "tsc.h" #include "acpica.h" @@ -1528,10 +1530,14 @@ cpu_initclocks(void) * Re-calibrate TSC on boot CPU using most accurate time source, * thus making accurate TSC available for x86_initclock_func(). */ + addstage("before cpu_get_tsc_freq"); cpu_get_tsc_freq(curcpu()); + addstage("after cpu_get_tsc_freq"); + addstage("before x86_initclock_func"); /* Now start the clocks on this CPU (the boot CPU). 
*/ (*x86_initclock_func)(); + addstage("after x86_initclock_func"); } int diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 42900357a1ceb..1574d5ff39256 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -232,6 +232,8 @@ extern void *_binary_splash_image_end; #include +#include + extern time_t rootfstime; #ifndef curlwp @@ -252,6 +254,43 @@ static void configure(void); static void configure2(void); static void configure3(void); void main(void); +static void howlong(void); + +extern uint32_t boottime_low; +extern uint32_t boottime_high; + +#define MAXSTAGES 256 +#define nitems(x) __arraycount(x) + +static volatile int nrecs = 0; + +struct bootstage { + const char *name; + uint64_t tsc; +} stages[MAXSTAGES]; + +static void +howlong(void) +{ + uint64_t entrytime = (uint64_t)boottime_high << 32 | boottime_low; + int i, limit; + + limit = MIN(nrecs, nitems(stages)); + for (i = 0; i < limit; i++) { + aprint_verbose("%s: %lums\n", stages[i].name, ((stages[i].tsc - entrytime) * 1000) / curcpu()->ci_data.cpu_cc_freq); + } +} + +void +addstage(const char *stage) +{ + if (nrecs < nitems(stages)) { + stages[nrecs].name = stage; + stages[nrecs].tsc = rdtsc(); + + atomic_add_int(&nrecs, 1); + } +} /* * System startup; initialize the world, create process 0, mount root @@ -272,6 +311,7 @@ main(void) CPU_INFO_ITERATOR cii; struct cpu_info *ci; + addstage("start"); #ifdef DIAGNOSTIC /* * Verify that CPU_INFO_FOREACH() knows about the boot CPU @@ -296,6 +336,7 @@ main(void) * in case of early panic or other messages. */ consinit(); + addstage("consinit"); #ifdef CNMAGIC cn_set_magic(CNMAGIC); #endif @@ -327,6 +368,7 @@ main(void) percpu_init(); + addstage("percpu_init"); /* Initialize radix trees (used by numerous subsystems). */ radix_tree_init(); @@ -365,6 +407,7 @@ main(void) module_init(); module_hook_init(); + addstage("module_init"); /* * Initialize the kernel authorization subsystem and start the * default security model, if any. 
We need to do this early @@ -436,6 +479,8 @@ main(void) sched_init(); + addstage("sched_init"); + /* Initialize processor-sets */ psets_init(); @@ -545,6 +590,7 @@ main(void) #ifdef __HAVE_LEGACY_INTRCNT evcnt_attach_legacy_intrcnt(); #endif + addstage("configure"); /* Enable deferred processing of RNG samples */ rnd_init_softint(); @@ -557,6 +603,7 @@ main(void) * system heartbeat on all CPUs. */ heartbeat_start(); + addstage("heartbeat_start"); ssp_init(); @@ -564,7 +611,9 @@ main(void) mm_init(); + addstage("before configure2"); configure2(); + addstage("configure2"); /* Initialize the rest of ipi(9) after CPUs have been detected. */ ipi_percpu_init(); @@ -581,6 +630,7 @@ main(void) /* Initialize exec structures */ exec_init(1); /* seminit calls exithook_establish() */ + addstage("exec_init"); #if NVERIEXEC > 0 /* @@ -590,6 +640,7 @@ main(void) #endif /* NVERIEXEC > 0 */ pax_init(); + addstage("pax_init"); #ifdef IPSEC /* Attach network crypto subsystem */ @@ -633,6 +684,7 @@ main(void) procinit_sysctl(); scdebug_init(); + addstage("scdebug_init"); /* * Create process 1 (init(8)). We do this now, as Unix has @@ -645,6 +697,7 @@ main(void) */ if (fork1(l, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL)) panic("fork init"); + addstage("fork"); /* * The initproc variable cannot be initialized in start_init as there @@ -653,6 +706,7 @@ main(void) mutex_enter(&proc_lock); initproc = proc_find_raw(1); mutex_exit(&proc_lock); + addstage("initproc"); /* * Load any remaining builtin modules, and hand back temporary @@ -676,6 +730,7 @@ main(void) * the root and dump devices. */ cpu_rootconf(); + addstage("cpu_rootconf"); cpu_dumpconf(); /* Mount the root file system. */ @@ -698,6 +753,7 @@ main(void) * don't have a non-volatile time-of-day device. 
*/ inittodr(rootfstime); + addstage("inittodr"); /* * Now can look at time, having had a chance to verify the time @@ -736,9 +792,14 @@ main(void) NULL, NULL, "ioflush")) panic("fork syncer"); + addstage("uvm_swap_init"); + /* Wait for final configure threads to complete. */ config_finalize_mountroot(); + addstage("config_finalize_mountroot"); + + howlong(); /* * Okay, now we can let init(8) exec! It's off to userland! */ @@ -803,7 +864,9 @@ configure2(void) * Now that we've found all the hardware, start the real time * and statistics clocks. */ + addstage("before initclocks"); initclocks(); + addstage("after initclocks"); cold = 0; /* clocks are running, we're warm now! */ s = splsched(); diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c index 5ee8a7998b07f..a307caff35ca7 100644 --- a/sys/kern/subr_autoconf.c +++ b/sys/kern/subr_autoconf.c @@ -1139,7 +1139,7 @@ config_search_internal(device_t parent, void *aux, const struct cfargs_internal * const args) { struct cftable *ct; - cfdata_t cf; + cfdata_t cf = NULL; struct matchinfo m; KASSERT(config_initialized); @@ -1197,6 +1197,7 @@ config_search_internal(device_t parent, void *aux, } } rnd_add_uint32(&rnd_autoconf_source, 0); + return m.match; } @@ -1853,6 +1854,7 @@ config_attach_internal(device_t parent, cfdata_t cf, void *aux, cfprint_t print, device_register_post_config(dev, aux); rnd_add_uint32(&rnd_autoconf_source, 0); + aprint_verbose("\n>>> attach %lums\n", (rdtsc() * 1000) / 3850000000); return dev; } From 1e0b369c764f4c97f6247fc562dcb778544a219c Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 15 Jan 2024 08:10:45 +0100 Subject: [PATCH 067/114] fix: add sys/tstages.h --- sys/sys/tstages.h | 1 + 1 file changed, 1 insertion(+) create mode 100644 sys/sys/tstages.h diff --git a/sys/sys/tstages.h b/sys/sys/tstages.h new file mode 100644 index 0000000000000..d16a3f5ac9740 --- /dev/null +++ b/sys/sys/tstages.h @@ -0,0 +1 @@ +void addstage(const char *); From 
e37dc7fcc27784fc0c12a83181fdffb40a5ce227 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 15 Jan 2024 12:08:56 +0100 Subject: [PATCH 068/114] fix: correctly detect all childs --- sys/dev/pv/pvbus.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/sys/dev/pv/pvbus.c b/sys/dev/pv/pvbus.c index f1101d623051a..296f71b0b251c 100644 --- a/sys/dev/pv/pvbus.c +++ b/sys/dev/pv/pvbus.c @@ -48,6 +48,14 @@ struct x86_bus_dma_tag pvbus_bus_dma_tag = { ._may_bounce = NULL, }; +static int pv_match(device_t, cfdata_t, void *); +static void pv_attach(device_t, device_t, void *); +static int pv_submatch(device_t, cfdata_t, const int *, void *); +static int pv_print(void *, const char *); + +CFATTACH_DECL_NEW(pv, sizeof(struct pv_softc), + pv_match, pv_attach, NULL, NULL); + static int pv_match(device_t parent, cfdata_t match, void *aux) { @@ -55,7 +63,8 @@ pv_match(device_t parent, cfdata_t match, void *aux) } static void -pv_attach(device_t parent, device_t self, void *aux) { +pv_attach(device_t parent, device_t self, void *aux) +{ struct pv_attach_args pvaa; pvaa.pvaa_memt = x86_bus_space_mem; @@ -64,8 +73,26 @@ pv_attach(device_t parent, device_t self, void *aux) { aprint_naive("\n"); aprint_normal("\n"); - config_found(self, &pvaa, NULL, CFARGS_NONE); + config_found(self, &pvaa, NULL, CFARGS(.search = pv_submatch)); } -CFATTACH_DECL_NEW(pv, sizeof(struct pv_softc), - pv_match, pv_attach, NULL, NULL); +static int +pv_submatch(device_t parent, cfdata_t cf, const int *ldesc, void *aux) +{ + struct pv_attach_args *pvaa = aux; + + if (config_probe(parent, cf, pvaa)) { + config_attach(parent, cf, pvaa, pv_print, CFARGS_NONE); + return 0; + } + return 0; +} + +static int +pv_print(void *aux, const char *pnp) +{ + + if (pnp != NULL) + printf("pv at %s", pnp); + return (UNCONF); +} From 16e6bb15008d80ca2cdbc5a6a6e0226b55699b0f Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 15 Jan 2024 12:09:29 +0100 
Subject: [PATCH 069/114] fix: increase cmdline length --- sys/dev/virtio/virtio_mmio_cmdline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/dev/virtio/virtio_mmio_cmdline.c b/sys/dev/virtio/virtio_mmio_cmdline.c index ec2fadc806634..d73ce80e41298 100644 --- a/sys/dev/virtio/virtio_mmio_cmdline.c +++ b/sys/dev/virtio/virtio_mmio_cmdline.c @@ -180,7 +180,7 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) struct virtio_mmio_cmdline_softc *sc = device_private(self); struct pv_attach_args *pvaa = aux; struct mmio_args *margs = &sc->margs; - char *v, *n, cmdline[128]; + char *v, *n, cmdline[LINE_MAX]; int error; static char *p = NULL; static int idx = 0; @@ -227,6 +227,7 @@ virtio_mmio_cmdline_attach(device_t parent, device_t self, void *aux) idx++; config_found(parent, pvaa, NULL, CFARGS_NONE); } + return; } } From 245d160ca7112237138c276c604d786771df6fca Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 15 Jan 2024 12:10:43 +0100 Subject: [PATCH 070/114] fix: don't tc_init if mc146818_read fails --- sys/arch/x86/isa/clock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/arch/x86/isa/clock.c b/sys/arch/x86/isa/clock.c index 8305818c10993..4c9445227f419 100644 --- a/sys/arch/x86/isa/clock.c +++ b/sys/arch/x86/isa/clock.c @@ -335,6 +335,7 @@ startrtclock(void) char bits[128]; snprintb(bits, sizeof(bits), NVRAM_DIAG_BITS, s); printf("RTC BIOS diagnostic error %s\n", bits); + return; } tc_init(&i8254_timecounter); From 840e95288df6b80b48445d9c880737f1fe5cb022 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 15 Jan 2024 12:11:27 +0100 Subject: [PATCH 071/114] feat: add pvclock (again) --- sys/dev/pv/files.pv | 4 + sys/dev/pv/pvclock.c | 217 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 sys/dev/pv/pvclock.c diff --git a/sys/dev/pv/files.pv b/sys/dev/pv/files.pv index 1775b9edf5af5..b247f5f5478ab 100644 --- a/sys/dev/pv/files.pv +++ 
b/sys/dev/pv/files.pv @@ -6,3 +6,7 @@ file dev/pv/pvbus.c pvbus needs-flag attach virtio at pv with mmio_cmdline: virtio_mmio file dev/virtio/virtio_mmio_cmdline.c mmio_cmdline + +device pvclock +attach pvclock at pv +file dev/pv/pvclock.c pvclock diff --git a/sys/dev/pv/pvclock.c b/sys/dev/pv/pvclock.c new file mode 100644 index 0000000000000..fadb5d2dae693 --- /dev/null +++ b/sys/dev/pv/pvclock.c @@ -0,0 +1,217 @@ +/* $OpenBSD: pvclock.c,v 1.9 2023/02/04 19:19:37 cheloha Exp $ */ + +/* + * Copyright (c) 2018 Reyk Floeter + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#if !defined(__i386__) && !defined(__amd64__) +#error pvclock(4) is only supported on i386 and amd64 +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +uint pvclock_lastcount; + +struct pvclock_softc { + device_t sc_dev; + void *sc_time; + paddr_t sc_paddr; + struct timecounter *sc_tc; +}; + +static int pvclock_match(device_t, cfdata_t, void *); +static void pvclock_attach(device_t, device_t, void *); + +/* +void pvclock_read_time_info(struct pvclock_softc *, + struct pvclock_time_info *); +*/ +static inline uint32_t + pvclock_read_begin(const struct pvclock_time_info *); +static inline int + pvclock_read_done(const struct pvclock_time_info *, uint32_t); +static uint + pvclock_get_timecount(struct timecounter *); + +struct timecounter pvclock_timecounter = { + .tc_get_timecount = pvclock_get_timecount, + .tc_counter_mask = ~0u, + .tc_frequency = 0, + .tc_name = NULL, + .tc_quality = -2000, + .tc_priv = NULL, +}; + +CFATTACH_DECL_NEW(pvclock, sizeof(struct pvclock_softc), + pvclock_match, pvclock_attach, NULL, NULL); + + +static int +pvclock_match(device_t parent, cfdata_t cf, void *aux) +{ + u_int regs[6]; + /* + * pvclock is provided by different hypervisors, we currently + * only support the "kvmclock". + */ + x86_cpuid(0x40000000 + CPUID_OFFSET_KVM_FEATURES, regs); + /* + * We only implement support for the 2nd version of pvclock. + * The first version is basically the same but with different + * non-standard MSRs and it is deprecated. + */ + if ((regs[0] & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0) + return (0); + + /* + * Only the "stable" clock with a sync'ed TSC is supported. + * In this case the host guarantees that the TSC is constant + * and invariant, either by the underlying TSC or by passing + * on a synchronized value. 
+ */ + if ((regs[0] & + (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0) + return (0); + return (1); +} + +static inline uint32_t +pvclock_read_begin(const struct pvclock_time_info *ti) +{ + uint32_t ti_version = ti->ti_version & ~0x1; + virtio_membar_sync(); + return (ti_version); +} + +static inline int +pvclock_read_done(const struct pvclock_time_info *ti, + uint32_t ti_version) +{ + virtio_membar_sync(); + return (ti->ti_version == ti_version); +} + +static uint +pvclock_get_timecount(struct timecounter *tc) +{ + struct pvclock_softc *sc = tc->tc_priv; + struct pvclock_time_info *ti; + uint64_t tsc_timestamp, system_time, delta, ctr; + uint32_t ti_version, mul_frac; + int8_t shift; + uint8_t flags; + + ti = sc->sc_time; + do { + ti_version = pvclock_read_begin(ti); + system_time = ti->ti_system_time; + tsc_timestamp = ti->ti_tsc_timestamp; + mul_frac = ti->ti_tsc_to_system_mul; + shift = ti->ti_tsc_shift; + flags = ti->ti_flags; + } while (!pvclock_read_done(ti, ti_version)); + + /* + * The algorithm is described in + * linux/Documentation/virtual/kvm/msr.txt + */ + delta = rdtsc() - tsc_timestamp; + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + ctr = ((delta * mul_frac) >> 32) + system_time; + + if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0) + return (ctr); + + if (ctr < pvclock_lastcount) + return (pvclock_lastcount); + + atomic_swap_uint(&pvclock_lastcount, ctr); + + return (ctr); +} + +static void +pvclock_attach(device_t parent, device_t self, void *aux) +{ + struct pvclock_softc *sc = device_private(self); + struct pvclock_time_info *ti; + paddr_t pa; + uint32_t ti_version; + uint8_t flags; + + aprint_naive("\n"); + aprint_normal("\n"); + + if ((sc->sc_time = (void *)uvm_km_alloc(kernel_map, + PAGE_SIZE, PAGE_SIZE, + UVM_KMF_WIRED | UVM_KMF_ZERO)) == NULL) { + aprint_error("time page allocation failed\n"); + return; + } + + if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) { + aprint_error("time page PA extraction failed\n"); + 
uvm_km_free(kernel_map, (vaddr_t)sc->sc_time, + PAGE_SIZE, UVM_KMF_WIRED); + sc->sc_time = NULL; + return; + } + + wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); + sc->sc_paddr = pa; + + sc->sc_dev = self; + + ti = sc->sc_time; + do { + ti_version = pvclock_read_begin(ti); + flags = ti->ti_flags; + } while (!pvclock_read_done(ti, ti_version)); + + sc->sc_tc = &pvclock_timecounter; + sc->sc_tc->tc_name = device_xname(sc->sc_dev); + + sc->sc_tc->tc_frequency = 1000000000ULL; + sc->sc_tc->tc_priv = sc; + + pvclock_lastcount = 0; + + /* Better than HPET but below TSC */ + sc->sc_tc->tc_quality = 1500; + + if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) { + /* if tsc is not stable, set a lower priority */ + /* Better than i8254 but below HPET */ + sc->sc_tc->tc_quality = 500; + } + + tc_init(sc->sc_tc); +} From 20a11666f5485412d4c4e0655b3236ed865ea210 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 15 Jan 2024 12:21:54 +0100 Subject: [PATCH 072/114] fix: useless print --- sys/dev/pv/pvbus.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/sys/dev/pv/pvbus.c b/sys/dev/pv/pvbus.c index 296f71b0b251c..62c1ecc1d1fcc 100644 --- a/sys/dev/pv/pvbus.c +++ b/sys/dev/pv/pvbus.c @@ -51,7 +51,6 @@ struct x86_bus_dma_tag pvbus_bus_dma_tag = { static int pv_match(device_t, cfdata_t, void *); static void pv_attach(device_t, device_t, void *); static int pv_submatch(device_t, cfdata_t, const int *, void *); -static int pv_print(void *, const char *); CFATTACH_DECL_NEW(pv, sizeof(struct pv_softc), pv_match, pv_attach, NULL, NULL); @@ -82,17 +81,8 @@ pv_submatch(device_t parent, cfdata_t cf, const int *ldesc, void *aux) struct pv_attach_args *pvaa = aux; if (config_probe(parent, cf, pvaa)) { - config_attach(parent, cf, pvaa, pv_print, CFARGS_NONE); + config_attach(parent, cf, pvaa, NULL, CFARGS_NONE); return 0; } return 0; } - -static int -pv_print(void *aux, const char *pnp) -{ - - if (pnp != NULL) - printf("pv at %s", pnp); - return 
(UNCONF); -} From bdd66d2b5003916cbf19dc7da6b3f27a49441dfc Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 15 Jan 2024 18:57:36 +0100 Subject: [PATCH 073/114] feat: don't use lapic timecounter on GENPVH --- sys/arch/x86/x86/lapic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sys/arch/x86/x86/lapic.c b/sys/arch/x86/x86/lapic.c index 0add792d5a557..4c81fa9c98147 100644 --- a/sys/arch/x86/x86/lapic.c +++ b/sys/arch/x86/x86/lapic.c @@ -50,6 +50,8 @@ __KERNEL_RCSID(0, "$NetBSD: lapic.c,v 1.89 2022/09/07 00:40:19 knakahara Exp $") #include #include +#include + #include #include @@ -601,8 +603,8 @@ lapic_reset(void) static void lapic_initclock(void) { - - if (curcpu() == &cpu_info_primary) { + /* don't use lapic timecounter with VM_GUEST_GENPVH */ + if (curcpu() == &cpu_info_primary && vm_guest != VM_GUEST_GENPVH) { /* * Recalibrate the timer using the cycle counter, now that * the cycle counter itself has been recalibrated. From 6eef561ee2cfc480a881ebddfe6a7aa70b0a72e1 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 16 Jan 2024 06:22:42 +0100 Subject: [PATCH 074/114] fix: get TSC from CPU brand --- sys/arch/x86/x86/identcpu_subr.c | 68 ++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index b635c9fa5c0f1..de535e4f600db 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -43,6 +43,8 @@ __KERNEL_RCSID(0, "$NetBSD: identcpu_subr.c,v 1.9 2021/10/07 13:04:18 msaitoh Ex #include +#include + #ifdef _KERNEL #include #include @@ -62,6 +64,66 @@ __KERNEL_RCSID(0, "$NetBSD: identcpu_subr.c,v 1.9 2021/10/07 13:04:18 msaitoh Ex #include "cpuctl_i386.h" #endif +static uint64_t +cpu_tsc_freq_intel_brand(struct cpu_info *ci) +{ + char brand[48]; + u_int regs[4]; + uint64_t freq; + char *p; + u_int i; + + /* + * Intel Processor Identification and the CPUID Instruction + * Application Note 485. 
+ * http://www.intel.com/assets/pdf/appnote/241618.pdf + */ + if (ci->ci_max_ext_cpuid >= 0x80000004) { + p = brand; + for (i = 0x80000002; i < 0x80000005; i++) { + x86_cpuid(i, regs); + memcpy(p, regs, sizeof(regs)); + p += sizeof(regs); + } + p = NULL; + for (i = 0; i < sizeof(brand) - 1; i++) + if (brand[i] == 'H' && brand[i + 1] == 'z') + p = brand + i; + if (p != NULL) { + p -= 5; + switch (p[4]) { + case 'M': + i = 1; + break; + case 'G': + i = 1000; + break; + case 'T': + i = 1000000; + break; + default: + return 0; + } +#define C2D(c) ((c) - '0') + if (p[1] == '.') { + freq = C2D(p[0]) * 1000; + freq += C2D(p[2]) * 100; + freq += C2D(p[3]) * 10; + freq *= i * 1000; + } else { + freq = C2D(p[0]) * 1000; + freq += C2D(p[1]) * 100; + freq += C2D(p[2]) * 10; + freq += C2D(p[3]); + freq *= i * 1000000; + } +#undef C2D + return freq; + } + } + return 0; +} + uint64_t cpu_tsc_freq_cpuid(struct cpu_info *ci) { @@ -69,6 +131,7 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) uint32_t descs[4]; uint32_t denominator, numerator; + addstage("in cpu_tsc_freq_cpuid"); if (vm_guest != VM_GUEST_NO) { x86_cpuid(0x40000010, descs); if (descs[0] > 0) @@ -145,9 +208,14 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) } #endif } + /* still no luck, get the frequency from brand */ + if (freq == 0) + freq = cpu_tsc_freq_intel_brand(ci); + if (freq != 0) aprint_verbose_dev(ci->ci_dev, "TSC freq CPUID %" PRIu64 " Hz\n", freq); + addstage("out cpu_tsc_freq_cpuid"); return freq; } From 77d6ff63e20a7a4442bb0ed0d784101484834756 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 16 Jan 2024 06:27:11 +0100 Subject: [PATCH 075/114] fix: no need for tracing --- sys/arch/x86/x86/identcpu_subr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index de535e4f600db..bc43986d209b0 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -43,8 +43,6 @@ __KERNEL_RCSID(0, "$NetBSD: 
identcpu_subr.c,v 1.9 2021/10/07 13:04:18 msaitoh Ex #include -#include - #ifdef _KERNEL #include #include @@ -131,7 +129,6 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) uint32_t descs[4]; uint32_t denominator, numerator; - addstage("in cpu_tsc_freq_cpuid"); if (vm_guest != VM_GUEST_NO) { x86_cpuid(0x40000010, descs); if (descs[0] > 0) @@ -215,7 +212,6 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) if (freq != 0) aprint_verbose_dev(ci->ci_dev, "TSC freq CPUID %" PRIu64 " Hz\n", freq); - addstage("out cpu_tsc_freq_cpuid"); return freq; } From 644408a8c75bb08342abf869adbecb7d938cb324 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 16 Jan 2024 08:49:53 +0100 Subject: [PATCH 076/114] fix: we don't use lapic timer --- sys/arch/x86/x86/cpu.c | 8 ++++++-- sys/arch/x86/x86/lapic.c | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index 2bb03bde427d2..06633310d659d 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -86,6 +86,8 @@ __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.209 2023/07/16 19:55:43 riastradh Exp $"); #include #include +#include + #include #include "acpica.h" /* for NACPICA, for mp_verbose */ @@ -351,6 +353,7 @@ cpu_attach(device_t parent, device_t self, void *aux) sc->sc_dev = self; + addstage("in cpu_attach"); if (ncpu > maxcpus) { #ifndef _LP64 aprint_error(": too many CPUs, please use NetBSD/amd64\n"); @@ -436,7 +439,6 @@ cpu_attach(device_t parent, device_t self, void *aux) #ifdef SVS cpu_svs_init(ci); #endif - pmap_reference(pmap_kernel()); ci->ci_pmap = pmap_kernel(); ci->ci_tlbstate = TLBSTATE_STALE; @@ -467,7 +469,7 @@ cpu_attach(device_t parent, device_t self, void *aux) /* Enable lapic. 
*/ lapic_enable(); lapic_set_lvt(); - if (!vm_guest_is_xenpvh_or_pvhvm()) + if (!vm_guest_is_xenpvh_or_pvhvm() && vm_guest != VM_GUEST_GENPVH) lapic_calibrate_timer(false); } #endif @@ -475,6 +477,7 @@ cpu_attach(device_t parent, device_t self, void *aux) again = true; } + /* further PCB init done later. */ switch (caa->cpu_role) { @@ -550,6 +553,7 @@ cpu_attach(device_t parent, device_t self, void *aux) } #endif + addstage("out cpu_attach"); /* * Postpone the "cpufeaturebus" scan. * It is safe to scan the pseudo-bus diff --git a/sys/arch/x86/x86/lapic.c b/sys/arch/x86/x86/lapic.c index 4c81fa9c98147..08864892857ce 100644 --- a/sys/arch/x86/x86/lapic.c +++ b/sys/arch/x86/x86/lapic.c @@ -603,7 +603,10 @@ lapic_reset(void) static void lapic_initclock(void) { - /* don't use lapic timecounter with VM_GUEST_GENPVH */ + /* + * don't use lapic timecounter with VM_GUEST_GENPVH + * XXX use pvclock + */ if (curcpu() == &cpu_info_primary && vm_guest != VM_GUEST_GENPVH) { /* * Recalibrate the timer using the cycle counter, now that From bb7c2f345f1c5f68c181a72689a31c3b8e9fd6b4 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 16 Jan 2024 10:52:30 +0100 Subject: [PATCH 077/114] fix: avoid delay, wait for char --- sys/dev/ic/com.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sys/dev/ic/com.c b/sys/dev/ic/com.c index d271b5943469c..6f03af7885f76 100644 --- a/sys/dev/ic/com.c +++ b/sys/dev/ic/com.c @@ -122,6 +122,8 @@ __KERNEL_RCSID(0, "$NetBSD: com.c,v 1.384 2023/04/11 13:01:41 riastradh Exp $"); #include +#include + #include #include @@ -540,6 +542,7 @@ com_attach_subr(struct com_softc *sc) prop_dictionary_t dict; bool is_console = true; bool force_console = false; + dev_t dev; aprint_naive("\n"); @@ -551,6 +554,8 @@ com_attach_subr(struct com_softc *sc) callout_setfunc(&sc->sc_poll_callout, com_intr_poll, sc); mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_HIGH); + dev = device_unit(sc->sc_dev); + #if defined(COM_16650) sc->sc_type = 
COM_TYPE_16650; #elif defined(COM_16750) @@ -589,8 +594,9 @@ com_attach_subr(struct com_softc *sc) break; } + /* Wait for any pending character */ + com_common_putc(dev, &comcons_info.regs, 0, 0); /* Make sure the console is always "hardwired". */ - delay(10000); /* wait for output to finish */ if (is_console) { SET(sc->sc_hwflags, COM_HW_CONSOLE); } @@ -598,6 +604,7 @@ com_attach_subr(struct com_softc *sc) SET(sc->sc_swflags, TIOCFLAG_SOFTCAR); } + /* Probe for FIFO */ switch (sc->sc_type) { case COM_TYPE_HAYESP: From b0872120ab0b194869e4b8a2b927f0c4f847464c Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 16 Jan 2024 18:15:52 +0100 Subject: [PATCH 078/114] fix: introduced hv_type for vmms using KVM --- sys/arch/x86/x86/identcpu.c | 71 +++++++++++++++++++------------------ sys/arch/x86/x86/lapic.c | 6 ++-- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c index 9856cf9fb15e1..8bfdd5986fc01 100644 --- a/sys/arch/x86/x86/identcpu.c +++ b/sys/arch/x86/x86/identcpu.c @@ -1029,7 +1029,8 @@ cpu_identify(struct cpu_info *ci) /* * Hypervisor */ -vm_guest_t vm_guest = VM_GUEST_NO; +vm_guest_t vm_guest = VM_GUEST_NO; +vm_guest_t hv_type = VM_GUEST_NO; struct vm_name_guest { const char *name; @@ -1055,6 +1056,16 @@ static const struct vm_name_guest vm_system_products[] = { { "KVM", VM_GUEST_VM }, /* KVM */ }; + +static inline void +real_hypervisor(vm_guest_t hv) +{ + if (vm_guest != VM_GUEST_GENPVH) + vm_guest = hv; + else + hv_type = hv; +} + void identify_hypervisor(void) { @@ -1064,49 +1075,41 @@ identify_hypervisor(void) int i; switch (vm_guest) { + /* guest type already known, no bios info */ case VM_GUEST_XENPV: case VM_GUEST_XENPVH: - case VM_GUEST_GENPVH: - /* guest type already known, no bios info */ return; default: break; } - /* - * [RFC] CPUID usage for interaction between Hypervisors and Linux. 
- * http://lkml.org/lkml/2008/10/1/246 - * - * KB1009458: Mechanisms to determine if software is running in - * a VMware virtual machine - * http://kb.vmware.com/kb/1009458 - */ - if (ISSET(cpu_feature[1], CPUID2_RAZ)) { - vm_guest = VM_GUEST_VM; - x86_cpuid(0x40000000, regs); - if (regs[0] >= 0x40000000) { - memcpy(&hv_vendor[0], &regs[1], sizeof(*regs)); - memcpy(&hv_vendor[4], &regs[2], sizeof(*regs)); - memcpy(&hv_vendor[8], &regs[3], sizeof(*regs)); - if (memcmp(hv_vendor, "VMwareVMware", 12) == 0) - vm_guest = VM_GUEST_VMWARE; - else if (memcmp(hv_vendor, "Microsoft Hv", 12) == 0) { - vm_guest = VM_GUEST_HV; + x86_cpuid(0x40000000, regs); + if (regs[0] >= 0x40000000) { + memcpy(&hv_vendor[0], &regs[1], sizeof(*regs)); + memcpy(&hv_vendor[4], &regs[2], sizeof(*regs)); + memcpy(&hv_vendor[8], &regs[3], sizeof(*regs)); + if (memcmp(hv_vendor, "VMwareVMware", 12) == 0) + vm_guest = VM_GUEST_VMWARE; + else if (memcmp(hv_vendor, "Microsoft Hv", 12) == 0) { #if NHYPERV > 0 - hyperv_early_init(); + hyperv_early_init(); #endif - } else if (memcmp(hv_vendor, "KVMKVMKVM\0\0\0", 12) == 0) - vm_guest = VM_GUEST_KVM; - else if (memcmp(hv_vendor, "XenVMMXenVMM", 12) == 0) - vm_guest = VM_GUEST_XENHVM; - /* FreeBSD bhyve: "bhyve bhyve " */ - /* OpenBSD vmm: "OpenBSDVMM58" */ - /* NetBSD nvmm: "___ NVMM ___" */ - } - // VirtualBox returns KVM, so keep going. - if (vm_guest != VM_GUEST_KVM) - return; + vm_guest = VM_GUEST_HV; + } else if (memcmp(hv_vendor, "KVMKVMKVM\0\0\0", 12) == 0) + /* + * The virtual machine manager (qemu, Firecracker...) + * may run KVM as the hypervisor + */ + real_hypervisor(VM_GUEST_KVM); + else if (memcmp(hv_vendor, "XenVMMXenVMM", 12) == 0) + vm_guest = VM_GUEST_XENHVM; + /* FreeBSD bhyve: "bhyve bhyve " */ + /* OpenBSD vmm: "OpenBSDVMM58" */ + /* NetBSD nvmm: "___ NVMM ___" */ } + // VirtualBox returns KVM, so keep going. + if (vm_guest != VM_GUEST_KVM) + return; /* * Examine SMBIOS strings for older hypervisors. 
diff --git a/sys/arch/x86/x86/lapic.c b/sys/arch/x86/x86/lapic.c index 08864892857ce..c032f89e53229 100644 --- a/sys/arch/x86/x86/lapic.c +++ b/sys/arch/x86/x86/lapic.c @@ -604,10 +604,10 @@ static void lapic_initclock(void) { /* - * don't use lapic timecounter with VM_GUEST_GENPVH - * XXX use pvclock + * If the hypervisor is KVM, don't use lapic, instead + * use pvclock(4). */ - if (curcpu() == &cpu_info_primary && vm_guest != VM_GUEST_GENPVH) { + if (curcpu() == &cpu_info_primary && hv_type != VM_GUEST_KVM) { /* * Recalibrate the timer using the cycle counter, now that * the cycle counter itself has been recalibrated. From 799cf999460b1a8f3f508cdb9aa0f6e3cbd81134 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 16 Jan 2024 20:11:12 +0100 Subject: [PATCH 079/114] fix: s/low/high/ --- sys/arch/amd64/amd64/locore.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index e0e7997324796..e2c7ecb3da512 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -390,7 +390,7 @@ END(lwp0uarea) LABEL(boottime_low) .long 0 /* low part of rdtsc */ END(boottime_low) .type _C_LABEL(boottime_high), @object -LABEL(boottime_high) .long 0 /* low part of rdtsc */ +LABEL(boottime_high) .long 0 /* high part of rdtsc */ END(boottime_high) #ifndef XENPV From 6485c6d58b2ddffa6220c2784e13abba7ec8f547 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 17 Jan 2024 07:12:01 +0100 Subject: [PATCH 080/114] fix: introduce hv_type to avoid lapic when pvclock is available --- sys/arch/amd64/amd64/locore.S | 6 ++- sys/arch/x86/include/cpu.h | 2 + sys/arch/x86/x86/cpu.c | 7 ++- sys/arch/x86/x86/identcpu.c | 84 ++++++++++++++++++++++++----------- sys/dev/isa/com_isa.c | 2 + sys/dev/pv/pvclock.c | 4 +- sys/kern/init_main.c | 4 +- sys/kern/subr_autoconf.c | 4 +- 8 files changed, 79 insertions(+), 34 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S 
b/sys/arch/amd64/amd64/locore.S index e2c7ecb3da512..0d1f6b184e0bd 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1087,13 +1087,15 @@ ENTRY(start_genpvh) push %ebx xorl %eax, %eax cpuid - cmpl $0x1, %eax /* Check if we can call CPUID with eax=1 */ + /* Check if we can call CPUID with eax=1 */ + cmpl $0x1, %eax jb .start_genpvh xorl %eax, %eax inc %eax cpuid shr $31, %ecx - testb $1, %cl /* Check if bit 31 of ECX (hypervisor) is set */ + /* Check if bit 31 of ECX (hypervisor) is set */ + testb $1, %cl jz .start_genpvh xorl %eax, %eax inc %eax diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h index c82da8311574a..76b36efaa255f 100644 --- a/sys/arch/x86/include/cpu.h +++ b/sys/arch/x86/include/cpu.h @@ -517,9 +517,11 @@ typedef enum vm_guest { VM_GUEST_KVM, VM_GUEST_VIRTUALBOX, VM_GUEST_GENPVH, + VM_GUEST_NVMM, VM_LAST } vm_guest_t; extern vm_guest_t vm_guest; +extern vm_guest_t hv_type; static __inline bool __unused vm_guest_is_xenpv(void) diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index 06633310d659d..19c11b1deedc3 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -469,7 +469,12 @@ cpu_attach(device_t parent, device_t self, void *aux) /* Enable lapic. */ lapic_enable(); lapic_set_lvt(); - if (!vm_guest_is_xenpvh_or_pvhvm() && vm_guest != VM_GUEST_GENPVH) + /* + * If the hypervisor is KVM, don't use lapic, instead + * use pvclock(4). 
+ */ + if (!vm_guest_is_xenpvh_or_pvhvm() && + hv_type != VM_GUEST_KVM) lapic_calibrate_timer(false); } #endif diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c index 8bfdd5986fc01..b31bffa237e84 100644 --- a/sys/arch/x86/x86/identcpu.c +++ b/sys/arch/x86/x86/identcpu.c @@ -1045,7 +1045,6 @@ static const struct vm_name_guest vm_bios_vendors[] = { { "BHYVE", VM_GUEST_VM }, /* bhyve */ { "Seabios", VM_GUEST_VM }, /* KVM */ { "innotek GmbH", VM_GUEST_VIRTUALBOX }, /* Oracle VirtualBox */ - { "Generic PVH", VM_GUEST_GENPVH}, /* Generic PVH */ }; static const struct vm_name_guest vm_system_products[] = { @@ -1053,12 +1052,14 @@ static const struct vm_name_guest vm_system_products[] = { { "Virtual Machine", VM_GUEST_VM }, /* Microsoft VirtualPC */ { "VirtualBox", VM_GUEST_VIRTUALBOX }, /* Sun xVM VirtualBox */ { "Parallels Virtual Platform", VM_GUEST_VM }, /* Parallels VM */ - { "KVM", VM_GUEST_VM }, /* KVM */ + { "Generic PVH", VM_GUEST_GENPVH}, /* Generic PVH */ + { "KVM", VM_GUEST_KVM }, /* KVM */ + { "NVMM", VM_GUEST_NVMM} /* NVMM */ }; static inline void -real_hypervisor(vm_guest_t hv) +pvh_real_hypervisor(vm_guest_t hv) { if (vm_guest != VM_GUEST_GENPVH) vm_guest = hv; @@ -1073,43 +1074,72 @@ identify_hypervisor(void) char hv_vendor[12]; const char *p; int i; + bool is_vm = false; switch (vm_guest) { /* guest type already known, no bios info */ case VM_GUEST_XENPV: case VM_GUEST_XENPVH: return; + case VM_GUEST_GENPVH: + is_vm = true; default: break; } - x86_cpuid(0x40000000, regs); - if (regs[0] >= 0x40000000) { - memcpy(&hv_vendor[0], &regs[1], sizeof(*regs)); - memcpy(&hv_vendor[4], &regs[2], sizeof(*regs)); - memcpy(&hv_vendor[8], &regs[3], sizeof(*regs)); - if (memcmp(hv_vendor, "VMwareVMware", 12) == 0) - vm_guest = VM_GUEST_VMWARE; - else if (memcmp(hv_vendor, "Microsoft Hv", 12) == 0) { + /* + * [RFC] CPUID usage for interaction between Hypervisors and Linux. 
+ * http://lkml.org/lkml/2008/10/1/246 + * + * KB1009458: Mechanisms to determine if software is running in + * a VMware virtual machine + * http://kb.vmware.com/kb/1009458 + * + * XXX: this test makes GENPVH hang at boot + */ + if (!is_vm && ISSET(cpu_feature[1], CPUID2_RAZ)) + is_vm = true; + + if (is_vm) { + x86_cpuid(0x40000000, regs); + if (regs[0] >= 0x40000000) { + memcpy(&hv_vendor[0], &regs[1], sizeof(*regs)); + memcpy(&hv_vendor[4], &regs[2], sizeof(*regs)); + memcpy(&hv_vendor[8], &regs[3], sizeof(*regs)); + if (memcmp(hv_vendor, "VMwareVMware", 12) == 0) + vm_guest = VM_GUEST_VMWARE; + else if (memcmp(hv_vendor, "Microsoft Hv", 12) == 0) { + vm_guest = VM_GUEST_HV; #if NHYPERV > 0 - hyperv_early_init(); + hyperv_early_init(); #endif - vm_guest = VM_GUEST_HV; - } else if (memcmp(hv_vendor, "KVMKVMKVM\0\0\0", 12) == 0) - /* - * The virtual machine manager (qemu, Firecracker...) - * may run KVM as the hypervisor - */ - real_hypervisor(VM_GUEST_KVM); - else if (memcmp(hv_vendor, "XenVMMXenVMM", 12) == 0) - vm_guest = VM_GUEST_XENHVM; - /* FreeBSD bhyve: "bhyve bhyve " */ - /* OpenBSD vmm: "OpenBSDVMM58" */ - /* NetBSD nvmm: "___ NVMM ___" */ + } else if (memcmp(hv_vendor, "KVMKVMKVM\0\0\0", 12) == 0) + /* + * The virtual machine manager (qemu, Firecracker...) + * may run KVM as the hypervisor + */ + pvh_real_hypervisor(VM_GUEST_KVM); + else if (memcmp(hv_vendor, "XenVMMXenVMM", 12) == 0) + vm_guest = VM_GUEST_XENHVM; + else if (memcmp(hv_vendor, "___ NVMM ___", 12) == 0) + pvh_real_hypervisor(VM_GUEST_NVMM); + /* FreeBSD bhyve: "bhyve bhyve " */ + /* OpenBSD vmm: "OpenBSDVMM58" */ + /* NetBSD nvmm: "___ NVMM ___" */ + } + + for (i = 0; i < __arraycount(vm_system_products); i++) { + if (vm_system_products[i].guest == vm_guest) + aprint_verbose("VMM: %s\n", + vm_system_products[i].name); + if (vm_system_products[i].guest == hv_type) + aprint_verbose("Hypervisor: %s\n", + vm_system_products[i].name); + } + // VirtualBox returns KVM, so keep going. 
+ if (vm_guest != VM_GUEST_KVM) + return; } - // VirtualBox returns KVM, so keep going. - if (vm_guest != VM_GUEST_KVM) - return; /* * Examine SMBIOS strings for older hypervisors. diff --git a/sys/dev/isa/com_isa.c b/sys/dev/isa/com_isa.c index e3c14bce8796a..7b2fecd987370 100644 --- a/sys/dev/isa/com_isa.c +++ b/sys/dev/isa/com_isa.c @@ -78,6 +78,8 @@ __KERNEL_RCSID(0, "$NetBSD: com_isa.c,v 1.41 2018/12/08 17:46:13 thorpej Exp $") #include #include +#include + #include #include #ifdef COM_HAYESP diff --git a/sys/dev/pv/pvclock.c b/sys/dev/pv/pvclock.c index fadb5d2dae693..3967240caf2b9 100644 --- a/sys/dev/pv/pvclock.c +++ b/sys/dev/pv/pvclock.c @@ -104,7 +104,7 @@ static inline uint32_t pvclock_read_begin(const struct pvclock_time_info *ti) { uint32_t ti_version = ti->ti_version & ~0x1; - virtio_membar_sync(); + virtio_membar_consumer(); return (ti_version); } @@ -112,7 +112,7 @@ static inline int pvclock_read_done(const struct pvclock_time_info *ti, uint32_t ti_version) { - virtio_membar_sync(); + virtio_membar_consumer(); return (ti->ti_version == ti_version); } diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 1574d5ff39256..55f3eafa067e9 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -277,7 +277,9 @@ howlong(void) limit = MIN(nrecs, nitems(stages)); for (i = 0; i < limit; i++) { - aprint_verbose("%s: %lums\n", stages[i].name, ((stages[i].tsc - entrytime) * 1000) / curcpu()->ci_data.cpu_cc_freq); + printf("%s: %lums\n", stages[i].name, + ((stages[i].tsc - entrytime) * 1000) / + curcpu()->ci_data.cpu_cc_freq); } } diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c index a307caff35ca7..059a681b148ab 100644 --- a/sys/kern/subr_autoconf.c +++ b/sys/kern/subr_autoconf.c @@ -115,6 +115,8 @@ __KERNEL_RCSID(0, "$NetBSD: subr_autoconf.c,v 1.314 2023/07/18 11:57:37 riastrad #include +#include + #include /* @@ -1854,7 +1856,7 @@ config_attach_internal(device_t parent, cfdata_t cf, void *aux, cfprint_t print, 
device_register_post_config(dev, aux); rnd_add_uint32(&rnd_autoconf_source, 0); - aprint_verbose("\n>>> attach %lums\n", (rdtsc() * 1000) / 3850000000); + addstage(device_xname(dev)); return dev; } From e23b8c910caba6399a9c92f732506548f0974c13 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 17 Jan 2024 10:08:36 +0100 Subject: [PATCH 081/114] fix: NVMM fixed, reports freq un Mhz instead of khz --- sys/arch/x86/x86/cpu.c | 14 +++++++++----- sys/arch/x86/x86/identcpu.c | 4 ++-- sys/arch/x86/x86/identcpu_subr.c | 11 ++++++++--- sys/arch/x86/x86/lapic.c | 8 ++++++-- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index 19c11b1deedc3..b508992487dc7 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -75,6 +75,8 @@ __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.209 2023/07/16 19:55:43 riastradh Exp $"); #include "acpica.h" #include "hpet.h" +#include "pvclock.h" + #include #include #include @@ -473,8 +475,11 @@ cpu_attach(device_t parent, device_t self, void *aux) * If the hypervisor is KVM, don't use lapic, instead * use pvclock(4). */ - if (!vm_guest_is_xenpvh_or_pvhvm() && - hv_type != VM_GUEST_KVM) + if (!vm_guest_is_xenpvh_or_pvhvm() +#if NPVCLOCK > 0 + && hv_type != VM_GUEST_KVM +#endif + ) lapic_calibrate_timer(false); } #endif @@ -1426,9 +1431,8 @@ cpu_get_tsc_freq(struct cpu_info *ci) uint64_t freq = 0, freq_from_cpuid, t0, t1; int64_t overhead; - if (CPU_IS_PRIMARY(ci) && cpu_hascounter()) { - if (ci->ci_data.cpu_cc_freq != 0) - return; + if (CPU_IS_PRIMARY(ci) && cpu_hascounter() && + ci->ci_data.cpu_cc_freq == 0) { /* * If it's the first call of this function, try to get TSC * freq from CPUID by calling cpu_tsc_freq_cpuid(). 
diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c index b31bffa237e84..636fe0ae49a70 100644 --- a/sys/arch/x86/x86/identcpu.c +++ b/sys/arch/x86/x86/identcpu.c @@ -1062,7 +1062,7 @@ static inline void pvh_real_hypervisor(vm_guest_t hv) { if (vm_guest != VM_GUEST_GENPVH) - vm_guest = hv; + vm_guest = hv_type = hv; else hv_type = hv; } @@ -1081,6 +1081,7 @@ identify_hypervisor(void) case VM_GUEST_XENPV: case VM_GUEST_XENPVH: return; + /* continue for hypervisor detection */ case VM_GUEST_GENPVH: is_vm = true; default: @@ -1125,7 +1126,6 @@ identify_hypervisor(void) pvh_real_hypervisor(VM_GUEST_NVMM); /* FreeBSD bhyve: "bhyve bhyve " */ /* OpenBSD vmm: "OpenBSDVMM58" */ - /* NetBSD nvmm: "___ NVMM ___" */ } for (i = 0; i < __arraycount(vm_system_products); i++) { diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index bc43986d209b0..f798779703bab 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -129,10 +129,15 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) uint32_t descs[4]; uint32_t denominator, numerator; - if (vm_guest != VM_GUEST_NO) { + /* NVMM tsc report is wrong */ + if (ci->ci_max_ext_cpuid >= 0x40000010) { + int mul = 1000; /* TSC freq in khz... 
*/ + if (hv_type == VM_GUEST_NVMM) + mul *= 1000; /* ...except for NVMM */ x86_cpuid(0x40000010, descs); - if (descs[0] > 0) - return descs[0] * 1000; /* TSC freq in khz */ + if (descs[0] > 0) { + return descs[0] * mul; + } } if (!((ci->ci_max_cpuid >= 0x15) && (cpu_vendor == CPUVENDOR_INTEL))) diff --git a/sys/arch/x86/x86/lapic.c b/sys/arch/x86/x86/lapic.c index c032f89e53229..459c44836e4eb 100644 --- a/sys/arch/x86/x86/lapic.c +++ b/sys/arch/x86/x86/lapic.c @@ -42,7 +42,7 @@ __KERNEL_RCSID(0, "$NetBSD: lapic.c,v 1.89 2022/09/07 00:40:19 knakahara Exp $") #include "opt_multiprocessor.h" #include "opt_ntp.h" #include "opt_xen.h" - +#include "pvclock.h" #include #include @@ -603,11 +603,15 @@ lapic_reset(void) static void lapic_initclock(void) { +#if NPVCLOCK > 0 /* * If the hypervisor is KVM, don't use lapic, instead * use pvclock(4). */ - if (curcpu() == &cpu_info_primary && hv_type != VM_GUEST_KVM) { + if (hv_type == VM_GUEST_KVM) + return; +#endif + if (curcpu() == &cpu_info_primary) { /* * Recalibrate the timer using the cycle counter, now that * the cycle counter itself has been recalibrated. From e5e3fee6734565ec653b365aed9b9354aef1eb96 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 17 Jan 2024 11:31:50 +0100 Subject: [PATCH 082/114] fix: back to return --- sys/arch/x86/x86/cpu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index b508992487dc7..8ceb4bf30c2de 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -1431,8 +1431,10 @@ cpu_get_tsc_freq(struct cpu_info *ci) uint64_t freq = 0, freq_from_cpuid, t0, t1; int64_t overhead; - if (CPU_IS_PRIMARY(ci) && cpu_hascounter() && - ci->ci_data.cpu_cc_freq == 0) { + if (ci->ci_data.cpu_cc_freq != 0) + return; + + if (CPU_IS_PRIMARY(ci) && cpu_hascounter()) { /* * If it's the first call of this function, try to get TSC * freq from CPUID by calling cpu_tsc_freq_cpuid(). 
From 53bc7bc3a21378308a3eeafe7af7f97e719892c6 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 17 Jan 2024 12:32:15 +0100 Subject: [PATCH 083/114] fix: cleanup and organize get tsc by cpuid --- sys/arch/x86/x86/identcpu_subr.c | 42 +++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index f798779703bab..ef2dfca34bde9 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -63,7 +63,7 @@ __KERNEL_RCSID(0, "$NetBSD: identcpu_subr.c,v 1.9 2021/10/07 13:04:18 msaitoh Ex #endif static uint64_t -cpu_tsc_freq_intel_brand(struct cpu_info *ci) +tsc_freq_intel_brand(struct cpu_info *ci) { char brand[48]; u_int regs[4]; @@ -116,31 +116,42 @@ cpu_tsc_freq_intel_brand(struct cpu_info *ci) freq *= i * 1000000; } #undef C2D + aprint_verbose( + "got tsc from cpu brand\n"); return freq; } } return 0; } -uint64_t -cpu_tsc_freq_cpuid(struct cpu_info *ci) +static uint64_t +tsc_freq_cpuid_vm(struct cpu_info *ci) { - uint64_t freq = 0, khz; uint32_t descs[4]; - uint32_t denominator, numerator; - /* NVMM tsc report is wrong */ if (ci->ci_max_ext_cpuid >= 0x40000010) { int mul = 1000; /* TSC freq in khz... 
*/ if (hv_type == VM_GUEST_NVMM) mul *= 1000; /* ...except for NVMM */ x86_cpuid(0x40000010, descs); if (descs[0] > 0) { + aprint_verbose( + "got tsc from vmware compatible cpuid\n"); return descs[0] * mul; } } - if (!((ci->ci_max_cpuid >= 0x15) && (cpu_vendor == CPUVENDOR_INTEL))) + return 0; +} + +static uint64_t +tsc_freq_cpuid(struct cpu_info *ci) +{ + uint64_t freq = 0, khz; + uint32_t descs[4]; + uint32_t denominator, numerator; + + if (!(ci->ci_max_cpuid >= 0x15)) return 0; x86_cpuid(0x15, descs); @@ -210,9 +221,22 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) } #endif } + return freq; +} + +uint64_t +cpu_tsc_freq_cpuid(struct cpu_info *ci) +{ + uint64_t freq = 0; + + if (freq == 0 && cpu_vendor == CPUVENDOR_INTEL) + freq = tsc_freq_cpuid(ci); + /* vmware compatible tsc query */ + if (freq == 0 && vm_guest != VM_GUEST_NO) + freq = tsc_freq_cpuid_vm(ci); /* still no luck, get the frequency from brand */ - if (freq == 0) - freq = cpu_tsc_freq_intel_brand(ci); + if (freq == 0 && cpu_vendor == CPUVENDOR_INTEL) + freq = tsc_freq_intel_brand(ci); if (freq != 0) aprint_verbose_dev(ci->ci_dev, "TSC freq CPUID %" PRIu64 From 1134d9fcdfc0b81967c2a8802b2d2f0305adca88 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Wed, 17 Jan 2024 14:40:36 +0100 Subject: [PATCH 084/114] fix: cleanup cpu_tsc_freq_cpuid --- sys/arch/x86/x86/identcpu_subr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index ef2dfca34bde9..a3546d7f85bda 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -229,12 +229,12 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) { uint64_t freq = 0; - if (freq == 0 && cpu_vendor == CPUVENDOR_INTEL) + if (cpu_vendor == CPUVENDOR_INTEL) freq = tsc_freq_cpuid(ci); - /* vmware compatible tsc query */ + /* VMware compatible tsc query */ if (freq == 0 && vm_guest != VM_GUEST_NO) freq = tsc_freq_cpuid_vm(ci); - /* still no luck, get the 
frequency from brand */ + /* Still no luck, get the frequency from brand */ if (freq == 0 && cpu_vendor == CPUVENDOR_INTEL) freq = tsc_freq_intel_brand(ci); From 810119f52ee5648d896c1b559a0b1fc1a082f459 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 18 Jan 2024 13:10:07 +0100 Subject: [PATCH 085/114] feat: added cperciva@ TSLOG --- sys/kern/kern_tslog.c | 152 ++++++++++++++++++++++++++++++++++++++++++ sys/sys/tslog.h | 69 +++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 sys/kern/kern_tslog.c create mode 100644 sys/sys/tslog.h diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c new file mode 100644 index 0000000000000..473c3e05155c3 --- /dev/null +++ b/sys/kern/kern_tslog.c @@ -0,0 +1,152 @@ +/*- + * Copyright (c) 2017 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifndef TSLOGSIZE +#define TSLOGSIZE 262144 +#endif + +#define nitems(x) __arraycount(x) + +static volatile int nrecs = 0; +static struct timestamp { + lwpid_t lid; + int type; + const char * f; + const char * s; + uint64_t tsc; +} timestamps[TSLOGSIZE]; + +void tslog(const lwp_t *, int, const char *, const char *); +static int sysctl_debug_tslog(SYSCTLFN_PROTO); + +void +tslog(const lwp_t *l, int type, const char *f, const char *s) +{ + uint64_t tsc = rdtsc(); + + /* A NULL thread is lwp0 before curthread is set. */ + if (l == NULL) + l = &lwp0; + + /* Store record. */ + if (nrecs < nitems(timestamps)) { + timestamps[nrecs].lid = l->l_lid; + timestamps[nrecs].type = type; + timestamps[nrecs].f = f; + timestamps[nrecs].s = s; + timestamps[nrecs].tsc = tsc; + + /* Grab a slot. */ + atomic_add_int(&nrecs, 1); + } +} + +static int +sysctl_debug_tslog(SYSCTLFN_ARGS) +{ + char buf[LINE_MAX]; + char *where = oldp; + size_t buflen, slen, i, limit, needed = 0; + int error = 0; + static bool first = true; + static size_t max = 0; + + buflen = *oldlenp; + /* Add data logged within the kernel. 
*/ + limit = MIN(nrecs, nitems(timestamps)); + for (i = 0; i < limit; i++) { + snprintf(buf, LINE_MAX, "0x%x %llu", + timestamps[i].lid, + (unsigned long long)timestamps[i].tsc); + switch (timestamps[i].type) { + case TS_ENTER: + strcat(buf, " ENTER"); + break; + case TS_EXIT: + strcat(buf, " EXIT"); + break; + case TS_THREAD: + strcat(buf, " THREAD"); + break; + case TS_EVENT: + strcat(buf, " EVENT"); + break; + } + snprintf(buf, LINE_MAX, "%s %s", buf, + timestamps[i].f ? timestamps[i].f : "(null)"); + if (timestamps[i].s) + snprintf(buf, LINE_MAX, "%s %s\n", buf, + timestamps[i].s); + else + strcat(buf, "\n"); + + slen = strlen(buf) + 1; + + if (!first) { + if (buflen < slen) { + /* still not enough space */ + first = true; + continue; + } + if (i > 0) + where--; /* overwrite last \0 */ + if ((error = copyout(buf, where, slen))) + break; + where += slen; + buflen -= slen; + } + + needed += slen; + } + first = false; + + if (needed > max) { + max = needed; + } + *oldlenp = max; + + return error; +} + +SYSCTL_SETUP(sysctl_tslog_setup, "tslog sysctl") +{ + sysctl_createv(NULL, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_STRING, "tslog", + SYSCTL_DESCR("Dump recorded event timestamps"), + sysctl_debug_tslog, 0, NULL, 0, + CTL_CREATE, CTL_EOL); +} diff --git a/sys/sys/tslog.h b/sys/sys/tslog.h new file mode 100644 index 0000000000000..701ea2fd0747b --- /dev/null +++ b/sys/sys/tslog.h @@ -0,0 +1,69 @@ +/*- + * Copyright (c) 2017 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TSLOG_H_ +#define _TSLOG_H_ + +#ifdef _KERNEL +#ifdef TSLOG +#include +#include +#endif + +#define TS_ENTER 0 +#define TS_EXIT 1 +#define TS_THREAD 2 +#define TS_EVENT 3 + +#define TSENTER() TSRAW(curlwp, TS_ENTER, __func__, NULL) +#define TSENTER2(x) TSRAW(curlwp, TS_ENTER, __func__, x) +#define TSEXIT() TSRAW(curlwp, TS_EXIT, __func__, NULL) +#define TSEXIT2(x) TSRAW(curlwp, TS_EXIT, __func__, x) +#define TSTHREAD(td, x) TSRAW(td, TS_THREAD, x, NULL) +#define TSEVENT(x) TSRAW(curlwp, TS_EVENT, x, NULL) +#define TSEVENT2(x, y) TSRAW(curlwp, TS_EVENT, x, y) +#define TSLINE() TSEVENT2(__FILE__, __XSTRING(__LINE__)) +#define TSWAIT(x) TSEVENT2("WAIT", x); +#define TSUNWAIT(x) TSEVENT2("UNWAIT", x); +#define TSHOLD(x) TSEVENT2("HOLD", x); +#define TSRELEASE(x) TSEVENT2("RELEASE", x); +#define TSFORK(p, pp) TSRAW_USER(p, pp, NULL, NULL) +#define TSEXEC(p, name) TSRAW_USER(p, (pid_t)(-1), name, NULL) +#define TSNAMEI(p, name) TSRAW_USER(p, (pid_t)(-1), NULL, name) +#define TSPROCEXIT(p) TSRAW_USER(p, (pid_t)(-1), 
NULL, NULL) + +#ifdef TSLOG +#define TSRAW(a, b, c, d) tslog(a, b, c, d) +void tslog(const lwp_t *, int, const char *, const char *); +#define TSRAW_USER(a, b, c, d) tslog_user(a, b, c, d) +void tslog_user(pid_t, pid_t, const char *, const char *); +#else +#define TSRAW(a, b, c, d) /* Timestamp logging disabled */ +#define TSRAW_USER(a, b, c, d) /* Timestamp logging disabled */ +#endif + +#endif /* _KERNEL */ +#endif /* _TSLOG_H_ */ From 70d01b242b5812703ad694c7b78f0345fec60dee Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 18 Jan 2024 20:44:13 +0100 Subject: [PATCH 086/114] feat: removed most of tstages, now to TSLOG --- sys/arch/x86/x86/cpu.c | 4 --- sys/arch/x86/x86/identcpu.c | 2 +- sys/arch/x86/x86/lapic.c | 2 -- sys/arch/x86/x86/x86_machdep.c | 6 ---- sys/dev/ic/com.c | 2 -- sys/dev/isa/com_isa.c | 2 -- sys/kern/files.kern | 1 + sys/kern/init_main.c | 58 +++------------------------------- sys/kern/kern_tslog.c | 1 - sys/kern/subr_autoconf.c | 6 ++-- 10 files changed, 11 insertions(+), 73 deletions(-) diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index 8ceb4bf30c2de..b78e2528a066e 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -88,8 +88,6 @@ __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.209 2023/07/16 19:55:43 riastradh Exp $"); #include #include -#include - #include #include "acpica.h" /* for NACPICA, for mp_verbose */ @@ -355,7 +353,6 @@ cpu_attach(device_t parent, device_t self, void *aux) sc->sc_dev = self; - addstage("in cpu_attach"); if (ncpu > maxcpus) { #ifndef _LP64 aprint_error(": too many CPUs, please use NetBSD/amd64\n"); @@ -563,7 +560,6 @@ cpu_attach(device_t parent, device_t self, void *aux) } #endif - addstage("out cpu_attach"); /* * Postpone the "cpufeaturebus" scan. 
* It is safe to scan the pseudo-bus diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c index 636fe0ae49a70..3ab8c5c3844ce 100644 --- a/sys/arch/x86/x86/identcpu.c +++ b/sys/arch/x86/x86/identcpu.c @@ -1052,8 +1052,8 @@ static const struct vm_name_guest vm_system_products[] = { { "Virtual Machine", VM_GUEST_VM }, /* Microsoft VirtualPC */ { "VirtualBox", VM_GUEST_VIRTUALBOX }, /* Sun xVM VirtualBox */ { "Parallels Virtual Platform", VM_GUEST_VM }, /* Parallels VM */ - { "Generic PVH", VM_GUEST_GENPVH}, /* Generic PVH */ { "KVM", VM_GUEST_KVM }, /* KVM */ + { "Generic PVH", VM_GUEST_GENPVH}, /* Generic PVH */ { "NVMM", VM_GUEST_NVMM} /* NVMM */ }; diff --git a/sys/arch/x86/x86/lapic.c b/sys/arch/x86/x86/lapic.c index 459c44836e4eb..ea75591f20c82 100644 --- a/sys/arch/x86/x86/lapic.c +++ b/sys/arch/x86/x86/lapic.c @@ -50,8 +50,6 @@ __KERNEL_RCSID(0, "$NetBSD: lapic.c,v 1.89 2022/09/07 00:40:19 knakahara Exp $") #include #include -#include - #include #include diff --git a/sys/arch/x86/x86/x86_machdep.c b/sys/arch/x86/x86/x86_machdep.c index a753c1088a46a..8a2364b6eccc7 100644 --- a/sys/arch/x86/x86/x86_machdep.c +++ b/sys/arch/x86/x86/x86_machdep.c @@ -74,8 +74,6 @@ __KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.154 2023/10/04 20:28:06 ad Exp $") #include -#include - #include "tsc.h" #include "acpica.h" @@ -1530,14 +1528,10 @@ cpu_initclocks(void) * Re-calibrate TSC on boot CPU using most accurate time source, * thus making accurate TSC available for x86_initclock_func(). */ - addstage("before cpu_get_tsc_freq"); cpu_get_tsc_freq(curcpu()); - addstage("after cpu_get_tsc_freq"); - addstage("before x86_initclock_func"); /* Now start the clocks on this CPU (the boot CPU). 
*/ (*x86_initclock_func)(); - addstage("after x86_initclock_func"); } int diff --git a/sys/dev/ic/com.c b/sys/dev/ic/com.c index 6f03af7885f76..dd3ce6fe4973d 100644 --- a/sys/dev/ic/com.c +++ b/sys/dev/ic/com.c @@ -122,8 +122,6 @@ __KERNEL_RCSID(0, "$NetBSD: com.c,v 1.384 2023/04/11 13:01:41 riastradh Exp $"); #include -#include - #include #include diff --git a/sys/dev/isa/com_isa.c b/sys/dev/isa/com_isa.c index 7b2fecd987370..e3c14bce8796a 100644 --- a/sys/dev/isa/com_isa.c +++ b/sys/dev/isa/com_isa.c @@ -78,8 +78,6 @@ __KERNEL_RCSID(0, "$NetBSD: com_isa.c,v 1.41 2018/12/08 17:46:13 thorpej Exp $") #include #include -#include - #include #include #ifdef COM_HAYESP diff --git a/sys/kern/files.kern b/sys/kern/files.kern index bf9ea639b0184..af0e944a30937 100644 --- a/sys/kern/files.kern +++ b/sys/kern/files.kern @@ -96,6 +96,7 @@ file kern/kern_time.c kern file kern/kern_timeout.c kern file kern/kern_turnstile.c kern file kern/kern_todr.c kern +file kern/kern_tslog.c kern file kern/kern_uidinfo.c kern file kern/kern_uuid.c kern file kern/kgdb_stub.c kgdb diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 55f3eafa067e9..3fa12fa9cb829 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -232,7 +232,7 @@ extern void *_binary_splash_image_end; #include -#include +#include extern time_t rootfstime; @@ -259,39 +259,13 @@ static void howlong(void); extern uint32_t boottime_low; extern uint32_t boottime_high; -#define MAXSTAGES 256 -#define nitems(x) __arraycount(x) - -static volatile int nrecs = 0; - -struct bootstage { - const char *name; - uint64_t tsc; -} stages[MAXSTAGES]; - static void howlong(void) { uint64_t entrytime = (uint64_t)boottime_high << 32 | boottime_low; - int i, limit; - limit = MIN(nrecs, nitems(stages)); - for (i = 0; i < limit; i++) { - printf("%s: %lums\n", stages[i].name, - ((stages[i].tsc - entrytime) * 1000) / + printf("boot: %lums\n", ((rdtsc() - entrytime) * 1000) / curcpu()->ci_data.cpu_cc_freq); - } -} - -void 
-addstage(const char *stage) -{ - if (nrecs < nitems(stages)) { - stages[nrecs].name = stage; - stages[nrecs].tsc = rdtsc(); - - atomic_add_int(&nrecs, 1); - } } /* @@ -313,7 +287,7 @@ main(void) CPU_INFO_ITERATOR cii; struct cpu_info *ci; - addstage("start"); + TSENTER(); #ifdef DIAGNOSTIC /* * Verify that CPU_INFO_FOREACH() knows about the boot CPU @@ -338,7 +312,6 @@ main(void) * in case of early panic or other messages. */ consinit(); - addstage("consinit"); #ifdef CNMAGIC cn_set_magic(CNMAGIC); #endif @@ -369,8 +342,6 @@ main(void) kprintf_init(); percpu_init(); - - addstage("percpu_init"); /* Initialize radix trees (used by numerous subsystems). */ radix_tree_init(); @@ -409,7 +380,6 @@ main(void) module_init(); module_hook_init(); - addstage("module_init"); /* * Initialize the kernel authorization subsystem and start the * default security model, if any. We need to do this early @@ -481,8 +451,6 @@ main(void) sched_init(); - addstage("sched_init"); - /* Initialize processor-sets */ psets_init(); @@ -592,7 +560,6 @@ main(void) #ifdef __HAVE_LEGACY_INTRCNT evcnt_attach_legacy_intrcnt(); #endif - addstage("configure"); /* Enable deferred processing of RNG samples */ rnd_init_softint(); @@ -605,7 +572,6 @@ main(void) * system heartbeat on all CPUs. */ heartbeat_start(); - addstage("heartbeat_start"); ssp_init(); @@ -613,9 +579,7 @@ main(void) mm_init(); - addstage("before configure2"); configure2(); - addstage("configure2"); /* Initialize the rest of ipi(9) after CPUs have been detected. */ ipi_percpu_init(); @@ -632,7 +596,6 @@ main(void) /* Initialize exec structures */ exec_init(1); /* seminit calls exithook_establish() */ - addstage("exec_init"); #if NVERIEXEC > 0 /* @@ -642,7 +605,6 @@ main(void) #endif /* NVERIEXEC > 0 */ pax_init(); - addstage("pax_init"); #ifdef IPSEC /* Attach network crypto subsystem */ @@ -686,7 +648,6 @@ main(void) procinit_sysctl(); scdebug_init(); - addstage("scdebug_init"); /* * Create process 1 (init(8)). 
We do this now, as Unix has @@ -699,7 +660,6 @@ main(void) */ if (fork1(l, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL)) panic("fork init"); - addstage("fork"); /* * The initproc variable cannot be initialized in start_init as there @@ -708,7 +668,6 @@ main(void) mutex_enter(&proc_lock); initproc = proc_find_raw(1); mutex_exit(&proc_lock); - addstage("initproc"); /* * Load any remaining builtin modules, and hand back temporary @@ -732,7 +691,6 @@ main(void) * the root and dump devices. */ cpu_rootconf(); - addstage("cpu_rootconf"); cpu_dumpconf(); /* Mount the root file system. */ @@ -755,7 +713,6 @@ main(void) * don't have a non-volatile time-of-day device. */ inittodr(rootfstime); - addstage("inittodr"); /* * Now can look at time, having had a chance to verify the time @@ -794,14 +751,9 @@ main(void) NULL, NULL, "ioflush")) panic("fork syncer"); - addstage("uvm_swap_init"); - /* Wait for final configure threads to complete. */ config_finalize_mountroot(); - addstage("config_finalize_mountroot"); - - howlong(); /* * Okay, now we can let init(8) exec! It's off to userland! */ @@ -810,6 +762,8 @@ main(void) cv_broadcast(&lbolt); mutex_exit(&proc_lock); + TSEXIT(); + howlong(); /* The scheduler is an infinite loop. */ uvm_scheduler(); /* NOTREACHED */ @@ -866,9 +820,7 @@ configure2(void) * Now that we've found all the hardware, start the real time * and statistics clocks. */ - addstage("before initclocks"); initclocks(); - addstage("after initclocks"); cold = 0; /* clocks are running, we're warm now! 
*/ s = splsched(); diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index 473c3e05155c3..fbb2d12efa1e2 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -128,7 +128,6 @@ sysctl_debug_tslog(SYSCTLFN_ARGS) where += slen; buflen -= slen; } - needed += slen; } first = false; diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c index 059a681b148ab..654a0579eb1e9 100644 --- a/sys/kern/subr_autoconf.c +++ b/sys/kern/subr_autoconf.c @@ -115,7 +115,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_autoconf.c,v 1.314 2023/07/18 11:57:37 riastrad #include -#include +#include #include @@ -1761,6 +1761,8 @@ config_attach_internal(device_t parent, cfdata_t cf, void *aux, cfprint_t print, KASSERT(KERNEL_LOCKED_P()); + TSENTER2(cf->cf_name); + dev = config_devalloc(parent, cf, args); if (!dev) panic("config_attach: allocation of device softc failed"); @@ -1856,7 +1858,7 @@ config_attach_internal(device_t parent, cfdata_t cf, void *aux, cfprint_t print, device_register_post_config(dev, aux); rnd_add_uint32(&rnd_autoconf_source, 0); - addstage(device_xname(dev)); + TSEXIT2(cf->cf_name); return dev; } From b7cc1aeac7b2e2d8afa99a6e17bc1834e11f0c4a Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 18 Jan 2024 20:45:09 +0100 Subject: [PATCH 087/114] feat: horrible python script to add TSLOG to every main() function --- tsenable.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tsenable.py diff --git a/tsenable.py b/tsenable.py new file mode 100644 index 0000000000000..1c1046b88a331 --- /dev/null +++ b/tsenable.py @@ -0,0 +1,32 @@ +import re +import sys + +keywords = ["defined", "sizeof", "return", "while", "calc_cache_size", "fork1", + "panic"] + +with open(sys.argv[1], 'r') as file: + lines = file.readlines() + +in_section = False +for line in lines: + if 'TSENTER()' in line: + in_section = True + print(line.rstrip()) + elif 'TSEXIT()' in line: + in_section = False + print(line.rstrip()) + elif in_section 
and re.match(r'[^\w]+\s*/?\*+/?\s*.*', line): + print(line.rstrip()) + elif in_section and any(k in line for k in keywords): + print(line.rstrip()) + elif in_section and re.match(r'.+[a-z0-9_]+\([^\)]*\)', line): + m = re.findall(r'(\s*)([a-z0-9_]+)\([^\)]*\)', line) + if m: + s, f = m[0] + print(f'{s}TSENTER2("{f}");') + print(line.rstrip()) + print(f'{s}TSEXIT2("{f}");\n') + else: + print(line.rstrip()) + else: + print(line.rstrip()) From f6ce891a9bdf2266966bd068166e6fdf045f4b06 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 18 Jan 2024 21:27:23 +0100 Subject: [PATCH 088/114] fix: whitelisted ctob --- tsenable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsenable.py b/tsenable.py index 1c1046b88a331..c696618f47be7 100644 --- a/tsenable.py +++ b/tsenable.py @@ -2,7 +2,7 @@ import sys keywords = ["defined", "sizeof", "return", "while", "calc_cache_size", "fork1", - "panic"] + "panic", "ctob"] with open(sys.argv[1], 'r') as file: lines = file.readlines() From f46ad9c74e9c69a94789163e750be341d2e03b97 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 19 Jan 2024 09:18:11 +0100 Subject: [PATCH 089/114] feat: tslog_user imported, other measurements added --- sys/dev/pv/pvclock.c | 31 ++++++++ sys/kern/kern_exec.c | 6 +- sys/kern/kern_exit.c | 3 + sys/kern/kern_fork.c | 2 + sys/kern/kern_lwp.c | 3 + sys/kern/kern_tslog.c | 166 +++++++++++++++++++++++++++++++++++++++++- sys/sys/tslog.h | 31 ++++++++ 7 files changed, 237 insertions(+), 5 deletions(-) diff --git a/sys/dev/pv/pvclock.c b/sys/dev/pv/pvclock.c index 3967240caf2b9..09ef8a5661408 100644 --- a/sys/dev/pv/pvclock.c +++ b/sys/dev/pv/pvclock.c @@ -1,3 +1,34 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Emile 'iMil' Heitor. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /* $OpenBSD: pvclock.c,v 1.9 2023/02/04 19:19:37 cheloha Exp $ */ /* diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index a45322c3d16a3..84a6c163a7385 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -112,6 +112,8 @@ __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.521 2023/10/08 12:38:58 ad Exp $"); #include #include +#include + #include #include @@ -821,7 +823,6 @@ execve_loadvm(struct lwp *l, bool has_path, const char *path, int fd, epp->ep_xfd = fd; } - /* * initialize the fields of the exec package. 
*/ @@ -1480,6 +1481,9 @@ execve1(struct lwp *l, bool has_path, const char *path, int fd, &data); if (error) return error; + + TSEXEC(l->l_proc->p_pid, data.ed_pathstring); + error = execve_runproc(l, &data, false, false); return error; } diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index e11b711e01b6f..93c5c15dfd293 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -108,6 +108,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.298 2023/10/08 12:38:58 ad Exp $"); #include #include #include +#include #include @@ -204,6 +205,8 @@ exit1(struct lwp *l, int exitcode, int signo) ksiginfoq_t kq; int wakeinit; + TSPROCEXIT(l->l_proc->p_pid); + p = l->l_proc; /* Verify that we hold no locks other than p->p_lock. */ diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index eb4e4995b3c20..c9b72d48cac77 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -95,6 +95,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.230 2023/02/25 08:22:00 skrll Exp $" #include #include #include +#include /* * DTrace SDT provider definitions @@ -317,6 +318,7 @@ fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize, atomic_dec_uint(&nprocs); return EAGAIN; } + TSFORK(p2->p_pid, p1->p_pid); /* * We are now committed to the fork. From here on, we may diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c index f12bc4997d587..3ddee4201f181 100644 --- a/sys/kern/kern_lwp.c +++ b/sys/kern/kern_lwp.c @@ -253,6 +253,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.269 2023/12/20 21:03:50 andvar Exp $" #include #include #include +#include #include #include @@ -1233,6 +1234,8 @@ lwp_exit(struct lwp *l) */ cpu_lwp_free(l, 0); + TSTHREAD(l, l->l_name); + if (current) { /* Switch away into oblivion. 
*/ lwp_lock(l); diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index fbb2d12efa1e2..ae8b133b2cf2b 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -1,3 +1,34 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Emile 'iMil' Heitor. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /*- * Copyright (c) 2017 Colin Percival * All rights reserved. 
@@ -26,6 +57,7 @@ #include #include +#include #include #include #include @@ -44,8 +76,8 @@ static volatile int nrecs = 0; static struct timestamp { lwpid_t lid; int type; - const char * f; - const char * s; + const char *f; + const char *s; uint64_t tsc; } timestamps[TSLOGSIZE]; @@ -77,7 +109,7 @@ tslog(const lwp_t *l, int type, const char *f, const char *s) static int sysctl_debug_tslog(SYSCTLFN_ARGS) { - char buf[LINE_MAX]; + char buf[LINE_MAX] = ""; char *where = oldp; size_t buflen, slen, i, limit, needed = 0; int error = 0; @@ -140,12 +172,138 @@ sysctl_debug_tslog(SYSCTLFN_ARGS) return error; } +MALLOC_DEFINE(M_TSLOGUSER, "tsloguser", "Strings used by userland tslog"); +static struct procdata { + pid_t ppid; + uint64_t tsc_forked; + uint64_t tsc_exited; + char *execname; + char *namei; + int reused; +} procs[PID_MAX + 1]; + +void +tslog_user(pid_t pid, pid_t ppid, const char *execname, const char *namei) +{ + uint64_t tsc = rdtsc(); + size_t len; + + /* If we wrapped, do nothing. */ + if (procs[pid].reused) + return; + + /* If we have a ppid, we're recording a fork. */ + if (ppid != (pid_t)(-1)) { + /* If we have a ppid already, we wrapped. */ + if (procs[pid].ppid) { + procs[pid].reused = 1; + return; + } + + /* Fill in some fields. */ + procs[pid].ppid = ppid; + procs[pid].tsc_forked = tsc; + return; + } + + /* If we have an execname, record it. */ + if (execname != NULL) { + if (procs[pid].execname != NULL) + free(procs[pid].execname, M_TSLOGUSER); + len = strlen(execname) + 1; + procs[pid].execname = malloc(len, + M_TSLOGUSER, M_WAITOK | M_ZERO); + strlcpy(procs[pid].execname, execname, len); + printf("%u %s\n", pid, procs[pid].execname); + return; + } + + /* Record the first namei for the process. */ + if (namei != NULL) { + len = strlen(namei) + 1; + if (procs[pid].namei == NULL) { + procs[pid].namei = malloc(len, + M_TSLOGUSER, M_WAITOK | M_ZERO); + strlcpy(procs[pid].namei, namei, len); + } + return; + } + + /* Otherwise we're recording an exit. 
*/ + procs[pid].tsc_exited = tsc; +} + +static int +sysctl_debug_tslog_user(SYSCTLFN_ARGS) +{ + char buf[LINE_MAX]; + char *where = oldp; + size_t buflen, slen, needed = 0; + int error = 0; + static bool first = true; + static size_t max = 0; + pid_t pid; + + buflen = *oldlenp; + /* Export the data we logged. */ + for (pid = 0; pid <= PID_MAX; pid++) { + if (procs[pid].tsc_forked == 0 && + procs[pid].execname == NULL && + procs[pid].namei == NULL && + procs[pid].tsc_exited == 0) + continue; + snprintf(buf, LINE_MAX, "%zu", (size_t)pid); + snprintf(buf, LINE_MAX, "%s %zu", buf, (size_t)procs[pid].ppid); + snprintf(buf, LINE_MAX, "%s %llu", buf, + (unsigned long long)procs[pid].tsc_forked); + snprintf(buf, LINE_MAX, "%s %llu", buf, + (unsigned long long)procs[pid].tsc_exited); + snprintf(buf, LINE_MAX, "%s \"%s\"", buf, procs[pid].execname ? + procs[pid].execname : ""); + snprintf(buf, LINE_MAX, "%s \"%s\"", buf, procs[pid].namei ? + procs[pid].namei : ""); + strcat(buf, "\n"); + + slen = strlen(buf) + 1; + + if (!first) { + if (buflen < slen) { + /* still not enough space */ + first = true; + continue; + } + if (pid > 0) + where--; /* overwrite last \0 */ + if ((error = copyout(buf, where, slen))) + break; + where += slen; + buflen -= slen; + } + needed += slen; + } + first = false; + + if (needed > max) { + max = needed; + } + *oldlenp = max; + + return (error); +} + SYSCTL_SETUP(sysctl_tslog_setup, "tslog sysctl") { sysctl_createv(NULL, 0, NULL, NULL, - CTLFLAG_PERMANENT, + CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "tslog", SYSCTL_DESCR("Dump recorded event timestamps"), sysctl_debug_tslog, 0, NULL, 0, CTL_CREATE, CTL_EOL); + sysctl_createv(NULL, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READONLY, + CTLTYPE_STRING, "tslog_user", + SYSCTL_DESCR("Dump recorded userland event timestamps"), + sysctl_debug_tslog_user, 0, NULL, 0, + CTL_CREATE, CTL_EOL); } + diff --git a/sys/sys/tslog.h b/sys/sys/tslog.h index 701ea2fd0747b..b03a04c72714b 100644 --- 
a/sys/sys/tslog.h +++ b/sys/sys/tslog.h @@ -1,3 +1,34 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Emile 'iMil' Heitor. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /*- * Copyright (c) 2017 Colin Percival * All rights reserved. 
From d65d1a78c4dd12982324e1cf4ada3e795652649e Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 19 Jan 2024 09:29:02 +0100 Subject: [PATCH 090/114] fix: remaining debug --- sys/kern/kern_tslog.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index ae8b133b2cf2b..20e56497bebce 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -214,7 +214,6 @@ tslog_user(pid_t pid, pid_t ppid, const char *execname, const char *namei) procs[pid].execname = malloc(len, M_TSLOGUSER, M_WAITOK | M_ZERO); strlcpy(procs[pid].execname, execname, len); - printf("%u %s\n", pid, procs[pid].execname); return; } From dd1db6b9dee53deeae40767e5112850c39e82634 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 19 Jan 2024 11:38:29 +0100 Subject: [PATCH 091/114] fix: print before free --- sys/kern/kern_lwp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c index 3ddee4201f181..859f3bb7a9884 100644 --- a/sys/kern/kern_lwp.c +++ b/sys/kern/kern_lwp.c @@ -1226,6 +1226,7 @@ lwp_exit(struct lwp *l) mutex_exit(p->p_lock); cv_broadcast(&p->p_lwpcv); + TSTHREAD(l, l->l_name); /* * We can no longer block. At this point, lwp_free() may already * be gunning for us. On a multi-CPU system, we may be off p_lwps. @@ -1234,8 +1235,6 @@ lwp_exit(struct lwp *l) */ cpu_lwp_free(l, 0); - TSTHREAD(l, l->l_name); - if (current) { /* Switch away into oblivion. 
*/ lwp_lock(l); From 8442b252c248fe74ac99ef9472ebef7da1d7dcee Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 19 Jan 2024 11:38:48 +0100 Subject: [PATCH 092/114] fix: attach to debug node --- sys/kern/kern_tslog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index 20e56497bebce..1d2b8e1dfc494 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -297,12 +297,12 @@ SYSCTL_SETUP(sysctl_tslog_setup, "tslog sysctl") CTLTYPE_STRING, "tslog", SYSCTL_DESCR("Dump recorded event timestamps"), sysctl_debug_tslog, 0, NULL, 0, - CTL_CREATE, CTL_EOL); + CTL_DEBUG, CTL_CREATE, CTL_EOL); sysctl_createv(NULL, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "tslog_user", SYSCTL_DESCR("Dump recorded userland event timestamps"), sysctl_debug_tslog_user, 0, NULL, 0, - CTL_CREATE, CTL_EOL); + CTL_DEBUG, CTL_CREATE, CTL_EOL); } From 6a3fad42bd0add5b9d7a30a7a423638b9c26f0a4 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 19 Jan 2024 11:39:07 +0100 Subject: [PATCH 093/114] feat: multiline support --- tsenable.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tsenable.py b/tsenable.py index c696618f47be7..310c4048925c0 100644 --- a/tsenable.py +++ b/tsenable.py @@ -1,15 +1,21 @@ import re import sys -keywords = ["defined", "sizeof", "return", "while", "calc_cache_size", "fork1", - "panic", "ctob"] +keywords = ["defined", "sizeof", "return", "while", "panic"] +multiline = [ "calc_cache_size", "fork1", "ctob"] with open(sys.argv[1], 'r') as file: lines = file.readlines() in_section = False +exit_func = None for line in lines: - if 'TSENTER()' in line: + if exit_func: + print(line.rstrip()) + print(exit_func) + exit_func = None + continue + elif 'TSENTER()' in line: in_section = True print(line.rstrip()) elif 'TSEXIT()' in line: @@ -25,7 +31,11 @@ s, f = m[0] print(f'{s}TSENTER2("{f}");') print(line.rstrip()) - 
print(f'{s}TSEXIT2("{f}");\n') + exit_func = f'{s}TSEXIT2("{f}");\n' + if any(l in line for l in multiline): + continue; + print(exit_func) + exit_func = None else: print(line.rstrip()) else: From cc30a734243efa40e7c0a63d568a9c17c65dec20 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 19 Jan 2024 12:11:50 +0100 Subject: [PATCH 094/114] fix: tslog kthread_create --- sys/kern/kern_kthread.c | 2 ++ sys/kern/kern_lwp.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index f182d764b6ccf..374dd9e9553b2 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -41,6 +41,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_kthread.c,v 1.49 2023/09/23 14:40:42 ad Exp $") #include #include #include +#include #include #include @@ -96,6 +97,7 @@ kthread_create(pri_t pri, int flag, struct cpu_info *ci, va_end(ap); } + TSTHREAD(l, l->l_name); /* * Set parameters. */ diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c index 859f3bb7a9884..f12bc4997d587 100644 --- a/sys/kern/kern_lwp.c +++ b/sys/kern/kern_lwp.c @@ -253,7 +253,6 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.269 2023/12/20 21:03:50 andvar Exp $" #include #include #include -#include #include #include @@ -1226,7 +1225,6 @@ lwp_exit(struct lwp *l) mutex_exit(p->p_lock); cv_broadcast(&p->p_lwpcv); - TSTHREAD(l, l->l_name); /* * We can no longer block. At this point, lwp_free() may already * be gunning for us. On a multi-CPU system, we may be off p_lwps. 
From 471967899fb781cb6672f76202152e4602cdd30f Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sat, 20 Jan 2024 14:19:44 +0100 Subject: [PATCH 095/114] fix: cleaned up tslog --- sys/kern/kern_tslog.c | 74 +++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 42 deletions(-) diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index 1d2b8e1dfc494..4df514a7e28b2 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -72,7 +72,7 @@ #define nitems(x) __arraycount(x) -static volatile int nrecs = 0; +static volatile long nrecs = 0; static struct timestamp { lwpid_t lid; int type; @@ -102,7 +102,7 @@ tslog(const lwp_t *l, int type, const char *f, const char *s) timestamps[nrecs].tsc = tsc; /* Grab a slot. */ - atomic_add_int(&nrecs, 1); + atomic_add_long(&nrecs, 1); } } @@ -111,12 +111,15 @@ sysctl_debug_tslog(SYSCTLFN_ARGS) { char buf[LINE_MAX] = ""; char *where = oldp; - size_t buflen, slen, i, limit, needed = 0; + size_t slen, i, limit; int error = 0; - static bool first = true; - static size_t max = 0; + static size_t needed = 0; - buflen = *oldlenp; + /* sysctl first tries with a size of 1024 */ + if (*oldlenp < needed) { + *oldlenp = needed; + return ENOMEM; + } /* Add data logged within the kernel. */ limit = MIN(nrecs, nitems(timestamps)); for (i = 0; i < limit; i++) { @@ -137,8 +140,8 @@ sysctl_debug_tslog(SYSCTLFN_ARGS) strcat(buf, " EVENT"); break; } - snprintf(buf, LINE_MAX, "%s %s", buf, - timestamps[i].f ? timestamps[i].f : "(null)"); + snprintf(buf, LINE_MAX, "%s %s %lu", buf, + timestamps[i].f ? 
timestamps[i].f : "(null)", where - (char *)oldp); if (timestamps[i].s) snprintf(buf, LINE_MAX, "%s %s\n", buf, timestamps[i].s); @@ -147,27 +150,19 @@ sysctl_debug_tslog(SYSCTLFN_ARGS) slen = strlen(buf) + 1; - if (!first) { - if (buflen < slen) { - /* still not enough space */ - first = true; - continue; - } + if (where == NULL) /* 1st pass, calculate needed */ + needed += slen; + else { if (i > 0) where--; /* overwrite last \0 */ if ((error = copyout(buf, where, slen))) break; where += slen; - buflen -= slen; } - needed += slen; - } - first = false; - - if (needed > max) { - max = needed; } - *oldlenp = max; + /* Come back with an address */ + if (oldp == NULL) + *oldlenp = needed; return error; } @@ -235,15 +230,18 @@ tslog_user(pid_t pid, pid_t ppid, const char *execname, const char *namei) static int sysctl_debug_tslog_user(SYSCTLFN_ARGS) { - char buf[LINE_MAX]; + pid_t pid; + char buf[LINE_MAX] = ""; char *where = oldp; - size_t buflen, slen, needed = 0; + size_t slen; int error = 0; - static bool first = true; - static size_t max = 0; - pid_t pid; + static size_t needed = 0; - buflen = *oldlenp; + /* sysctl first tries with a size of 1024 */ + if (*oldlenp < needed) { + *oldlenp = needed; + return ENOMEM; + } /* Export the data we logged. 
*/ for (pid = 0; pid <= PID_MAX; pid++) { if (procs[pid].tsc_forked == 0 && @@ -265,27 +263,19 @@ sysctl_debug_tslog_user(SYSCTLFN_ARGS) slen = strlen(buf) + 1; - if (!first) { - if (buflen < slen) { - /* still not enough space */ - first = true; - continue; - } + if (where == NULL) /* 1st pass, calculate needed */ + needed += slen; + else { if (pid > 0) where--; /* overwrite last \0 */ if ((error = copyout(buf, where, slen))) break; where += slen; - buflen -= slen; } - needed += slen; - } - first = false; - - if (needed > max) { - max = needed; } - *oldlenp = max; + /* Come back with an address */ + if (oldp == NULL) + *oldlenp = needed; return (error); } From c46d084d7a68095daed196cdeee7b17b2ecc8933 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sat, 20 Jan 2024 15:02:14 +0100 Subject: [PATCH 096/114] fix: leftover debug --- sys/kern/kern_tslog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index 4df514a7e28b2..ddb7ea543cc89 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -140,8 +140,8 @@ sysctl_debug_tslog(SYSCTLFN_ARGS) strcat(buf, " EVENT"); break; } - snprintf(buf, LINE_MAX, "%s %s %lu", buf, - timestamps[i].f ? timestamps[i].f : "(null)", where - (char *)oldp); + snprintf(buf, LINE_MAX, "%s %s", buf, + timestamps[i].f ? 
timestamps[i].f : "(null)"); if (timestamps[i].s) snprintf(buf, LINE_MAX, "%s %s\n", buf, timestamps[i].s); From 14d4c5cb1f4ecb4716768ec4efe609365a9b1073 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Sat, 20 Jan 2024 16:53:54 +0100 Subject: [PATCH 097/114] fix: check if a thread is alive before recording it --- sys/kern/kern_kthread.c | 1 - sys/kern/kern_tslog.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index 374dd9e9553b2..32b662a96a3b7 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -96,7 +96,6 @@ kthread_create(pri_t pri, int flag, struct cpu_info *ci, vsnprintf(l->l_name, MAXCOMLEN, fmt, ap); va_end(ap); } - TSTHREAD(l, l->l_name); /* * Set parameters. diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index ddb7ea543cc89..01b689d19468b 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -94,7 +94,7 @@ tslog(const lwp_t *l, int type, const char *f, const char *s) l = &lwp0; /* Store record. 
*/ - if (nrecs < nitems(timestamps)) { + if (lwp_alive(__UNCONST(l)) && nrecs < nitems(timestamps)) { timestamps[nrecs].lid = l->l_lid; timestamps[nrecs].type = type; timestamps[nrecs].f = f; From 3cd2361bcbe4277de31a03ea7efdfd38880b97e6 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 22 Jan 2024 10:46:09 +0100 Subject: [PATCH 098/114] fix: copy thread names in order to avoir name garbage --- sys/kern/kern_tslog.c | 52 +++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index 01b689d19468b..913a64aec7d9a 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -57,6 +57,7 @@ #include #include +#include #include #include #include @@ -72,11 +73,26 @@ #define nitems(x) __arraycount(x) +static __inline uint64_t +atomic_fetchadd_long(volatile uint64_t *p, uint64_t v) +{ + uint64_t oldval, newval; + + do { + oldval = *p; + newval = oldval + v; + } while (atomic_cas_ulong(p, oldval, newval) != oldval); + + return oldval; +} + +#define MAX_FUNC_NAME 128 + static volatile long nrecs = 0; static struct timestamp { - lwpid_t lid; + const lwp_t *l; int type; - const char *f; + char f[MAX_FUNC_NAME]; const char *s; uint64_t tsc; } timestamps[TSLOGSIZE]; @@ -88,24 +104,28 @@ void tslog(const lwp_t *l, int type, const char *f, const char *s) { uint64_t tsc = rdtsc(); + long pos; /* A NULL thread is lwp0 before curthread is set. */ if (l == NULL) l = &lwp0; - /* Store record. */ - if (lwp_alive(__UNCONST(l)) && nrecs < nitems(timestamps)) { - timestamps[nrecs].lid = l->l_lid; - timestamps[nrecs].type = type; - timestamps[nrecs].f = f; - timestamps[nrecs].s = s; - timestamps[nrecs].tsc = tsc; - - /* Grab a slot. */ - atomic_add_long(&nrecs, 1); + /* Grab a slot. 
*/ + pos = atomic_fetchadd_long(&nrecs, 1); + if (pos < nitems(timestamps)) { + timestamps[pos].l = l; + timestamps[pos].type = type; + if (f != NULL) + strlcpy(timestamps[pos].f, f, MAX_FUNC_NAME); + else + strcpy(timestamps[pos].f, "(null)"); + timestamps[pos].s = s; + timestamps[pos].tsc = tsc; } } +#undef MAX_FUNC_NAME + static int sysctl_debug_tslog(SYSCTLFN_ARGS) { @@ -123,9 +143,8 @@ sysctl_debug_tslog(SYSCTLFN_ARGS) /* Add data logged within the kernel. */ limit = MIN(nrecs, nitems(timestamps)); for (i = 0; i < limit; i++) { - snprintf(buf, LINE_MAX, "0x%x %llu", - timestamps[i].lid, - (unsigned long long)timestamps[i].tsc); + snprintf(buf, LINE_MAX, "0x%x %lu", + timestamps[i].l->l_lid, timestamps[i].tsc); switch (timestamps[i].type) { case TS_ENTER: strcat(buf, " ENTER"); @@ -140,8 +159,7 @@ sysctl_debug_tslog(SYSCTLFN_ARGS) strcat(buf, " EVENT"); break; } - snprintf(buf, LINE_MAX, "%s %s", buf, - timestamps[i].f ? timestamps[i].f : "(null)"); + snprintf(buf, LINE_MAX, "%s %s", buf, timestamps[i].f); if (timestamps[i].s) snprintf(buf, LINE_MAX, "%s %s\n", buf, timestamps[i].s); From bdb316a0438f3f01104dc5e5e495f89b611d8aef Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 22 Jan 2024 11:24:48 +0100 Subject: [PATCH 099/114] docs: explain the copy --- sys/kern/kern_tslog.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index 913a64aec7d9a..e35fef1e696e2 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -115,6 +115,11 @@ tslog(const lwp_t *l, int type, const char *f, const char *s) if (pos < nitems(timestamps)) { timestamps[pos].l = l; timestamps[pos].type = type; + /* + * Must record it: + * - the compiled string is a format string + * - kernel thread might be destroyed + */ if (f != NULL) strlcpy(timestamps[pos].f, f, MAX_FUNC_NAME); else From 488c432b3bd3a914bfce5a3ad51cb49c6a053c82 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 22 Jan 2024 12:16:29 +0100 
Subject: [PATCH 100/114] docs: more precision --- sys/kern/kern_tslog.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index e35fef1e696e2..cb564742e2326 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -116,9 +116,12 @@ tslog(const lwp_t *l, int type, const char *f, const char *s) timestamps[pos].l = l; timestamps[pos].type = type; /* - * Must record it: + * Must record it for TSTHREAD * - the compiled string is a format string * - kernel thread might be destroyed + * + * As this variable might also be used for + * function names, MAXCOMLEN is not enough. */ if (f != NULL) strlcpy(timestamps[pos].f, f, MAX_FUNC_NAME); From d5588e09d092d9545373e0c0ea1eb61e8123b139 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 22 Jan 2024 17:44:49 +0100 Subject: [PATCH 101/114] fix: pid0 has no process, fix spaces --- sys/kern/kern_tslog.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index cb564742e2326..59c5d0fa523bb 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -86,6 +86,7 @@ atomic_fetchadd_long(volatile uint64_t *p, uint64_t v) return oldval; } +/* from nm(1), biggest function name is 67 chars long */ #define MAX_FUNC_NAME 128 static volatile long nrecs = 0; @@ -276,14 +277,14 @@ sysctl_debug_tslog_user(SYSCTLFN_ARGS) procs[pid].tsc_exited == 0) continue; snprintf(buf, LINE_MAX, "%zu", (size_t)pid); - snprintf(buf, LINE_MAX, "%s %zu", buf, (size_t)procs[pid].ppid); - snprintf(buf, LINE_MAX, "%s %llu", buf, + snprintf(buf, LINE_MAX, "%s %zu", buf, (size_t)procs[pid].ppid); + snprintf(buf, LINE_MAX, "%s %llu", buf, (unsigned long long)procs[pid].tsc_forked); - snprintf(buf, LINE_MAX, "%s %llu", buf, + snprintf(buf, LINE_MAX, "%s %llu", buf, (unsigned long long)procs[pid].tsc_exited); - snprintf(buf, LINE_MAX, "%s \"%s\"", buf, procs[pid].execname ? 
+ snprintf(buf, LINE_MAX, "%s \"%s\"", buf, procs[pid].execname ? procs[pid].execname : ""); - snprintf(buf, LINE_MAX, "%s \"%s\"", buf, procs[pid].namei ? + snprintf(buf, LINE_MAX, "%s \"%s\"", buf, procs[pid].namei ? procs[pid].namei : ""); strcat(buf, "\n"); @@ -292,7 +293,7 @@ sysctl_debug_tslog_user(SYSCTLFN_ARGS) if (where == NULL) /* 1st pass, calculate needed */ needed += slen; else { - if (pid > 0) + if (pid > 1) where--; /* overwrite last \0 */ if ((error = copyout(buf, where, slen))) break; From fd5e0bfa65f899b7366f4405b45099d604e2386f Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 22 Jan 2024 17:52:23 +0100 Subject: [PATCH 102/114] fix: 85 --- sys/kern/kern_tslog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index 59c5d0fa523bb..ec28bc90c7d29 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -86,7 +86,7 @@ atomic_fetchadd_long(volatile uint64_t *p, uint64_t v) return oldval; } -/* from nm(1), biggest function name is 67 chars long */ +/* from nm(1), GENERIC biggest symbol is 85 chars long */ #define MAX_FUNC_NAME 128 static volatile long nrecs = 0; From 058b032ba6456e7fdf3f82daf4d96e1e2a515c55 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 22 Jan 2024 20:25:02 +0100 Subject: [PATCH 103/114] feat: make boot time display optional --- sys/arch/amd64/amd64/locore.S | 4 ++++ sys/kern/init_main.c | 15 ++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 0d1f6b184e0bd..16f02c1da878a 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -386,12 +386,14 @@ END(biosextmem) .type _C_LABEL(lwp0uarea), @object LABEL(lwp0uarea) .quad 0 END(lwp0uarea) +#ifdef BOOTTIME .type _C_LABEL(boottime_low), @object LABEL(boottime_low) .long 0 /* low part of rdtsc */ END(boottime_low) .type _C_LABEL(boottime_high), @object LABEL(boottime_high) 
.long 0 /* high part of rdtsc */ END(boottime_high) +#endif #ifndef XENPV .globl gdt64_lo @@ -1042,9 +1044,11 @@ END(start) /* entry point for Xen PVH */ .code32 ENTRY(start_genpvh) +#ifdef BOOTTIME rdtsc movl %eax, RELOC(boottime_low) movl %edx, RELOC(boottime_high) +#endif /* Xen doesn't start us with a valid gdt */ movl $RELOC(gdtdesc32), %eax lgdt (%eax) diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 3fa12fa9cb829..3e5e18449040a 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -254,19 +254,22 @@ static void configure(void); static void configure2(void); static void configure3(void); void main(void); -static void howlong(void); +#ifdef BOOTTIME +static void boottime(void); extern uint32_t boottime_low; extern uint32_t boottime_high; static void -howlong(void) +boottime(void) { uint64_t entrytime = (uint64_t)boottime_high << 32 | boottime_low; - printf("boot: %lums\n", ((rdtsc() - entrytime) * 1000) / - curcpu()->ci_data.cpu_cc_freq); + printf_nolog("boot: %lums (entry tsc: %lu)\n", + ((rdtsc() - entrytime) * 1000) / + curcpu()->ci_data.cpu_cc_freq, entrytime); } +#endif /* * System startup; initialize the world, create process 0, mount root @@ -763,7 +766,9 @@ main(void) mutex_exit(&proc_lock); TSEXIT(); - howlong(); +#ifdef BOOTTIME + boottime(); +#endif /* The scheduler is an infinite loop. */ uvm_scheduler(); /* NOTREACHED */ From 90b1b77db8cf8f2a91aaa1c7fd8bd851c4dc878c Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 23 Jan 2024 06:40:21 +0100 Subject: [PATCH 104/114] docs: added tslog man --- share/man/man4/tslog.4 | 155 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 share/man/man4/tslog.4 diff --git a/share/man/man4/tslog.4 b/share/man/man4/tslog.4 new file mode 100644 index 0000000000000..d329c00582031 --- /dev/null +++ b/share/man/man4/tslog.4 @@ -0,0 +1,155 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.\" Ported from FreeBSD +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2022 Mateusz Piotrowski <0mp@FreeBSD.org> +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd January, 2024 +.Dt TSLOG 4 +.Os +.Sh NAME +.Nm tslog +.Nd Boot-time event tracing facility +.Sh SYNOPSIS +To compile this boot-time event tracing facility into the kernel, +place the following line in the kernel configuration file: +.Bd -ragged -offset indent +.Cd "option TSLOG" +.Ed +.Sh DESCRIPTION +.Nm +is a boot-time event tracing facility. +It is suitable for tracing recursive events +based on function entries and exits. +Its purpose is to ease pinpointing and reducing the overall +.Nx +boot time by generating detailed timing information. +.Pp +.Nm +is able to trace the boot loader, kernel initialization, and userland processes. +.Pp +In userland, it records the following details for each process ID: +.Bl -dash +.It +The timestamp of the +.Xr fork 2 +which creates the given process ID and the parent process ID. +.It +The path passed to +.Xr execve 2 , +if any. +.It +The first path resolved by +.Xr namei 9 , +if any. 
+.It +The timestamp of the +.Xr exit 3 +which terminates the process. +.El +.Sh SYSCTL VARIABLES +The following +.Xr sysctl 8 +variables are available: +.Bl -tag -width indent +.It Va debug.tslog +Dump the +.Nm +buffer of recorded loader and kernel event timestamps. +.It Va debug.tslog_user +Dump the +.Nm +buffer +of recorded userland event timestamps. +.El +.Sh FLAMEGRAPHS +The +.Nm +buffer dumps +can be used to generate flamegraphs of the +.Nx +boot process for visual analysis. +See +.Lk https://github.com/cperciva/freebsd-boot-profiling +for more information. +.Sh SEE ALSO +.Xr dtrace 1 +.Sh HISTORY +.Nm +first appeared in +.Fx 12.0 . +Support for tracing boot loaders and userland processes +was added in +.Fx 13.2 . +.Pp +.Nm +first appeared in +.Nx 11.0 . +.Ss TSLOG vs. DTrace +.Xr dtrace 1 +is not always the right tool for profiling early kernel initialization. +The reason is it requires some kernel subroutines +which are not yet available early in the boot process, e.g.: +traps, memory allocation, or thread scheduling. +.Nm +depends on fewer kernel subroutines than +.Xr dtrace 1 +and because of that can trace early kernel initialization. +.Sh AUTHORS +.An -nosplit +.Nm +was originally written by +.An Colin Percival Aq Mt cperciva@FreeBSD.org for +.Fx , +.Nx port was made by +.An Emile 'iMil' Heitor Aq Mt imil@NetBSD.org . +.Pp +This manual page was originally written by +.An Mateusz Piotrowski Aq Mt 0mp@FreeBSD.org .
From dcaf8a9b4d1766e691030d86994a598a515ea82d Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 23 Jan 2024 08:45:54 +0100 Subject: [PATCH 105/114] docs: added pvclock(4) man --- share/man/man4/pvclock.4 | 74 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 share/man/man4/pvclock.4 diff --git a/share/man/man4/pvclock.4 b/share/man/man4/pvclock.4 new file mode 100644 index 0000000000000..6df0498b7ee8b --- /dev/null +++ b/share/man/man4/pvclock.4 @@ -0,0 +1,74 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. 
+.\" +.\" ported from OpenBSD +.\" $OpenBSD: pvclock.4,v 1.2 2018/11/23 17:21:37 jmc Exp $ +.\" +.\" Copyright (c) 2018 Reyk Floeter +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd January 2024 +.Dt PVCLOCK 4 +.Os +.Sh NAME +.Nm pvclock +.Nd paravirtual clock driver +.Sh SYNOPSIS +.Cd "pvclock* at pv?" +.Sh DESCRIPTION +The +.Nm +driver supports the paravirtual clock that is available in KVM and +other hypervisors. +.Nm +uses a shared page between the guest and the hypervisor to synchronize +the TSC clock in an efficient way. +.Sh SEE ALSO +.Xr pv 4 +.Sh HISTORY +The +.Nm +driver first appeared in +.Nx 11 . +.Sh AUTHORS +.An -nosplit +The +.Nm +driver was written by +.An Reyk Floeter Aq Mt reyk@openbsd.org . +.Nx port was made by +.An Emile 'iMil' Heitor Aq Mt imil@NetBSD.org .
From 38ff85ba7c21112c64f02b0ff4ed79cec8e078ca Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Thu, 25 Jan 2024 19:35:58 +0100 Subject: [PATCH 106/114] feat: added AMD 17+ family MSR freq, disabled NVMM for invtsc --- sys/arch/x86/include/specialreg.h | 4 + sys/arch/x86/x86/identcpu_subr.c | 197 ++++++++++++++++++++---------- 2 files changed, 137 insertions(+), 64 deletions(-) diff --git a/sys/arch/x86/include/specialreg.h b/sys/arch/x86/include/specialreg.h index e510eaa2d2011..d59df568646e7 100644 --- a/sys/arch/x86/include/specialreg.h +++ b/sys/arch/x86/include/specialreg.h @@ -1357,6 +1357,10 @@ #define MSR_HWCR 0xc0010015 #define HWCR_TLBCACHEDIS 0x00000008 #define HWCR_FFDIS 0x00000040 +#define HWCR_TSCFREQSEL 0x01000000 + +#define MSR_PSTATEDEF(_n) (0xc0010064 + (_n)) +#define PSTATEDEF_EN 0x8000000000000000ULL #define MSR_NB_CFG 0xc001001f #define NB_CFG_DISIOREQLOCK 0x0000000000000008ULL diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index a3546d7f85bda..f09ac9aba1512 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -62,68 +62,6 @@ __KERNEL_RCSID(0, "$NetBSD: identcpu_subr.c,v 1.9 2021/10/07 13:04:18 msaitoh Ex #include "cpuctl_i386.h" #endif -static uint64_t -tsc_freq_intel_brand(struct cpu_info *ci) -{ - char brand[48]; - u_int regs[4]; - uint64_t freq; - char *p; - u_int i; - - /* - * Intel Processor Identification and the CPUID Instruction - * Application Note 485. 
- * http://www.intel.com/assets/pdf/appnote/241618.pdf - */ - if (ci->ci_max_ext_cpuid >= 0x80000004) { - p = brand; - for (i = 0x80000002; i < 0x80000005; i++) { - x86_cpuid(i, regs); - memcpy(p, regs, sizeof(regs)); - p += sizeof(regs); - } - p = NULL; - for (i = 0; i < sizeof(brand) - 1; i++) - if (brand[i] == 'H' && brand[i + 1] == 'z') - p = brand + i; - if (p != NULL) { - p -= 5; - switch (p[4]) { - case 'M': - i = 1; - break; - case 'G': - i = 1000; - break; - case 'T': - i = 1000000; - break; - default: - return 0; - } -#define C2D(c) ((c) - '0') - if (p[1] == '.') { - freq = C2D(p[0]) * 1000; - freq += C2D(p[2]) * 100; - freq += C2D(p[3]) * 10; - freq *= i * 1000; - } else { - freq = C2D(p[0]) * 1000; - freq += C2D(p[1]) * 100; - freq += C2D(p[2]) * 10; - freq += C2D(p[3]); - freq *= i * 1000000; - } -#undef C2D - aprint_verbose( - "got tsc from cpu brand\n"); - return freq; - } - } - return 0; -} - static uint64_t tsc_freq_cpuid_vm(struct cpu_info *ci) { @@ -212,7 +150,7 @@ tsc_freq_cpuid(struct cpu_info *ci) (uint64_t)descs[1] * 1000000); } } -#if defined(_KERNEL) && NLAPIC > 0 +#if defined(_KERNEL) && NLAPIC > 0 if ((khz != 0) && (lapic_per_second == 0)) { lapic_per_second = khz * 1000; aprint_debug_dev(ci->ci_dev, @@ -224,6 +162,133 @@ tsc_freq_cpuid(struct cpu_info *ci) return freq; } +/* Ported from OpenBSD's sys/arch/amd64/amd64/tsc.c */ +static uint64_t +tsc_freq_amd_msr(struct cpu_info *ci) +{ + uint64_t base, def, divisor, multiplier, freq; + uint32_t family = CPUID_TO_FAMILY(ci->ci_signature); + + /* + * All 10h+ CPUs have Core::X86::Msr:HWCR and the TscFreqSel + * bit. If TscFreqSel hasn't been set, the TSC isn't advancing + * at the core P0 frequency and we need to calibrate by hand. + */ + if (family < 0x10) + return 0; + if (!ISSET(rdmsr(MSR_HWCR), HWCR_TSCFREQSEL)) + return 0; + + /* + * In 10h+ CPUs, Core::X86::Msr::PStateDef defines the voltage + * and frequency for each core P-state. We want the P0 frequency. 
+ * If the En bit isn't set, the register doesn't define a valid + * P-state. + */ + def = rdmsr(MSR_PSTATEDEF(0)); + if (!ISSET(def, PSTATEDEF_EN)) + return 0; + + switch (family) { + case 0x17: + case 0x19: + printf("\nAMD FAMILY MATCHES %u\n", family); + /* + * PPR for AMD Family 17h [...]: + * Models 01h,08h B2, Rev 3.03, pp. 33, 139-140 + * Model 18h B1, Rev 3.16, pp. 36, 143-144 + * Model 60h A1, Rev 3.06, pp. 33, 155-157 + * Model 71h B0, Rev 3.06, pp. 28, 150-151 + * + * PPR for AMD Family 19h [...]: + * Model 21h B0, Rev 3.05, pp. 33, 166-167 + * + * OSRR for AMD Family 17h processors, + * Models 00h-2Fh, Rev 3.03, pp. 130-131 + */ + base = 200000000; /* 200.0 MHz */ + divisor = (def >> 8) & 0x3f; + if (divisor <= 0x07 || divisor >= 0x2d) + return 0; /* reserved */ + if (divisor >= 0x1b && divisor % 2 == 1) + return 0; /* reserved */ + multiplier = def & 0xff; + if (multiplier <= 0x0f) + return 0; /* reserved */ + break; + default: + return 0; + } + freq = base * multiplier / divisor; + + printf("\nGOT %lu FROM AMD\n", freq); + + return base * multiplier / divisor; +} + +/* Ported from sys/x86/x86/tsc.c */ +static uint64_t +tsc_freq_intel_brand(struct cpu_info *ci) +{ + char brand[48]; + u_int regs[4]; + uint64_t freq; + char *p; + u_int i; + + /* + * Intel Processor Identification and the CPUID Instruction + * Application Note 485. 
+ * http://www.intel.com/assets/pdf/appnote/241618.pdf + */ + if (ci->ci_max_ext_cpuid >= 0x80000004) { + p = brand; + for (i = 0x80000002; i < 0x80000005; i++) { + x86_cpuid(i, regs); + memcpy(p, regs, sizeof(regs)); + p += sizeof(regs); + } + p = NULL; + for (i = 0; i < sizeof(brand) - 1; i++) + if (brand[i] == 'H' && brand[i + 1] == 'z') + p = brand + i; + if (p != NULL) { + p -= 5; + switch (p[4]) { + case 'M': + i = 1; + break; + case 'G': + i = 1000; + break; + case 'T': + i = 1000000; + break; + default: + return 0; + } +#define C2D(c) ((c) - '0') + if (p[1] == '.') { + freq = C2D(p[0]) * 1000; + freq += C2D(p[2]) * 100; + freq += C2D(p[3]) * 10; + freq *= i * 1000; + } else { + freq = C2D(p[0]) * 1000; + freq += C2D(p[1]) * 100; + freq += C2D(p[2]) * 10; + freq += C2D(p[3]); + freq *= i * 1000000; + } +#undef C2D + aprint_verbose( + "got tsc from cpu brand\n"); + return freq; + } + } + return 0; +} + uint64_t cpu_tsc_freq_cpuid(struct cpu_info *ci) { @@ -231,8 +296,12 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) if (cpu_vendor == CPUVENDOR_INTEL) freq = tsc_freq_cpuid(ci); + /* Try AMD MSR's */ + if (freq == 0 && cpu_vendor == CPUVENDOR_AMD) + freq = tsc_freq_amd_msr(ci); /* VMware compatible tsc query */ - if (freq == 0 && vm_guest != VM_GUEST_NO) + if (freq == 0 && vm_guest != VM_GUEST_NO && + hv_type != VM_GUEST_NVMM) /* NVMM replies junk */ freq = tsc_freq_cpuid_vm(ci); /* Still no luck, get the frequency from brand */ if (freq == 0 && cpu_vendor == CPUVENDOR_INTEL) From 30a7678e54b35b6c6f8068c52438da752212347d Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 26 Jan 2024 08:43:27 +0100 Subject: [PATCH 107/114] feat: add vmware-style CPUID to get CPU frequency --- sys/dev/nvmm/x86/nvmm_x86_svm.c | 9 ++++++++- sys/dev/nvmm/x86/nvmm_x86_vmx.c | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sys/dev/nvmm/x86/nvmm_x86_svm.c b/sys/dev/nvmm/x86/nvmm_x86_svm.c index 0c71e0d3f2e0f..e40579c43e4ac 100644 --- 
a/sys/dev/nvmm/x86/nvmm_x86_svm.c +++ b/sys/dev/nvmm/x86/nvmm_x86_svm.c @@ -839,7 +839,7 @@ svm_inkernel_advance(struct vmcb *vmcb) } #define SVM_CPUID_MAX_BASIC 0xD -#define SVM_CPUID_MAX_HYPERVISOR 0x40000000 +#define SVM_CPUID_MAX_HYPERVISOR 0x40000010 #define SVM_CPUID_MAX_EXTENDED 0x8000001F static uint32_t svm_cpuid_max_basic __read_mostly; static uint32_t svm_cpuid_max_extended __read_mostly; @@ -980,6 +980,13 @@ svm_inkernel_handle_cpuid(struct nvmm_cpu *vcpu, uint64_t eax, uint64_t ecx) memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4); break; + case 0x40000010: /* VMware-style CPU freq */ + cpudata->gprs[NVMM_X64_GPR_RAX] = curcpu()->ci_data.cpu_cc_freq; + cpudata->gprs[NVMM_X64_GPR_RBX] = 0; + cpudata->gprs[NVMM_X64_GPR_RCX] = 0; + cpudata->gprs[NVMM_X64_GPR_RDX] = 0; + break; + case 0x80000000: cpudata->vmcb->state.rax = svm_cpuid_max_extended; break; diff --git a/sys/dev/nvmm/x86/nvmm_x86_vmx.c b/sys/dev/nvmm/x86/nvmm_x86_vmx.c index 7e9a113692fdb..dba7b13469800 100644 --- a/sys/dev/nvmm/x86/nvmm_x86_vmx.c +++ b/sys/dev/nvmm/x86/nvmm_x86_vmx.c @@ -1220,7 +1220,7 @@ vmx_exit_exc_nmi(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, } #define VMX_CPUID_MAX_BASIC 0x16 -#define VMX_CPUID_MAX_HYPERVISOR 0x40000000 +#define VMX_CPUID_MAX_HYPERVISOR 0x40000010 #define VMX_CPUID_MAX_EXTENDED 0x80000008 static uint32_t vmx_cpuid_max_basic __read_mostly; static uint32_t vmx_cpuid_max_extended __read_mostly; @@ -1432,6 +1432,13 @@ vmx_inkernel_handle_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4); break; + case 0x40000010: /* VMware-style CPU freq */ + cpudata->gprs[NVMM_X64_GPR_RAX] = curcpu()->ci_data.cpu_cc_freq; + cpudata->gprs[NVMM_X64_GPR_RBX] = 0; + cpudata->gprs[NVMM_X64_GPR_RCX] = 0; + cpudata->gprs[NVMM_X64_GPR_RDX] = 0; + break; + case 0x80000000: cpudata->gprs[NVMM_X64_GPR_RAX] = vmx_cpuid_max_extended; break; From 07916a1bb79f60ff9f266ebb1545de7a677567e7 Mon Sep 17 00:00:00 2001 From: 
Emile 'iMil' Heitor Date: Fri, 26 Jan 2024 08:44:36 +0100 Subject: [PATCH 108/114] fix: values read from NVMM are now correct (patch) --- sys/arch/x86/x86/identcpu_subr.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index f09ac9aba1512..1e2e654772c73 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -68,14 +68,11 @@ tsc_freq_cpuid_vm(struct cpu_info *ci) uint32_t descs[4]; if (ci->ci_max_ext_cpuid >= 0x40000010) { - int mul = 1000; /* TSC freq in khz... */ - if (hv_type == VM_GUEST_NVMM) - mul *= 1000; /* ...except for NVMM */ x86_cpuid(0x40000010, descs); if (descs[0] > 0) { aprint_verbose( "got tsc from vmware compatible cpuid\n"); - return descs[0] * mul; + return descs[0] * 1000; } } @@ -300,8 +297,7 @@ cpu_tsc_freq_cpuid(struct cpu_info *ci) if (freq == 0 && cpu_vendor == CPUVENDOR_AMD) freq = tsc_freq_amd_msr(ci); /* VMware compatible tsc query */ - if (freq == 0 && vm_guest != VM_GUEST_NO && - hv_type != VM_GUEST_NVMM) /* NVMM replies junk */ + if (freq == 0 && vm_guest != VM_GUEST_NO) freq = tsc_freq_cpuid_vm(ci); /* Still no luck, get the frequency from brand */ if (freq == 0 && cpu_vendor == CPUVENDOR_INTEL) From 1970a6c43ca14fa9779b244068e4048a782e391e Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 26 Jan 2024 09:11:55 +0100 Subject: [PATCH 109/114] fix: debug leftover --- sys/arch/x86/x86/identcpu_subr.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index 1e2e654772c73..7e1a8b598f08d 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -189,7 +189,6 @@ tsc_freq_amd_msr(struct cpu_info *ci) switch (family) { case 0x17: case 0x19: - printf("\nAMD FAMILY MATCHES %u\n", family); /* * PPR for AMD Family 17h [...]: * Models 01h,08h B2, Rev 3.03, pp. 
33, 139-140 @@ -216,14 +215,11 @@ tsc_freq_amd_msr(struct cpu_info *ci) default: return 0; } - freq = base * multiplier / divisor; - - printf("\nGOT %lu FROM AMD\n", freq); return base * multiplier / divisor; } -/* Ported from sys/x86/x86/tsc.c */ +/* Ported from FreeBSD sys/x86/x86/tsc.c */ static uint64_t tsc_freq_intel_brand(struct cpu_info *ci) { From 5f661cfd1327b8c39f848f7b8cdcf4b864004969 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Fri, 26 Jan 2024 13:13:37 +0100 Subject: [PATCH 110/114] fix: unused variable --- sys/arch/x86/x86/identcpu_subr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index 7e1a8b598f08d..1bee6f5d2f8b3 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -163,7 +163,7 @@ tsc_freq_cpuid(struct cpu_info *ci) static uint64_t tsc_freq_amd_msr(struct cpu_info *ci) { - uint64_t base, def, divisor, multiplier, freq; + uint64_t base, def, divisor, multiplier; uint32_t family = CPUID_TO_FAMILY(ci->ci_signature); /* From c4df8199ccb1db535aafd457c2d61ae203bc25c5 Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Mon, 29 Jan 2024 14:04:57 +0100 Subject: [PATCH 111/114] fix: include pvclock.h when option activated + add prototype for tslog_user --- sys/arch/x86/x86/cpu.c | 4 ++-- sys/arch/x86/x86/lapic.c | 3 ++- sys/kern/kern_tslog.c | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index b78e2528a066e..10e307b534522 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -74,9 +74,9 @@ __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.209 2023/07/16 19:55:43 riastradh Exp $"); #include "ioapic.h" #include "acpica.h" #include "hpet.h" - +#if NPVCLOCK > 0 #include "pvclock.h" - +#endif #include #include #include diff --git a/sys/arch/x86/x86/lapic.c b/sys/arch/x86/x86/lapic.c index ea75591f20c82..a816c160d6730 100644 --- 
a/sys/arch/x86/x86/lapic.c +++ b/sys/arch/x86/x86/lapic.c @@ -42,8 +42,9 @@ __KERNEL_RCSID(0, "$NetBSD: lapic.c,v 1.89 2022/09/07 00:40:19 knakahara Exp $") #include "opt_multiprocessor.h" #include "opt_ntp.h" #include "opt_xen.h" +#if NPVCLOCK > 0 #include "pvclock.h" - +#endif #include #include #include diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index ec28bc90c7d29..15819cf6188a9 100644 --- a/sys/kern/kern_tslog.c +++ b/sys/kern/kern_tslog.c @@ -99,6 +99,7 @@ static struct timestamp { } timestamps[TSLOGSIZE]; void tslog(const lwp_t *, int, const char *, const char *); +void tslog_user(pid_t, pid_t, const char *, const char *); static int sysctl_debug_tslog(SYSCTLFN_PROTO); void From 5982418f7baf116c5a7a1434b0154250d609fd7b Mon Sep 17 00:00:00 2001 From: Emile 'iMil' Heitor Date: Tue, 30 Jan 2024 05:53:46 +0100 Subject: [PATCH 112/114] fix: freq > 32 bits --- sys/arch/x86/x86/identcpu_subr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/arch/x86/x86/identcpu_subr.c b/sys/arch/x86/x86/identcpu_subr.c index 1bee6f5d2f8b3..309690fa2619c 100644 --- a/sys/arch/x86/x86/identcpu_subr.c +++ b/sys/arch/x86/x86/identcpu_subr.c @@ -70,9 +70,11 @@ tsc_freq_cpuid_vm(struct cpu_info *ci) if (ci->ci_max_ext_cpuid >= 0x40000010) { x86_cpuid(0x40000010, descs); if (descs[0] > 0) { + /* Needed to avoid overflow */ + uint64_t freq = descs[0]; aprint_verbose( "got tsc from vmware compatible cpuid\n"); - return descs[0] * 1000; + return freq * 1000; } } From 408f1af63a6030738bdacfff657905763fd38fcb Mon Sep 17 00:00:00 2001 From: csdvrx <49044530+csdvrx@users.noreply.github.com> Date: Tue, 30 Jan 2024 19:35:10 -0600 Subject: [PATCH 113/114] Initial replication of iMil results --- compile.sh | 45 ++ sys/arch/amd64/conf/MICROVM | 63 ++ sys/arch/amd64/conf/MICROVM.MAYBE | 1230 +++++++++++++++++++++++++++++ sys/dev/pv/pvclock.c | 6 +- sys/kern/kern_tslog.c | 2 +- 5 files changed, 1342 insertions(+), 4 deletions(-) create mode 100755 compile.sh 
create mode 100644 sys/arch/amd64/conf/MICROVM create mode 100644 sys/arch/amd64/conf/MICROVM.MAYBE diff --git a/compile.sh b/compile.sh new file mode 100755 index 0000000000000..873eeae2a0e06 --- /dev/null +++ b/compile.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# Replication of iMil result reported on https://mail-index.netbsd.org/tech-kern/2024/01/23/msg029450.html +KERNELCONFIG="MICROVM" +TOOLCHAIN="$HOME/obj" +CPUS=4 + +# Get the sources +#git clone https://github.com/NetBSDfr/NetBSD-src + +# Show the branches available +#git branch -a + +# Change to result-replication, based on the perf branch +git branch| grep result-replication || git switch result-replication + +#cf https://www.netbsd.org/docs/guide/en/chap-build.html#chap-boot-cross-compiling-kernel +echo "# Building the cross compilation toolchain into ${TOOLCHAIN}": + +stat -c "%y %A %n size: %s" ${TOOLCHAIN}/tooldir*x86_64/bin/nbmake \ + || sh ./build.sh -U -O ${TOOLCHAIN} -j ${CPUS} -m amd64 -a x86_64 tools \ + || exit 1 + +echo "# Using the kernel configuration ${KERNELCONFIG}": +stat -c "%y %A %n size: %s" sys/arch/amd64/conf/${KERNELCONFIG} \ + || exit 2 + +echo "# Checking pvclock is disabled as it requires the missing " +grep -v ^pvclock sys/arch/amd64/conf/${KERNELCONFIG} \ + || exit 3 + +echo "# Refreshing the kernel build with -u" \ + && sh ./build.sh -u -U -O ${TOOLCHAIN} -j ${CPUS} -m amd64 kernel=${KERNELCONFIG} \ + && stat -c "%y %A %n size: %s" ${TOOLCHAIN}/sys/arch/amd64/compile/${KERNELCONFIG}/netbsd \ + || exit 4 + +## Alternatives: +# Use nbconfig and nbmake directly with sys/arch/amd64/conf/kern-compile.sh + +# Don't just update the kernel: remove the '-u' that's for update only + +# Compile from scratch the release and the kernel +#sh ./build.sh -U -O ${TOOLCHAIN} -j ${CPUS} -m amd64 -a x86_64 release kernel=${KERNELCONFIG} + +# TODO: add qemu scripts, which should have console=com etc +# TODO: add a basic init_args parsing arguments like sys/arch/x86/x86/x86_autoconf.c diff --git 
a/sys/arch/amd64/conf/MICROVM b/sys/arch/amd64/conf/MICROVM new file mode 100644 index 0000000000000..9b0108b4459dc --- /dev/null +++ b/sys/arch/amd64/conf/MICROVM @@ -0,0 +1,63 @@ +# start of kernel configuration + +machine amd64 x86 xen +include "conf/std" # MI standard options +include "arch/xen/conf/std.xenversion" +options CPU_IN_CKSUM +options EXEC_ELF64 # exec ELF binaries +options EXEC_SCRIPT # exec #! scripts +options MTRR +options MULTIPROCESSOR +options CHILD_MAX=1024 # 160 is too few +options OPEN_MAX=1024 # 128 is too few +mainbus0 at root +cpu* at mainbus? +ioapic* at mainbus? apid ? +options INCLUDE_CONFIG_FILE # embed config file in kernel binary +maxusers 8 # estimated number of users +options INSECURE # disable kernel security levels - X needs this options RTC_OFFSET=0 # hardware clock is this many mins. west of GMT +options PIPE_SOCKETPAIR # smaller, but slower pipe(2) +options XENPVHVM +options XEN +hypervisor* at mainbus? # Xen hypervisor +xenbus* at hypervisor? # Xen virtual bus +xencons* at hypervisor? # Xen virtual console +makeoptions COPTS="-O2 -fno-omit-frame-pointer" + +# Inspired by https://mail-index.netbsd.org/tech-kern/2024/01/23/msg029450.html +# but with more basic filesystems included +#include "conf/filesystems.config" +file-system FFS +file-system EXT2FS +file-system KERNFS +file-system PROCFS + +options FFS_NO_SNAPSHOT # No FFS snapshot support +options WAPBL # File system journaling support +options INET # IP + ICMP + TCP + UDP +options INET6 # IPV6 +config netbsd root on ? type ? + +# some microvms don't have ACPI +options MPBIOS # configure CPUs and APICs using MPBIOS +options MPTABLE_LINUX_BUG_COMPAT # fix to locate correct ACPI location +isa0 at mainbus? +com0 at isa? port 0x3f8 irq 4 # Standard PC serial ports + +# new pv bus for MMIO backed devices +pv* at pvbus? +# FIXME: not working due to missing in sys/dev/pv/pvclock.c +#pvclock* at pv? # pvclock uses KVM capabilities +virtio* at pv? 
# virtio attaches to pv with MMIO +ld* at virtio? # Virtio disk device +vioif* at virtio? # Virtio network device +viornd* at virtio? # Virtio entropy device +viocon* at virtio? + +pseudo-device bpfilter +pseudo-device loop +pseudo-device pty + +# for using https://github.com/cperciva/freebsd-boot-profiling +options TSLOG # enable tslog(4) tracing facility +options BOOTTIME # prints boot time relative to rdtsc diff --git a/sys/arch/amd64/conf/MICROVM.MAYBE b/sys/arch/amd64/conf/MICROVM.MAYBE new file mode 100644 index 0000000000000..010eebb5e3dba --- /dev/null +++ b/sys/arch/amd64/conf/MICROVM.MAYBE @@ -0,0 +1,1230 @@ +# MICROVM + +machine amd64 x86 xen +include "conf/std" # MI standard options +include "arch/xen/conf/std.xenversion" + +options CPU_IN_CKSUM +options EXEC_ELF64 # exec ELF binaries +options EXEC_SCRIPT # exec #! scripts +options MTRR +options MULTIPROCESSOR + +options CHILD_MAX=1024 # 160 is too few +options OPEN_MAX=1024 # 128 is too few + +#options SELFRELOC + +options KGDB # remote debugger +options KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x3f8,KGDB_DEVRATE=9600 +makeoptions DEBUG="-g" # compile full symbol table + +options CONSDEVNAME="\"com\"" +#options CONS_OVERRIDE + +mainbus0 at root +cpu* at mainbus? +ioapic* at mainbus? apid ? + +options INCLUDE_CONFIG_FILE # embed config file in kernel binary +maxusers 8 # estimated number of users + +options INSECURE # disable kernel security levels - X needs this + +options RTC_OFFSET=0 # hardware clock is this many mins. west of GMT +#options NTP # NTP phase/frequency locked loop + +#options KTRACE # system call tracing via ktrace(1) + +#options CPU_UCODE # cpu ucode loading support + +# Note: SysV IPC parameters could be changed dynamically, see sysctl(8). 
+#options SYSVMSG # System V-like message queues +#options SYSVSEM # System V-like semaphores +#options SYSVSHM # System V-like memory sharing + +#options MODULAR # new style module(7) framework +#options MODULAR_DEFAULT_AUTOLOAD +#options USERCONF # userconf(4) support +options PIPE_SOCKETPAIR # smaller, but slower pipe(2) +#options SYSCTL_INCLUDE_DESCR # Include sysctl descriptions in kernel + +# CPU-related options +#options USER_LDT # User-settable LDT, used by Wine +#options SVS # Separate Virtual Space +#options PCPU_IDT # Per CPU IDTs + +# GCC Spectre variant 2 mitigation +#makeoptions SPECTRE_V2_GCC_MITIGATION=1 +#options SPECTRE_V2_GCC_MITIGATION + +# CPU features +#acpicpu* at cpu? # ACPI CPU (including frequency scaling) +#coretemp* at cpu? # Intel on-die thermal sensor +#est0 at cpu0 # Intel Enhanced SpeedStep (non-ACPI) +#hyperv0 at cpu0 # Microsoft Hyper-V +#odcm0 at cpu0 # On-demand clock modulation +#powernow0 at cpu0 # AMD PowerNow! and Cool'n'Quiet (non-ACPI) +#vmt0 at cpu0 # VMware Tools + +#Xen PV support for PVH and HVM guests +options XENPVHVM +options XEN +# Generic PVH support (qemu, firecracker...) +hypervisor* at mainbus? # Xen hypervisor +xenbus* at hypervisor? # Xen virtual bus +xencons* at hypervisor? # Xen virtual console +#xennet* at xenbus? # Xen virtual network interface +#xbd* at xenbus? # Xen virtual block device +# experimental: PVH dom0 support +#options DOM0OPS +#pseudo-device xenevt +#pseudo-device xvif +#pseudo-device xbdback + + +# Alternate buffer queue strategies for better responsiveness under high +# disk I/O load. 
+#options BUFQ_READPRIO +#options BUFQ_PRIOCSCAN + +# Diagnostic/debugging support options +#options DIAGNOSTIC # inexpensive kernel consistency checks + # XXX to be commented out on release branch +#options DEBUG # expensive debugging checks/support +#options LOCKDEBUG # expensive locking checks/support + +# +# Because gcc omits the frame pointer for any -O level, the line below +# is needed to make backtraces in DDB work. +# +makeoptions COPTS="-O2 -fno-omit-frame-pointer" +#options DDB # in-kernel debugger +#options DDB_COMMANDONENTER="bt" # execute command when ddb is entered +#options DDB_ONPANIC=1 # see also sysctl(7): `ddb.onpanic' +#options DDB_HISTORY_SIZE=512 # enable history editing in DDB +#options KGDB # remote debugger +#options KGDB_DEVNAME="\"com\"",KGDB_DEVADDR=0x3f8,KGDB_DEVRATE=9600 +#makeoptions DEBUG="-g" # compile full symbol table for CTF +#options DDB_COMMANDONENTER="trace;show registers" +#options SYSCALL_STATS # per syscall counts +#options SYSCALL_TIMES # per syscall times +#options SYSCALL_TIMES_HASCOUNTER # use 'broken' rdtsc (soekris) +#options KDTRACE_HOOKS # kernel DTrace hooks + +# Kernel Undefined Behavior Sanitizer (kUBSan). +#options KUBSAN # mandatory +#options UBSAN_ALWAYS_FATAL # optional: panic on all kUBSan reports + +# Kernel Address Sanitizer (kASan). You need to disable SVS to use it. +# The quarantine is optional and can help KASAN find more use-after-frees. +# Use KASAN_PANIC if you want panics instead of warnings. +#makeoptions KASAN=1 # mandatory +#options KASAN # mandatory +#no options SVS # mandatory +#options POOL_QUARANTINE # optional +#options KASAN_PANIC # optional + +# Kernel Concurrency Sanitizer (kCSan). +#makeoptions KCSAN=1 # mandatory +#options KCSAN # mandatory +#options KCSAN_PANIC # optional + +# Kernel Memory Sanitizer (kMSan). You need to disable SVS and kernel modules +# to use it. POOL_NOCACHE is optional and can help KMSAN find uninitialized +# memory in pool caches. 
Note that KMSAN requires at least 4GB of RAM. +#makeoptions KMSAN=1 # mandatory +#options KMSAN # mandatory +#no options SVS # mandatory +#no options MODULAR # mandatory +#no options MODULAR_DEFAULT_AUTOLOAD # mandatory +#options POOL_NOCACHE # optional +#options KMSAN_PANIC # optional + +# Kernel Code Coverage Driver. +#makeoptions KCOV=1 +#options KCOV + +# Fault Injection Driver. +#options FAULT + +# Heartbeat checks +#options HEARTBEAT +#options HEARTBEAT_MAX_PERIOD_DEFAULT=15 + +# Compatibility options +# x86_64 never shipped with a.out binaries; the two options below are +# only relevant to 32-bit i386 binaries +#options EXEC_AOUT # required by binaries from before 1.5 +#options COMPAT_NOMID # NetBSD 0.8, 386BSD, and BSDI + +# NetBSD backward compatibility. Support goes from COMPAT_15 up until +# the latest release. Note that really old compat (< COMPAT_16) is only +# useful for 32-bit i386 binaries. +#include "conf/compat_netbsd15.config" + +#options COMPAT_386BSD_MBRPART # recognize old partition ID + +#options COMPAT_NETBSD32 +#options EXEC_ELF32 + +# Wedge support +#options DKWEDGE_AUTODISCOVER # Automatically add dk(4) instances +#options DKWEDGE_METHOD_GPT # Supports GPT partitions as wedges +#options DKWEDGE_METHOD_BSDLABEL # Support disklabel entries as wedges +#options DKWEDGE_METHOD_MBR # Support MBR partitions as wedges +#options DKWEDGE_METHOD_APPLE # Support Apple partitions as wedges +#options DKWEDGE_METHOD_RDB # Support RDB partitions as wedges + +# File systems +#include "conf/filesystems.config" +file-system FFS +file-system EXT2FS +file-system KERNFS +file-system PROCFS + +# File system options +# ffs +#options FFS_EI # FFS Endian Independent support +options FFS_NO_SNAPSHOT # No FFS snapshot support +#options QUOTA # legacy UFS quotas +#options QUOTA2 # new, in-filesystem UFS quotas +#options UFS_ACL # UFS Access Control Lists +#options UFS_DIRHASH # UFS Large Directory Hashing +#options UFS_EXTATTR # Extended attribute support for UFS1 
+options WAPBL # File system journaling support +# lfs +#options LFS_DIRHASH # LFS version of UFS_DIRHASH +# ext2fs +#options EXT2FS_SYSTEM_FLAGS # makes ext2fs file flags (append and + # immutable) behave as system flags. +# other +#options DISKLABEL_EI # disklabel Endian Independent support +#options NFSSERVER # Network File System server + +# Networking options +#options GATEWAY # packet forwarding +options INET # IP + ICMP + TCP + UDP +options INET6 # IPV6 +#options IPSEC # IP security +#options IPSEC_DEBUG # debug for IP security +#options MPLS # MultiProtocol Label Switching (needs mpls) +#options MROUTING # IP multicast routing +#options PIM # Protocol Independent Multicast +#options NETATALK # AppleTalk networking protocols +#options CAN # Controller Area Network protocol +#options PPP_BSDCOMP # BSD-Compress compression support for PPP +#options PPP_DEFLATE # Deflate compression support for PPP +#options PPP_FILTER # Active filter support for PPP (requires bpf) +#options TCP_DEBUG # Record last TCP_NDEBUG packets with SO_DEBUG +#options TCP_SIGNATURE # Enable RFC-2385 TCP md5 signatures + +#options ALTQ # Manipulate network interfaces' output queues +#options ALTQ_BLUE # Stochastic Fair Blue +#options ALTQ_CBQ # Class-Based Queueing +#options ALTQ_CDNR # Diffserv Traffic Conditioner +#options ALTQ_FIFOQ # First-In First-Out Queue +#options ALTQ_FLOWVALVE # RED/flow-valve (red-penalty-box) +#options ALTQ_HFSC # Hierarchical Fair Service Curve +#options ALTQ_LOCALQ # Local queueing discipline +#options ALTQ_PRIQ # Priority Queueing +#options ALTQ_RED # Random Early Detection +#options ALTQ_RIO # RED with IN/OUT +#options ALTQ_WFQ # Weighted Fair Queueing + +# These options enable verbose messages for several subsystems. +# Warning, these may compile large string tables into the kernel! 
+#options ACPIVERBOSE # verbose ACPI configuration messages +#options MIIVERBOSE # verbose PHY autoconfig messages +#options PCIVERBOSE # verbose PCI device autoconfig messages +#options PCI_CONFIG_DUMP # verbosely dump PCI config space +#options PCMCIAVERBOSE # verbose PCMCIA configuration messages +#options SCSIVERBOSE # human readable SCSI error messages +#options USBVERBOSE # verbose USB device autoconfig messages +#options HDAUDIOVERBOSE # human readable HDAUDIO device names + +#options NFS_BOOT_DHCP,NFS_BOOT_BOOTPARAM + +# +# wscons options +# +# builtin terminal emulations +#options WSEMUL_VT100 # VT100 / VT220 emulation +#options WSEMUL_SUN # sun terminal emulation +#options WSEMUL_DEFAULT="\"vt100\"" # NB: default is "sun" if enabled +# different kernel output - see dev/wscons/wsdisplayvar.h +#options WSDISPLAY_CUSTOM_OUTPUT # color customization from wsconsctl(8) +#options WS_KERNEL_FG=WSCOL_GREEN +#options WS_KERNEL_BG=WSCOL_BLACK +# customization of console border color +#options WSDISPLAY_CUSTOM_BORDER # custom border colors via wsconsctl(8) +# compatibility to other console drivers +#options WSDISPLAY_COMPAT_PCVT # emulate some ioctls +#options WSDISPLAY_COMPAT_SYSCONS # emulate some ioctls +#options WSDISPLAY_COMPAT_USL # wsconscfg VT handling +#options WSDISPLAY_COMPAT_RAWKBD # can get raw scancodes +# don't attach pckbd as the console if no PS/2 keyboard is found +#options PCKBD_CNATTACH_MAY_FAIL +# see dev/pckbport/wskbdmap_mfii.c for implemented layouts +#options PCKBD_LAYOUT="(KB_DE | KB_NODEAD)" +# allocate a number of virtual screens at autoconfiguration time +#options WSDISPLAY_DEFAULTSCREENS=4 +# use a large software cursor that doesn't blink +#options PCDISPLAY_SOFTCURSOR +# modify the screen type of the console; defaults to "80x25" +#options VGA_CONSOLE_SCREENTYPE="\"80x24\"" +# work around a hardware bug that loaded fonts don't work; found on ATI cards +#options VGA_CONSOLE_ATI_BROKEN_FONTSEL +# console scrolling support. 
+#options WSDISPLAY_SCROLLSUPPORT +# enable VGA raster mode capable of displaying multilingual text on console +#options VGA_RASTERCONSOLE +# enable splash screen support; requires genfb or radeonfb +#options SPLASHSCREEN + +# Kernel root file system and dump configuration. +config netbsd root on ? type ? +#config netbsd root on sd0a type ffs +#config netbsd root on ? type nfs + +# +# Device configuration +# + +# IPMI support +#ipmi0 at mainbus? +#ipmi_acpi* at acpi? +#ipmi0 at ipmi_acpi? + +# ACPI will be used if present. If not it will fall back to MPBIOS +#acpi0 at mainbus0 +#options ACPI_SCANPCI # find PCI roots using ACPI +options MPBIOS # configure CPUs and APICs using MPBIOS +options MPBIOS_SCANPCI # MPBIOS configures PCI roots +#options PCI_INTR_FIXUP # fixup PCI interrupt routing via ACPI +#options PCI_BUS_FIXUP # fixup PCI bus numbering +#options PCI_ADDR_FIXUP # fixup PCI I/O addresses +#options ACPI_ACTIVATE_DEV # If set, activate inactive devices +#options VGA_POST # in-kernel support for VGA POST + +# ACPI devices +#acpiacad* at acpi? # ACPI AC Adapter +#acpibat* at acpi? # ACPI Battery +#acpibut* at acpi? # ACPI Button +#acpidalb* at acpi? # ACPI Direct Application Launch Button +#acpiec* at acpi? # ACPI Embedded Controller (late) +#acpiecdt* at acpi? # ACPI Embedded Controller (early) +#acpifan* at acpi? # ACPI Fan +#acpilid* at acpi? # ACPI Lid Switch +#acpipmtr* at acpi? # ACPI Power Meter (experimental) +#acpismbus* at acpi? # ACPI SMBus CMI (experimental) +#acpitz* at acpi? # ACPI Thermal Zone +#acpivga* at acpi? # ACPI Display Adapter +#acpiout* at acpivga? # ACPI Display Output Device +#acpiwdrt* at acpi? # ACPI Watchdog Resource Table +#acpiwmi* at acpi? # ACPI WMI Mapper + +# Mainboard devices +#aibs* at acpi? # ASUSTeK AI Booster hardware monitor +#asus* at acpi? # ASUS hotkeys +#attimer* at acpi? # AT Timer +#com0 at acpi? # Serial communications interface +#com1 at acpi? # Serial communications interface +#com* at acpi? 
# Serial communications interface +#fdc* at acpi? # Floppy disk controller +#fd* at fdc? drive ? # the drives themselves +#fujbp* at acpi? # Fujitsu Brightness & Pointer +#fujhk* at acpi? # Fujitsu Hotkeys +##hpacel* at acpi? # HP 3D DriveGuard accelerometer +##hpqlb* at acpi? # HP Quick Launch Buttons +#hpet* at acpihpetbus? # High Precision Event Timer (table) +#hpet* at acpinodebus? # High Precision Event Timer (device) +#joy* at acpi? # Joystick/Game port +#lpt0 at acpi? # Parallel port +#lpt1 at acpi? # Parallel port +#lpt* at acpi? # Parallel port +#mpu* at acpi? # Roland MPU-401 MIDI UART +#pckbc* at acpi? # PC keyboard controller +#pcppi* at acpi? # AT-style speaker sound +#qemufwcfg* at acpi? # QEMU Firmware Configuration device +#sdhc* at acpi? # SD Host Controller +#sony* at acpi? # Sony Notebook Controller +#spic* at acpi? # Sony Programmable I/O Controller +#wsmouse* at spic? # mouse +#thinkpad* at acpi? # IBM/Lenovo Thinkpad hotkeys +#tpm* at acpi? # ACPI TPM (Experimental) +#ug* at acpi? # Abit uGuru Hardware monitor +#valz* at acpi? # Toshiba Dynabook hotkeys +#wb* at acpi? # Winbond W83L518D SD/MMC reader +#sdmmc* at wb? # SD/MMC bus +#wmidell* at acpiwmibus? # Dell WMI mappings +#wmieeepc* at acpiwmibus? # Asus Eee PC WMI mappings +#wmihp* at acpiwmibus? # HP WMI mappings +#wmimsi* at acpiwmibus? # MSI WMI mappings + +# Basic Bus Support + +# PCI bus support +pci* at mainbus? bus ? +#pci* at pchb? bus ? +#pci* at ppb? bus ? + +# PCI bridges +#pchb* at pci? dev ? function ? # PCI-Host bridges +#options AGP_X86 +#pcib* at pci? dev ? function ? # PCI-ISA bridges +#ppb* at pci? dev ? function ? # PCI-PCI bridges +## XXX 'puc's aren't really bridges, but there's no better place for them here +#puc* at pci? dev ? function ? # PCI "universal" comm. cards +# +#amdpcib* at pci? dev ? function ? # AMD 8111 PCI-ISA w/ HPET +#hpet* at amdpcib? +# +#pwdog* at pci? dev ? function ? # QUANCOM PWDOG1 +# +#ichlpcib* at pci? dev ? function ? 
# Intel ICH PCI-LPC w/ timecounter, +# # watchdog, gpio, Speedstep and HPET +#fwhrng* at ichlpcib? # Intel 82802 FWH Random Number Generator +##hpet* at ichlpcib? +#tco* at tcoichbus? # TCO watch dog timer +# +#aapic* at pci? dev ? function ? # AMD 8131 IO apic +# +#agp* at pchb? + +# ISA bus support +isa0 at mainbus? +#isa0 at pcib? +#isa0 at amdpcib? +#isa0 at ichlpcib? + +# CardBus bridge support +#cbb* at pci? dev ? function ? +#cardslot* at cbb? +# +## CardBus bus support +#cardbus* at cardslot? +#pcmcia* at cardslot? +# +## Console Devices +# +## wscons +#pckbc0 at isa? # pc keyboard controller +#pckbd* at pckbc? # PC keyboard +#pms* at pckbc? # PS/2 mouse for wsmouse +##options PMS_DISABLE_POWERHOOK # Disable PS/2 reset on resume +#options PMS_SYNAPTICS_TOUCHPAD # Enable support for Synaptics Touchpads +#options PMS_ELANTECH_TOUCHPAD # Enable support for Elantech Touchpads +#options PMS_ALPS_TOUCHPAD # Enable support for Alps Touchpads +#vga* at pci? dev ? function ? +#genfb* at pci? dev ? function ? +#options VCONS_DRAW_INTR +#wsdisplay* at vga? console ? +#wsdisplay* at wsemuldisplaydev? +#wskbd* at pckbd? console ? +#wsmouse* at pms? mux 0 +#wsmouse* at wsmousedev? +# +#attimer0 at isa? +#pcppi0 at isa? +#sysbeep0 at pcppi? +# +## DRI legacy drivers +##i915drm* at drm? # Intel i915, i945 DRM driver +##mach64drm* at drm? # mach64 (3D Rage Pro, Rage) DRM driver +##mgadrm* at drm? # Matrox G[24]00, G[45]50 DRM driver +##r128drm* at drm? # ATI Rage 128 DRM driver +##radeondrm* at drm? # ATI Radeon DRM driver +##savagedrm* at drm? # S3 Savage DRM driver +##sisdrm* at drm? # SiS DRM driver +##tdfxdrm* at drm? # 3dfx (voodoo) DRM driver +# +## DRMKMS drivers +#i915drmkms* at pci? dev ? function ? +#intelfb* at intelfbbus? +# +#radeon* at pci? dev ? function ? +#radeondrmkmsfb* at radeonfbbus? +# +##amdgpu* at pci? dev ? function ? +##amdgpufb* at amdgpufbbus? +# +#nouveau* at pci? dev ? function ? +#nouveaufb* at nouveaufbbus? 
+# +## DRMUMS drivers -- make them loadable, but not statically linked in +#options DRM_LEGACY +##viadrmums* at drm? +# +##options DRM_MAX_RESOLUTION_HORIZONTAL=1920 # Limit DRM size in horizontal dimension +##options DRM_MAX_RESOLUTION_VERTICAL=1080 # Limit DRM size in vertical dimension +# +## Cryptographic Devices +# +## PCI cryptographic devices +#amdccp* at pci? dev ? function ? # AMD Cryptographic Coprocessor +#hifn* at pci? dev ? function ? # Hifn 7755/7811/795x +##qat* at pci? dev ? function ? # Intel QuickAssist +#ubsec* at pci? dev ? function ? # Broadcom 5501/5601/580x/582x +# +## Trusted Platform Module +#tpm* at isa? iomem 0xfed40000 irq 7 +# +## Serial Devices +# +## PCI serial interfaces +#com* at puc? port ? # 16x50s on "universal" comm boards +#cy* at pci? dev ? function ? # Cyclades Cyclom-Y serial boards +#cz* at pci? dev ? function ? # Cyclades-Z multi-port serial boards +# +## PCMCIA serial interfaces +#com* at pcmcia? function ? # Modems and serial cards +# +#pcmcom* at pcmcia? function ? # PCMCIA multi-port serial cards +#com* at pcmcom? slave ? # ...and the slave devices +# +## CardBus serial interfaces +#com* at cardbus? function ? # Modems and serial cards + +# ISA serial interfaces +#options COM_HAYESP # adds Hayes ESP serial board support +com0 at isa? port 0x3f8 irq 4 # Standard PC serial ports +#com1 at isa? port 0x2f8 irq 3 + +# Parallel Printer Interfaces + +# PCI parallel printer interfaces +#lpt* at puc? port ? # || ports on "universal" comm boards +# +## ISA parallel printer interfaces +#lpt0 at isa? port 0x378 irq 7 # standard PC parallel ports +#lpt1 at isa? port 0x278 +# +## Hardware monitors +# +#amdnb_misc* at pci? # AMD NB Misc Configuration +#amdtemp* at amdnb_misc? # AMD CPU Temperature sensors +# +#amdsmn* at pci? # AMD SMN Configuration +#amdzentemp* at amdsmnbus? # AMD Ryzen Family 17h CPU temp sensors +# +## Winbond LPC Super I/O +##wbsio* at isa? port 0x2e +##wbsio* at isa? 
port 0x4e +# +## IBM Hawk Integrated Systems Management Processor +##ibmhawk0 at iic? addr 0x37 +# +## LM7[89] and compatible hardware monitors +## Use flags to select temp sensor type (see lm(4) man page for details) +##lm0 at isa? port 0x290 flags 0x0 # other common ports: 0x280, 0x310 +##lm* at wbsio? +# +## SMSC LPC47B397 hardware monitor functions +##smsc0 at isa? port 0x02e +# +## SMSC LPC47M192 hardware monitor +##smscmon* at iic? addr 0x2c +##smscmon* at iic? addr 0x2d # (alternate address) +# +## AMD 768 and 8111 power/ACPI controllers +#amdpm* at pci? dev ? function ? # RNG and SMBus 1.0 interface +##iic* at amdpm? # sensors below are on this bus +# +## NVIDIA nForce2/3/4 SMBus controller +#nfsmbc* at pci? dev ? function ? +#nfsmb* at nfsmbc? +#iic* at nfsmb? +# +## Intel PIIX4 power management controllers +#piixpm* at pci? dev ? function ? # PIIX4 compatible PM controller +#iic* at piixpm? # SMBus on PIIX4 +# +## Intel ICH SMBus controller +#ichsmb* at pci? dev ? function ? +#iic* at ichsmb? +# +## Intel S1200,C2000 (non-pch) SMBus controller +#ismt* at pci? dev ? function ? +#iic* at ismt? +# +## DesignWare I2C controller as found in some Intel PCH and AMD FCH devices. +#dwiic* at acpi? # DesignWare I2C controller +#dwiic* at pci? # DesignWare I2C controller +#iic* at dwiic? +# +## Thermal monitor and fan controller +##dbcool* at iic? addr 0x2C # Unknown other motherboard(s) +##dbcool* at iic? addr 0x2D # Tyan S2881 +##dbcool* at iic? addr 0x2E # Tyan S2882-D +# +## IBM Thinkpad Active Protection System +##aps0 at isa? port 0x1600 +# +## Fintek Super I/O with hardware monitor +##finsio0 at isa? port 0x4e +# +## iTE IT87xxF Super I/O with watchdog and sensors support +##itesio0 at isa? port 0x2e +# +## Abit uGuru Hardware system monitor +##ug0 at isa? port 0xe0 +# +## Serial Presence Detect capable memory modules +##spdmem* at iic? addr 0x50 +##spdmem* at iic? addr 0x51 +##spdmem* at iic? addr 0x52 +##spdmem* at iic? addr 0x53 +##spdmem* at iic? 
addr 0x54 +##spdmem* at iic? addr 0x55 +##spdmem* at iic? addr 0x56 +##spdmem* at iic? addr 0x57 +##sdtemp* at iic? addr 0x18 +##sdtemp* at iic? addr 0x19 +##sdtemp* at iic? addr 0x1a +##sdtemp* at iic? addr 0x1b +##sdtemp* at iic? addr 0x1c +##sdtemp* at iic? addr 0x1d +##sdtemp* at iic? addr 0x1e +##sdtemp* at iic? addr 0x1f +# +## Intel GPIO +#igpio* at acpi? +# +## I2C HID devices +#ihidev* at iic? +# +## I2C Mice +#ims* at ihidev? reportid ? +#wsmouse* at ims? mux 0 +# +## I2O devices +#iop* at pci? dev ? function ? # I/O processor +#iopsp* at iop? tid ? # SCSI/FC-AL ports +#ld* at iop? tid ? # block devices +## XXX dpti.c wants a processor type that is not assigned for x86-64 +##dpti* at iop? tid 0 # DPT/Adaptec control interface +# +## GPIO devices +#gpio* at gpiobus? +# +## 1- Wire support +##gpioow* at gpio? offset ? mask ? # 1-wire bitbanging via gpio +#gpioow* at gpio? +#onewire* at gpioow? +# +## 1-Wire devices +#owtemp* at onewire? # Temperature sensors +# +## I2C support +##gpioiic* at gpio? +##iic* at gpioiic? +# +## Keylock support +##gpiolock* at gpio? +# +## Pulsing GPIO pins in software +##gpiopwm* at gpio? +# +## Soekris 6501 GPIO/LED driver (provides gpiobus, needs gpio) +##soekrisgpio0 at isa? port 0x680 +# +## Nuvoton NCT5104D SuperIO providing GPIO +#nct0 at isa? port ? +# +## SCSI Controllers and Devices +# +## PCI SCSI controllers +#adv* at pci? dev ? function ? # AdvanSys 1200[A,B], 9xx[U,UA] SCSI +#adw* at pci? dev ? function ? # AdvanSys 9x0UW[D], 3940U[2,3]W SCSI +#ahc* at pci? dev ? function ? # Adaptec [23]94x, aic78x0 SCSI +#ahd* at pci? dev ? function ? # Adaptec aic790x SCSI +#bha* at pci? dev ? function ? # BusLogic 9xx SCSI +#dpt* at pci? dev ? function ? # DPT SmartCache/SmartRAID +#iha* at pci? dev ? function ? # Initio INIC-940/950 SCSI +#isp* at pci? dev ? function ? # Qlogic ISP [12]0x0 SCSI/FibreChannel +#mfi* at pci? dev ? function ? # LSI MegaRAID SAS +#mfii* at pci? dev ? function ? 
# LSI MegaRAID SAS (Fusion and newer) +#mly* at pci? dev ? function ? # Mylex AcceleRAID and eXtremeRAID +#mpt* at pci? dev ? function ? # LSILogic 9x9 and 53c1030 (Fusion-MPT) +#mpii* at pci? dev ? function ? # LSI Logic Fusion-MPT II +#njs* at pci? dev ? function ? # Workbit NinjaSCSI-32 +#pcscp* at pci? dev ? function ? # AMD 53c974 PCscsi-PCI SCSI +#siop* at pci? dev ? function ? # Symbios 53c8xx SCSI +#esiop* at pci? dev ? function ? # Symbios 53c875 and newer SCSI +##options SIOP_SYMLED # drive the act. LED in software +#trm* at pci? dev ? function ? # Tekram DC-395U/UW/F, DC-315/U SCSI +# +## PCMCIA SCSI controllers +#aic* at pcmcia? function ? # Adaptec APA-1460 SCSI +#esp* at pcmcia? function ? # Qlogic ESP406/FAS408 SCSI +#spc* at pcmcia? function ? # Fujitsu MB87030/MB89352 SCSI +# +## CardBus SCSI cards +#adv* at cardbus? function ? # AdvanSys 1200[A,B], 9xx[U,UA] SCSI +#ahc* at cardbus? function ? # Adaptec ADP-1480 +#njs* at cardbus? function ? # Workbit NinjaSCSI-32 +# +## SCSI bus support +scsibus* at scsi? +# +## SCSI devices +#sd* at scsibus? target ? lun ? # SCSI disk drives +#st* at scsibus? target ? lun ? # SCSI tape drives +#cd* at scsibus? target ? lun ? # SCSI CD-ROM drives +#ch* at scsibus? target ? lun ? # SCSI autochangers +#ses* at scsibus? target ? lun ? # SCSI Enclosure Services devices +#ss* at scsibus? target ? lun ? # SCSI scanners +#uk* at scsibus? target ? lun ? # SCSI unknown +# +# +## RAID controllers and devices +#aac* at pci? dev ? function ? # Adaptec AAC family +#amr* at pci? dev ? function ? # AMI/LSI Logic MegaRAID +#arcmsr* at pci? dev ? function ? # Areca SATA RAID controllers +#cac* at pci? dev ? function ? # Compaq PCI array controllers +#ciss* at pci? dev ? function ? # HP Smart Array controllers +#icp* at pci? dev ? function ? # ICP-Vortex GDT & Intel RAID +#mlx* at pci? dev ? function ? # Mylex DAC960 & DEC SWXCR family +#twe* at pci? dev ? function ? # 3ware Escalade RAID controllers +#twa* at pci? dev ? function ? 
# 3ware Escalade 9xxx RAID controllers +# +#ld* at aac? unit ? +#ld* at amr? unit ? +#ld* at cac? unit ? +#ld* at icp? unit ? +#ld* at twe? unit ? +#ld* at twa? unit ? +#ld* at mlx? unit ? +# +#icpsp* at icp? unit ? # SCSI pass-through +# +## IDE and related devices +## PCI IDE controllers - see pciide(4) for supported hardware. +## The 0x0001 flag force the driver to use DMA, even if the driver doesn't know +## how to set up DMA modes for this chip. This may work, or may cause +## a machine hang with some controllers. +#pciide* at pci? dev ? function ? flags 0x0000 # GENERIC pciide driver +#acardide* at pci? dev ? function ? # Acard IDE controllers +#aceride* at pci? dev ? function ? # Acer Lab IDE controllers +#ahcisata* at pci? dev ? function ? # AHCI SATA controllers +#artsata* at pci? dev ? function ? # Intel i31244 SATA controller +#cmdide* at pci? dev ? function ? # CMD tech IDE controllers +#cypide* at pci? dev ? function ? # Cypress IDE controllers +#hptide* at pci? dev ? function ? # Triones/HighPoint IDE controllers +#iteide* at pci? dev ? function ? # IT Express IDE controllers +#ixpide* at pci? dev ? function ? # ATI IXP IDE controllers +#jmide* at pci? dev ? function ? # JMicron PCI-e PATA/SATA controllers +#ahcisata* at jmide? +#mvsata* at pci? dev ? function ? # Marvell Hercules-I/II +#optiide* at pci? dev ? function ? # Opti IDE controllers +#piixide* at pci? dev ? function ? # Intel IDE controllers +#pdcide* at pci? dev ? function ? # Promise IDE controllers +#pdcsata* at pci? dev ? function ? # Promise SATA150 controllers +#satalink* at pci? dev ? function ? # SiI SATALink controllers +#siisata* at pci? dev ? function ? # SiI SteelVine controllers +#siside* at pci? dev ? function ? # SiS IDE controllers +#slide* at pci? dev ? function ? # Symphony Labs IDE controllers +#svwsata* at pci? dev ? function ? # ServerWorks SATA controllers +#toshide* at pci? dev ? function ? # TOSHIBA PICCOLO controllers +#viaide* at pci? dev ? function ? 
# VIA/AMD/Nvidia IDE controllers +# +## PCMCIA IDE controllers +#wdc* at pcmcia? function ? +# +## CardBus IDE controllers +#njata* at cardbus? function ? flags 0x01 # Workbit NinjaATA-32 +#siisata* at cardbus? function ? # SiI SteelVine controllers +# +## ISA ST506, ESDI, and IDE controllers +## Use flags 0x01 if you want to try to use 32bits data I/O (the driver will +## fall back to 16bits I/O if 32bits I/O are not functional). +## Some controllers pass the initial 32bit test, but will fail later. +#wdc0 at isa? port 0x1f0 irq 14 flags 0x00 +#wdc1 at isa? port 0x170 irq 15 flags 0x00 +# +## ATA (IDE) bus support +#atabus* at ata? +#options ATADEBUG +# +## IDE drives +## Flags are used only with controllers that support DMA operations +## and mode settings (e.g. some pciide controllers) +## The lowest order four bits (rightmost digit) of the flags define the PIO +## mode to use, the next set of four bits the DMA mode and the third set the +## UltraDMA mode. For each set of four bits, the 3 lower bits define the mode +## to use, and the last bit must be 1 for this setting to be used. +## For DMA and UDMA, 0xf (1111) means 'disable'. +## 0x0fac means 'use PIO mode 4, DMA mode 2, disable UltraDMA'. +## (0xc=1100, 0xa=1010, 0xf=1111) +## 0x0000 means "use whatever the drive claims to support". +#wd* at atabus? drive ? flags 0x0000 +# +## ATAPI bus support +#atapibus* at atapi? +# +# +## ATA RAID configuration support, as found on some Promise controllers. +#pseudo-device ataraid +#ld* at ataraid? vendtype ? unit ? +# +## ATAPI devices +## flags have the same meaning as for IDE drives. +#cd* at atapibus? drive ? flags 0x0000 # ATAPI CD-ROM drives +#sd* at atapibus? drive ? flags 0x0000 # ATAPI disk drives +#st* at atapibus? drive ? flags 0x0000 # ATAPI tape drives +#uk* at atapibus? drive ? flags 0x0000 # ATAPI unknown +# +# +## NVM Express controllers and devices +#nvme* at pci? dev ? function ? +#ld* at nvme? nsid ? 
+# +# +## Miscellaneous mass storage devices +# +## ISA floppy +##fdc0 at isa? port 0x3f0 irq 6 drq 2 # standard PC floppy controllers +##fdc1 at isa? port 0x370 irq ? drq ? +# +## Network Interfaces +# +## PCI network interfaces +#age* at pci? dev ? function ? # Attansic/Atheros L1 Gigabit Ethernet +#alc* at pci? dev ? function ? # Attansic/Atheros L1C/L2C Ethernet +#ale* at pci? dev ? function ? # Attansic/Atheros L1E Ethernet +#an* at pci? dev ? function ? # Aironet PC4500/PC4800 (802.11) +#aq* at pci? dev ? function ? # Aquantia AQC 10 gigabit +#ath* at pci? dev ? function ? # Atheros 5210/5211/5212 802.11 +#athn* at pci? dev ? function ? # Atheros AR9k (802.11a/g/n) +#atw* at pci? dev ? function ? # ADMtek ADM8211 (802.11) +#bce* at pci? dev ? function ? # Broadcom 440x 10/100 Ethernet +#bge* at pci? dev ? function ? # Broadcom 570x gigabit Ethernet +#bnx* at pci? dev ? function ? # Broadcom NetXtremeII gigabit Ethernet +#bwi* at pci? dev ? function ? # Broadcom BCM43xx wireless +#bwfm* at pci? dev ? function ? # Broadcom FullMAC +#cas* at pci? dev ? function ? # Sun Cassini/Cassini+ Ethernet +#dge* at pci? dev ? function ? # Intel 82597 10GbE LR +#ena* at pci? dev ? function ? # Amazon.com Elastic Network Adapter +#ep* at pci? dev ? function ? # 3Com 3c59x +#epic* at pci? dev ? function ? # SMC EPIC/100 Ethernet +##eqos* at pci? dev ? function ? # DesignWare Ethernet QoS +#et* at pci? dev ? function ? # Agere/LSI ET1310/ET1301 Gigabit +#ex* at pci? dev ? function ? # 3Com 90x[BC] +#fxp* at pci? dev ? function ? # Intel EtherExpress PRO 10+/100B +#gem* at pci? dev ? function ? # Apple GMAC and Sun ERI gigabit enet +#gsip* at pci? dev ? function ? # NS83820 Gigabit Ethernet +#hme* at pci? dev ? function ? # Sun Microelectronics STP2002-STQ +#iavf* at pci? dev ? function ? # Intel Adaptive Virtual Function +#igc* at pci? dev ? function ? # Intel I225 2.5 gigabit +#ipw* at pci? dev ? function ? # Intel PRO/Wireless 2100 +#iwi* at pci? dev ? function ? 
# Intel PRO/Wireless 2200BG +#iwm* at pci? dev ? function ? # Intel Centrino 7260 +#iwn* at pci? dev ? function ? # Intel PRO/Wireless 4965AGN +#ixg* at pci? dev ? function ? # Intel 8259x 10 gigabit +#ixl* at pci? dev ? function ? # Intel Ethernet 700 Series +#ixv* at pci? dev ? function ? # Intel 8259x 10G virtual function +#jme* at pci? dev ? function ? # JMicron JMC2[56]0 ethernet +#kse* at pci? dev ? function ? # Micrel KSZ8841/8842 ethernet +#lii* at pci? dev ? function ? # Atheros L2 Fast-Ethernet +#malo* at pci? dev ? function ? # Marvell Libertas Wireless +#mcx* at pci? dev ? function ? # Mellanox 5th generation Ethernet +#mskc* at pci? dev ? function ? # Marvell Yukon 2 Gigabit Ethernet +#msk* at mskc? # Marvell Yukon 2 Gigabit Ethernet +#mtd* at pci? dev ? function ? # Myson MTD803 3-in-1 Ethernet +#ne* at pci? dev ? function ? # NE2000-compatible Ethernet +#nfe* at pci? dev ? function ? # NVIDIA nForce Ethernet +#ntwoc* at pci? dev ? function ? # Riscom/N2 PCI Sync Serial +#pcn* at pci? dev ? function ? # AMD PCnet-PCI Ethernet +#ral* at pci? dev ? function ? # Ralink Technology RT25x0 802.11a/b/g +#re* at pci? dev ? function ? # Realtek 8139C+/8169/8169S/8110S +#rge* at pci? dev ? function ? # Realtek 8125 +#rtk* at pci? dev ? function ? # Realtek 8129/8139 +#rtw* at pci? dev ? function ? # Realtek 8180L (802.11) +#rtwn* at pci? dev ? function ? # Realtek 8188CE/8192CE 802.11b/g/n +#sf* at pci? dev ? function ? # Adaptec AIC-6915 Ethernet +#sip* at pci? dev ? function ? # SiS 900/DP83815 Ethernet +#skc* at pci? dev ? function ? # SysKonnect SK9821 Gigabit Ethernet +#sk* at skc? # SysKonnect SK9821 Gigabit Ethernet +#ste* at pci? dev ? function ? # Sundance ST-201 Ethernet +#stge* at pci? dev ? function ? # Sundance/Tamarack TC9021 Gigabit +#ti* at pci? dev ? function ? # Alteon ACEnic gigabit Ethernet +#tl* at pci? dev ? function ? # ThunderLAN-based Ethernet +#tlp* at pci? dev ? function ? # DECchip 21x4x and clones +#txp* at pci? dev ? function ? 
# 3com 3cr990 +#vge* at pci? dev ? function ? # VIATech VT612X Gigabit Ethernet +#vmx* at pci? dev ? function ? # VMware VMXNET3 +#vr* at pci? dev ? function ? # VIA Rhine Fast Ethernet +#wi* at pci? dev ? function ? # Intersil Prism Mini-PCI (802.11b) +#wm* at pci? dev ? function ? # Intel 82543/82544 gigabit +#wpi* at pci? dev ? function ? # Intel PRO/Wireless 3945ABG +#xge* at pci? dev ? function ? # Neterion (S2io) Xframe-I 10GbE +# +## PCMCIA network interfaces +#an* at pcmcia? function ? # Aironet PC4500/PC4800 (802.11) +#awi* at pcmcia? function ? # BayStack 650/660 (802.11FH/DS) +#cnw* at pcmcia? function ? # Xircom/Netwave AirSurfer +#cs* at pcmcia? function ? # CS89xx Ethernet +#ep* at pcmcia? function ? # 3Com 3c589 and 3c562 Ethernet +#malo* at pcmcia? function ? # Marvell Libertas +#mbe* at pcmcia? function ? # MB8696x based Ethernet +#ne* at pcmcia? function ? # NE2000-compatible Ethernet +#ray* at pcmcia? function ? # Raytheon Raylink (802.11) +#sm* at pcmcia? function ? # Megahertz Ethernet +#wi* at pcmcia? function ? # Lucent/Intersil WaveLan IEEE (802.11) +#xirc* at pcmcia? function ? # Xircom CreditCard Ethernet +#com* at xirc? +#xi* at xirc? +# +#mhzc* at pcmcia? function ? # Megahertz Ethernet/Modem combo cards +#com* at mhzc? +#sm* at mhzc? +# +## CardBus network cards +#ath* at cardbus? function ? # Atheros 5210/5211/5212 802.11 +#athn* at cardbus? function ? # Atheros AR9k (802.11a/g/n) - UNTESTED +#atw* at cardbus? function ? # ADMtek ADM8211 (802.11) +#ex* at cardbus? function ? # 3Com 3C575TX +#fxp* at cardbus? function ? # Intel i8255x +#malo* at cardbus? function ? # Marvell Libertas Wireless +#ral* at cardbus? function ? # Ralink Technology RT25x0 802.11a/b/g +#re* at cardbus? function ? # Realtek 8139C+/8169/8169S/8110S +#rtk* at cardbus? function ? # Realtek 8129/8139 +#rtw* at cardbus? function ? # Realtek 8180L (802.11) +#tlp* at cardbus? function ? # DECchip 21143 +# +## MII/PHY support +#acphy* at mii? phy ? 
# DAltima AC101 and AMD Am79c874 PHYs +#amhphy* at mii? phy ? # AMD 79c901 Ethernet PHYs +#atphy* at mii? phy ? # Attansic/Atheros PHYs +#bmtphy* at mii? phy ? # Broadcom BCM5201 and BCM5202 PHYs +#brgphy* at mii? phy ? # Broadcom BCM5400-family PHYs +#ciphy* at mii? phy ? # Cicada CS8201 Gig-E PHYs +#dmphy* at mii? phy ? # Davicom DM9101 PHYs +#etphy* at mii? phy ? # Agere/LSI ET1011 TruePHY Gig-E PHYs +#exphy* at mii? phy ? # 3Com internal PHYs +#gentbi* at mii? phy ? # Generic Ten-Bit 1000BASE-[CLS]X PHYs +#glxtphy* at mii? phy ? # Level One LXT-1000 PHYs +#gphyter* at mii? phy ? # NS83861 Gig-E PHY +#icsphy* at mii? phy ? # Integrated Circuit Systems ICS189x +#igphy* at mii? phy ? # Intel IGP01E1000 +#ihphy* at mii? phy ? # Intel 82577 PHYs +#ikphy* at mii? phy ? # Intel 82563 PHYs +#inphy* at mii? phy ? # Intel 82555 PHYs +#iophy* at mii? phy ? # Intel 82553 PHYs +#ipgphy* at mii? phy ? # IC PLUS IP1000A/IP1001 PHYs +#jmphy* at mii? phy ? # Jmicron JMP202/211 PHYs +#lxtphy* at mii? phy ? # Level One LXT-970 PHYs +#makphy* at mii? phy ? # Marvell Semiconductor 88E1000 PHYs +#micphy* at mii? phy ? # Micrel KSZ[89]xxx PHYs +#nsphy* at mii? phy ? # NS83840 PHYs +#nsphyter* at mii? phy ? # NS83843 PHYs +#pnaphy* at mii? phy ? # generic HomePNA PHYs +#qsphy* at mii? phy ? # Quality Semiconductor QS6612 PHYs +#rgephy* at mii? phy ? # Realtek 8169S/8110 internal PHYs +#rlphy* at mii? phy ? # Realtek 8139/8201L PHYs +#smscphy* at mii? phy ? # SMSC LAN87xx PHYs +#sqphy* at mii? phy ? # Seeq 80220/80221/80223 PHYs +#tlphy* at mii? phy ? # ThunderLAN PHYs +#tqphy* at mii? phy ? # TDK Semiconductor PHYs +#ukphy* at mii? phy ? # generic unknown PHYs +#urlphy* at mii? phy ? # Realtek RTL8150L internal PHYs +# +# +## USB Controller and Devices +# +## Virtual USB controller +##pseudo-device vhci +# +## PCI USB controllers +#xhci* at pci? dev ? function ? # eXtensible Host Controller +#ehci* at pci? dev ? function ? # Enhanced Host Controller +#ohci* at pci? dev ? function ? 
# Open Host Controller +#uhci* at pci? dev ? function ? # Universal Host Controller (Intel) +# +## CardBus USB controllers +#ehci* at cardbus? function ? # Enhanced Host Controller +#ohci* at cardbus? function ? # Open Host Controller +#uhci* at cardbus? function ? # Universal Host Controller (Intel) +# +## ISA USB controllers +##slhci0 at isa? port 0x300 irq 5 # ScanLogic SL811HS +# +## PCMCIA USB controllers +#slhci* at pcmcia? function ? # ScanLogic SL811HS +# +## USB bus support +##usb* at vhci? +#usb* at xhci? +#usb* at ehci? +#usb* at ohci? +#usb* at uhci? +#usb* at slhci? +# +#include "dev/usb/usbdevices.config" +# +## PCI IEEE1394 controllers +#fwohci* at pci? dev ? function ? # IEEE1394 Open Host Controller +# +## CardBus IEEE1394 controllers +#fwohci* at cardbus? function ? # IEEE1394 Open Host Controller +# +#ieee1394if* at fwohci? +#fwip* at ieee1394if? # IP over IEEE1394 +#sbp* at ieee1394if? euihi ? euilo ? +# +## Audio Devices +# +## PCI audio devices +#auacer* at pci? dev ? function ? # ALi M5455 integrated AC'97 Audio +#auich* at pci? dev ? function ? # Intel/AMD/nVidia AC'97 Audio +#auixp* at pci? dev ? function ? # ATI IXP AC'97 Audio +#autri* at pci? dev ? function ? # Trident 4DWAVE based AC'97 Audio +#auvia* at pci? dev ? function ? # VIA AC'97 audio +#clcs* at pci? dev ? function ? # Cirrus Logic CS4280 +#clct* at pci? dev ? function ? # Cirrus Logic CS4281 +#cmpci* at pci? dev ? function ? # C-Media CMI8338/8738 +#eap* at pci? dev ? function ? # Ensoniq AudioPCI +#emuxki* at pci? dev ? function ? # Creative SBLive! and PCI512 +#esa* at pci? dev ? function ? # ESS Allegro-1 / Maestro-3 PCI Audio +#esm* at pci? dev ? function ? # ESS Maestro-1/2/2e PCI Audio Accelerator +#eso* at pci? dev ? function ? # ESS Solo-1 PCI AudioDrive +#fms* at pci? dev ? function ? # Forte Media FM801 +#neo* at pci? dev ? function ? # NeoMagic 256 AC'97 Audio +#sv* at pci? dev ? function ? # S3 SonicVibes +#yds* at pci? dev ? function ? 
# Yamaha DS-1 PCI Audio +# +## OPL[23] FM synthesizers +##opl0 at isa? port 0x388 # use only if not attached to sound card +#opl* at cmpci? flags 1 +#opl* at eso? +#opl* at fms? +#opl* at sv? +# +## High Definition Audio +#hdaudio* at pci? dev ? function ? # High Definition Audio +#hdafg* at hdaudiobus? +# +## Audio support +#audio* at audiobus? +# +## The spkr driver provides a simple tone interface to the built in speaker. +#spkr* at pcppi? # PC speaker +#spkr* at audio? # PC speaker (synthesized) +##wsbell* at spkr? # Bell for wscons display (module by default) +# +## MPU 401 UARTs +##mpu* at isa? port 0x330 irq 9 # MPU401 or compatible card +#mpu* at cmpci? +#mpu* at eso? +#mpu* at yds? +# +## MIDI support +#midi* at midibus? +#midi* at pcppi? # MIDI interface to the PC speaker +# +## FM-Radio devices +## PCI radio devices +##gtp* at pci? dev ? function ? # Guillemot Maxi Radio FM 2000 Radio Card +# +## Radio support +##radio* at gtp? +# +# +## Video capture devices +# +#coram* at pci? dev ? function ? # Conexant CX23885 PCI-E TV +#cxdtv* at pci? dev ? function ? # Conexant CX2388[0-3] PCI TV +# +#video* at videobus? # Analog capture interface +#dtv* at dtvbus? # Digital capture interface +# +# +## TV cards +# +## Brooktree 848/849/878/879 based TV cards +#bktr* at pci? dev ? function ? +#radio* at bktr? +# +# +## Bluetooth Controller and Device support +# +## Bluetooth PCMCIA Controllers +#bt3c* at pcmcia? function ? # 3Com 3CRWB6096-A +#btbc* at pcmcia? function ? # AnyCom BlueCard LSE041/039/139 +# +## Bluetooth SDIO Controllers +#sbt* at sdmmc? +# +## Bluetooth USB Controllers +#ubt* at uhub? port ? +#aubtfwl* at uhub? port ? +# +## Bluetooth Device Hub +#bthub* at bcsp? +#bthub* at bt3c? +#bthub* at btbc? +#bthub* at btuart? +#bthub* at sbt? +#bthub* at ubt? +# +## Bluetooth HID support +#bthidev* at bthub? +# +## Bluetooth Mouse +#btms* at bthidev? reportid ? +#wsmouse* at btms? mux 0 +# +## Bluetooth Keyboard +#btkbd* at bthidev? reportid ? 
+#wskbd* at btkbd? console ? mux 1 +# +## Bluetooth Apple Magic Mouse +#btmagic* at bthub? +#wsmouse* at btmagic? mux 0 +# +## Bluetooth Audio support +#btsco* at bthub? +# +# +## SD/MMC/SDIO Controller and Device support +# +## SD/MMC controller +#sdhc* at pci? # SD Host Controller +#rtsx* at pci? # Realtek RTS5209/RTS5229 Card Reader +#sdhc* at cardbus? # SD Host Controller +#sdmmc* at sdhc? # SD/MMC bus +#sdmmc* at rtsx? # SD/MMC bus +# +#ld* at sdmmc? +# +# +## Middle Digital, Inc. PCI-Weasel serial console board control +## devices (watchdog timer, etc.) +#weasel* at pci? +# +## Virtio devices +virtio* at pci? dev ? function ? # Virtio PCI device +#viomb* at virtio? # Virtio memory balloon device +ld* at virtio? # Virtio disk device +vioif* at virtio? # Virtio network device +#viornd* at virtio? # Virtio entropy device +vioscsi* at virtio? # Virtio SCSI device +##vio9p* at virtio? # Virtio 9P device +# +## Hyper-V devices +#vmbus* at acpi? # Hyper-V VMBus +#genfb* at vmbus? # Hyper-V Synthetic Video Framebuffer +#hvkbd* at vmbus? # Hyper-V Synthetic Keyboard +#wskbd* at hvkbd? console ? mux 1 +#hvn* at vmbus? # Hyper-V NetVSC +#hvs* at vmbus? # Hyper-V StorVSC +#hvheartbeat* at vmbus? # Hyper-V Heartbeat Service +#hvshutdown* at vmbus? # Hyper-V Guest Shutdown Service +#hvtimesync* at vmbus? # Hyper-V Time Synchronization Service +##hvkvp* at vmbus? # Hyper-V Data Exchange Service +# +## Pseudo-Devices +# +#pseudo-device crypto # /dev/crypto device +#pseudo-device swcrypto # software crypto implementation +# +## disk/mass storage pseudo-devices +#pseudo-device bio # RAID control device driver +#pseudo-device ccd # concatenated/striped disk devices +#pseudo-device cgd # cryptographic disk devices +#pseudo-device raid # RAIDframe disk driver +#options RAID_AUTOCONFIG # auto-configuration of RAID components +## Options to enable various other RAIDframe RAID types. 
+##options RF_INCLUDE_EVENODD=1 +##options RF_INCLUDE_RAID5_RS=1 +##options RF_INCLUDE_PARITYLOGGING=1 +##options RF_INCLUDE_CHAINDECLUSTER=1 +##options RF_INCLUDE_INTERDECLUSTER=1 +##options RF_INCLUDE_PARITY_DECLUSTERING=1 +##options RF_INCLUDE_PARITY_DECLUSTERING_DS=1 +#pseudo-device fss # file system snapshot device +# +#pseudo-device md # memory disk device (ramdisk) +#options MEMORY_DISK_HOOKS # enable md specific hooks +#options MEMORY_DISK_DYNAMIC # enable dynamic resizing +# +#pseudo-device vnd # disk-like interface to files +#options VND_COMPRESSION # compressed vnd(4) +# +# +## network pseudo-devices +pseudo-device bpfilter # Berkeley packet filter +#pseudo-device carp # Common Address Redundancy Protocol +pseudo-device loop # network loopback +##pseudo-device mpls # MPLS pseudo-interface +#pseudo-device ppp # Point-to-Point Protocol +#pseudo-device pppoe # PPP over Ethernet (RFC 2516) +#pseudo-device sl # Serial Line IP +#pseudo-device irframetty # IrDA frame line discipline +#pseudo-device tun # network tunneling over tty +#pseudo-device tap # virtual Ethernet +#pseudo-device gre # generic L3 over IP tunnel +#pseudo-device gif # IPv[46] over IPv[46] tunnel (RFC1933) +#pseudo-device ipsecif # tunnel interface for routing based ipsec +##pseudo-device faith # IPv[46] tcp relay translation i/f +#pseudo-device stf # 6to4 IPv6 over IPv4 encapsulation +#pseudo-device vlan # IEEE 802.1q encapsulation +#pseudo-device bridge # simple inter-network bridging +#pseudo-device vether # Virtual Ethernet for bridge +#pseudo-device agr # IEEE 802.3ad link aggregation +#pseudo-device l2tp # L2TPv3 interface +#pseudo-device lagg # Link aggregation interface +#pseudo-device npf # NPF packet filter +# +##pseudo-device canloop # CAN loopback interface +# +## +## accept filters +#pseudo-device accf_data # "dataready" accept filter +#pseudo-device accf_http # "httpready" accept filter +# +## miscellaneous pseudo-devices +pseudo-device pty # pseudo-terminals +#pseudo-device 
sequencer # MIDI sequencer +## rnd works; RND_COM does not on port i386 yet. +##options RND_COM # use "com" randomness as well (BROKEN) +pseudo-device clockctl # user control of clock subsystem +pseudo-device ksyms # /dev/ksyms +#pseudo-device lockstat # lock profiling +#pseudo-device bcsp # BlueCore Serial Protocol +#pseudo-device btuart # Bluetooth HCI UART (H4) +##pseudo-device nvmm # NetBSD Virtual Machine Monitor +#pseudo-device swwdog # software watchdog timer -- swwdog(4) +# +## wscons pseudo-devices +#pseudo-device wsmux # mouse & keyboard multiplexor +#pseudo-device wsfont +## Give us a choice of fonts based on monitor size +#options FONT_BOLD8x16 +#options FONT_BOLD16x32 +# +## pseudo audio device driver +#pseudo-device pad +# +## userland interface to drivers, including autoconf and properties retrieval +#pseudo-device drvctl +# +## EFI runtime support +#options EFI_RUNTIME +#pseudo-device efi # /dev/efi +# +#include "dev/veriexec.config" +# +#options PAX_SEGVGUARD=0 # PaX Segmentation fault guard +#options PAX_MPROTECT=1 # PaX mprotect(2) restrictions +#options PAX_MPROTECT_DEBUG=1 # PaX mprotect debug +#options PAX_ASLR=1 # PaX Address Space Layout Randomization +#options PAX_ASLR_DEBUG=1 # PaX ASLR debug +# +## Pull in optional local configuration - always at end +#cinclude "arch/amd64/conf/GENERIC.local" + +# May prevent compilation +#options TSLOG # enable tslog(4) tracing facility +#options BOOTTIME # prints boot time relative to rdtsc diff --git a/sys/dev/pv/pvclock.c b/sys/dev/pv/pvclock.c index 09ef8a5661408..7419b53a721a0 100644 --- a/sys/dev/pv/pvclock.c +++ b/sys/dev/pv/pvclock.c @@ -61,11 +61,11 @@ #include #include -#include +#include #include - #include - +// the above is missing, and this is not enough: +//#define PVCLOCK_FLAG_TSC_STABLE 0 uint pvclock_lastcount; struct pvclock_softc { diff --git a/sys/kern/kern_tslog.c b/sys/kern/kern_tslog.c index 15819cf6188a9..e3e050342d8da 100644 --- a/sys/kern/kern_tslog.c +++ 
b/sys/kern/kern_tslog.c @@ -64,7 +64,7 @@ #include #include -#include +#include #include #ifndef TSLOGSIZE From 982102773f6f0ee27e95a488a451ee5948a3ab10 Mon Sep 17 00:00:00 2001 From: Charlotte <49044530+csdvrx@users.noreply.github.com> Date: Wed, 31 Jan 2024 03:49:31 +0100 Subject: [PATCH 114/114] Document the current results --- README.md | 150 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 105 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 0da145d0fe29a..d581f84d78050 100644 --- a/README.md +++ b/README.md @@ -1,56 +1,116 @@ -NetBSD -====== - -NetBSD is a free, fast, secure, and highly portable Unix-like Open -Source operating system. It is available for a [wide range of -platforms](https://wiki.NetBSD.org/ports/), from large-scale servers -and powerful desktop systems to handheld and embedded devices. +NetBSD-perf +=========== + +This is my attempt at replicating iMil results that were reported on https://mail-index.netbsd.org/tech-kern/2024/01/23/msg029450.html and https://old.reddit.com/r/BSD/comments/197vfmq by using the perf branch he published on https://github.com/NetBSDfr/NetBSD-src + +Currently, I'm still about 5x slower as I boot in 200 ms, but this is a good beginning: + +``` +[ 1.0000000] cpu_rng: rdrand/rdseed +[ 1.0000000] entropy: ready +[ 1.0000000] NetBSD 10.99.10 (MICROVM) Notice: this software is protected by copyright +[ 1.0000000] Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, +[ 1.0000000] 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, +[ 1.0000000] 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, +[ 1.0000000] 2024 +[ 1.0000000] The NetBSD Foundation, Inc. All rights reserved. +[ 1.0000000] Copyright (c) 1982, 1986, 1989, 1991, 1993 +[ 1.0000000] The Regents of the University of California. All rights reserved. 
+[ 1.0000000] NetBSD 10.99.10 (MICROVM) #5: Tue Jan 30 19:22:09 CST 2024 +[ 1.0000000] charlotte@x1ng2:/home/charlotte/obj/sys/arch/amd64/compile/MICROVM +[ 1.0000000] total memory = 127 MB +[ 1.0000000] avail memory = 77184 KB +[ 1.0000000] timecounter: Timecounters tick every 10.000 msec +[ 1.0000000] timecounter: Timecounter "i8254" frequency 1193182 Hz quality 100 +[ 1.0000030] Hypervisor: KVM +[ 1.0000030] VMM: Generic PVH +[ 1.0000030] mainbus0 (root) +[ 1.0000030] mainbus0: Intel MP Specification (Version 1.4) (QBOOT 000000000000) +[ 1.0000030] cpu0 at mainbus0 apid 0 +[ 1.0000030] cpu0: Use lfence to serialize rdtsc +[ 1.0000030] got tsc from vmware compatible cpuid +[ 1.0000030] cpu0: TSC freq CPUID 2496000000 Hz +[ 1.0000030] cpu0: 12th Gen Intel(R) Core(TM) i7-1270P, id 0x906a2 +[ 1.0000030] cpu0: node 0, package 0, core 0, smt 0 +[ 1.0000030] mpbios: bus 0 is type ISA +[ 1.0000030] ioapic0 at mainbus0 apid 2: pa 0xfec00000, version 0x20, 24 pins +[ 1.0000030] isa0 at mainbus0 +[ 1.0000030] com0 at isa0 port 0x3f8-0x3ff irq 4: ns16550a, 16-byte FIFO +[ 1.0000030] com0: console +[ 1.0000030] allocated pic ioapic0 type edge pin 4 level 8 to cpu0 slot 0 idt entry 129 +[ 1.0000030] pv0 at mainbus0 +[ 1.0000030] virtio0 at pv0 +[ 1.0000030] kernel parameters: root=ld0c rw console=com -z -v virtio_mmio.device=512@0xfeb00e00:12 virtio_mmio.device=512@0xfeb00c00:11 +[ 1.0000030] viommio: 512@0xfeb00e00:12 +[ 1.0000030] virtio0: VirtIO-MMIO-v2 +[ 1.0000030] virtio0: entropy device (id 4, rev. 0x01) +[ 1.0000030] viornd0 at virtio0: features: 0x110000000 +[ 1.0000030] virtio0: allocated 32768 byte for virtqueue 0 for Entropy request, size 1024 +[ 1.0000030] allocated pic ioapic0 type level pin 12 level 6 to cpu0 slot 1 idt entry 96 +[ 1.0000030] virtio0: interrupting on -1 +[ 1.0000030] virtio1 at pv0 +[ 1.0000030] viommio: 512@0xfeb00c00:11 +[ 1.0000030] virtio1: VirtIO-MMIO-v2 +[ 1.0000030] virtio1: block device (id 2, rev. 
0x01) +[ 1.0000030] ld0 at virtio1: features: 0x110000a54 +[ 1.0000030] virtio1: allocated 4227072 byte for virtqueue 0 for I/O request, size 1024 +[ 1.0000030] virtio1: using 4194304 byte (262144 entries) indirect descriptors +[ 1.0000030] allocated pic ioapic0 type level pin 11 level 6 to cpu0 slot 2 idt entry 97 +[ 1.0000030] virtio1: interrupting on -1 +[ 1.0000030] ld0: 512 MB, 1040 cyl, 16 head, 63 sec, 512 bytes/sect x 1048576 sectors +[ 1.0000030] timecounter: Timecounter "lapic" frequency 999998000 Hz quality -100 +[ 1.0000030] timecounter: Timecounter "clockinterrupt" frequency 100 Hz quality 0 +[ 1.0000030] timecounter: Timecounter "TSC" frequency 2496000000 Hz quality 3000 +[ 1.0001656] boot device: ld0 +[ 1.0001656] root on ld0c dumps on ld0b +[ 1.0001656] root file system type: ffs +[ 1.0001656] kern.module.path=/stand/amd64/10.99.10/modules +[ 1.0001656] WARNING: clock gained 4 days +[ 1.0050716] boot: 199ms (entry tsc: 281364007) +[ 1.0050716] exec /sbin/init: error 8 +[ 1.0050716] init: trying /sbin/oinit +bslinit v7 starting on NetBSD, will handle 12 signals, reaping zombies every 30 s + arg 0: oinit +oinit: mount /proc failed: Invalid argument +# Epoch 1706665417, time 2024-01-31T01:43:37 +``` + +Note however that the qemu append doesn't seem to survive a reboot: + +``` +# init: can't add utmpx record for `system boot': No such file or directory +init: can't add utmpx record for `system down': No such file or directory +qemu-system-x86_64: terminating on signal 2 +[ 1.0000000] cpu_rng: rdrand/rdseed +[ 1.0000000] entropy: ready +[ 1.0000000] NetBSD 10.99.10 (MICROVM) Notice: this software is protected by copyright +[ 1.0000000] Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, +[ 1.0000000] 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, +[ 1.0000000] 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, +[ 1.0000000] 2024 +[ 1.0000000] The NetBSD Foundation, Inc. All rights reserved. 
+[ 1.0000000] Copyright (c) 1982, 1986, 1989, 1991, 1993 +[ 1.0000000] The Regents of the University of California. All rights reserved. +(...) +[ 1.0155438] root on ld0c dumps on ld0b +[ 1.0155438] vfs_mountroot: can't open root device +[ 1.0155438] cannot mount root, error = 6 +[ 1.0155438] root device (default ld0c): qemu-system-x86_64: terminating on signal 2 +``` Building -------- -You can cross-build NetBSD from most UNIX-like operating systems. -To build for amd64 (x86_64), in the src directory: - - ./build.sh -U -u -j4 -m amd64 -O ~/obj release - -Additional build information available in the [BUILDING](BUILDING) file. + sh ./compile.sh Binaries -------- -- [Daily builds](https://nycdn.NetBSD.org/pub/NetBSD-daily/HEAD/latest/) -- [Releases](https://cdn.NetBSD.org/pub/NetBSD/) - -Testing -------- - -On a running NetBSD system: - - cd /usr/tests; atf-run | atf-report - -Troubleshooting ---------------- - -- Send bugs and patches [via web form](https://www.NetBSD.org/cgi-bin/sendpr.cgi?gndb=netbsd). -- Subscribe to the [mailing lists](https://www.NetBSD.org/mailinglists/). - The [netbsd-users](https://www.NetBSD.org/mailinglists/#netbsd-users) list is a good choice for many problems; watch [current-users](https://www.NetBSD.org/mailinglists/#current-users) if you follow the bleeding edge of NetBSD-current. -- Join the community IRC channel [#netbsd @ libera.chat](https://web.libera.chat/#netbsd). 
- Latest sources -------------- - -To fetch the main CVS repository: - - cvs -d anoncvs@anoncvs.NetBSD.org:/cvsroot checkout -P src - -To work in the Git mirror, which is updated every few hours from CVS: +When it is ready, you'll see the releases on the right-hand side - git clone https://github.com/NetBSD/src.git +TODO +---- -Additional Links ----------------- +Adding flamecharts to understand why it's so slow -- [The NetBSD Guide](https://www.NetBSD.org/docs/guide/en/) -- [NetBSD manual pages](https://man.NetBSD.org/) -- [NetBSD Cross-Reference](https://nxr.NetBSD.org/) +Adding a basic rootfs with simple tools such as bslinit, the tslog script to create flamecharts, and a shell