/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 */

#ifndef _VMM_KERNEL_H_
#define	_VMM_KERNEL_H_

#include <sys/sdt.h>
#include <x86/segments.h>

SDT_PROVIDER_DECLARE(vmm);

struct vm;
struct vm_exception;
struct seg_desc;
struct vm_exit;
struct vie;
struct vm_run;
struct vhpet;
struct vioapic;
struct vlapic;
struct vmspace;
struct vm_object;
struct vm_guest_paging;
struct pmap;

struct vm_eventinfo {
	uint_t	*rptr;		/* runblock cookie */
	int	*sptr;		/* suspend cookie */
	int	*iptr;		/* reqidle cookie */
};

typedef int	(*vmm_init_func_t)(int ipinum);
typedef int	(*vmm_cleanup_func_t)(void);
typedef void	(*vmm_resume_func_t)(void);
typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, uint64_t rip,
    struct pmap *pmap, struct vm_eventinfo *info);
typedef void	(*vmi_cleanup_func_t)(void *vmi);
typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
    uint64_t *retval);
typedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
    uint64_t val);
typedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
    struct seg_desc *desc);
typedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
    struct seg_desc *desc);
typedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
typedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
typedef struct vmspace *(*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
typedef struct vlapic *(*vmi_vlapic_init)(void *vmi, int vcpu);
typedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
#ifndef __FreeBSD__
typedef void	(*vmi_savectx)(void *vmi, int vcpu);
typedef void	(*vmi_restorectx)(void *vmi, int vcpu);
#endif

struct vmm_ops {
	vmm_init_func_t		init;		/* module wide initialization */
	vmm_cleanup_func_t	cleanup;
	vmm_resume_func_t	resume;

	vmi_init_func_t		vminit;		/* vm-specific initialization */
	vmi_run_func_t		vmrun;
	vmi_cleanup_func_t	vmcleanup;
	vmi_get_register_t	vmgetreg;
	vmi_set_register_t	vmsetreg;
	vmi_get_desc_t		vmgetdesc;
	vmi_set_desc_t		vmsetdesc;
	vmi_get_cap_t		vmgetcap;
	vmi_set_cap_t		vmsetcap;
	vmi_vmspace_alloc	vmspace_alloc;
	vmi_vmspace_free	vmspace_free;
	vmi_vlapic_init		vlapic_init;
	vmi_vlapic_cleanup	vlapic_cleanup;

#ifndef __FreeBSD__
	vmi_savectx		vmsavectx;
	vmi_restorectx		vmrestorectx;
#endif
};

extern struct vmm_ops vmm_ops_intel;
extern struct vmm_ops vmm_ops_amd;
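
/*
 * Illustrative sketch (not an interface defined here): consuming code is
 * expected to pick the ops vector matching the host CPU and drive the
 * module-wide hooks through it, along the lines of:
 *
 *	struct vmm_ops *ops = host_is_intel ? &vmm_ops_intel : &vmm_ops_amd;
 *	if (ops->init(ipinum) != 0)
 *		return (ENXIO);
 *
 * where 'host_is_intel' stands in for whatever CPU-vendor check the caller
 * performs.
 */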

int vm_create(const char *name, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
uint16_t vm_get_maxcpus(struct vm *vm);
void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus);
int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus);

/*
 * APIs that modify the guest memory map require all vcpus to be frozen.
 */
int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
    size_t len, int prot, int flags);
int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
void vm_free_memseg(struct vm *vm, int ident);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
#ifdef __FreeBSD__
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
#else
int vm_assign_pptdev(struct vm *vm, int pptfd);
int vm_unassign_pptdev(struct vm *vm, int pptfd);
#endif /* __FreeBSD__ */
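
/*
 * Illustrative sketch of the expected call sequence (assumes all vcpus are
 * already frozen; the segment ident of 0 is an arbitrary example):
 *
 *	if (vm_alloc_memseg(vm, 0, len, true) == 0) {
 *		if (vm_mmap_memseg(vm, gpa, 0, 0, len,
 *		    PROT_READ | PROT_WRITE | PROT_EXEC, 0) != 0)
 *			vm_free_memseg(vm, 0);
 *	}
 */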

/*
 * APIs that inspect the guest memory map require only a *single* vcpu to
 * be frozen. This acts like a read lock on the guest memory map since any
 * modification requires *all* vcpus to be frozen.
 */
int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
    struct vm_object **objptr);
vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
    int prot, void **cookie);
void vm_gpa_release(void *cookie);
bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
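
/*
 * Illustrative sketch: a hold on guest physical memory must be paired with
 * a release of the returned cookie once the access is complete, e.g.:
 *
 *	void *cookie;
 *	void *hva = vm_gpa_hold(vm, vcpuid, gpa, len, PROT_READ, &cookie);
 *	if (hva != NULL) {
 *		... read up to 'len' bytes at 'hva' ...
 *		vm_gpa_release(cookie);
 *	}
 */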

int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc);
int vm_run(struct vm *vm, int vcpuid, const struct vm_entry *);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
int vm_nmi_pending(struct vm *vm, int vcpuid);
void vm_nmi_clear(struct vm *vm, int vcpuid);
int vm_inject_extint(struct vm *vm, int vcpu);
int vm_extint_pending(struct vm *vm, int vcpuid);
void vm_extint_clear(struct vm *vm, int vcpuid);
struct vlapic *vm_lapic(struct vm *vm, int cpu);
struct vioapic *vm_ioapic(struct vm *vm);
struct vhpet *vm_hpet(struct vm *vm);
int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
int vm_activate_cpu(struct vm *vm, int vcpu);
int vm_suspend_cpu(struct vm *vm, int vcpu);
int vm_resume_cpu(struct vm *vm, int vcpu);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
struct vie *vm_vie_ctx(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
int vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval,
    int rsize);
int vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval,
    int wsize);
void vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip);

#ifdef _SYS__CPUSET_H_
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
#endif	/* _SYS__CPUSET_H_ */

static __inline int
vcpu_runblocked(struct vm_eventinfo *info)
{
	return (*info->rptr != 0);
}

static __inline int
vcpu_suspended(struct vm_eventinfo *info)
{
	return (*info->sptr);
}

static __inline int
vcpu_reqidle(struct vm_eventinfo *info)
{
	return (*info->iptr);
}

int vcpu_debugged(struct vm *vm, int vcpuid);

/*
 * Return true if the device indicated by bus/slot/func is configured as a
 * PCI passthrough device, and false otherwise.
 */
bool vmm_is_pptdev(int bus, int slot, int func);

void *vm_iommu_domain(struct vm *vm);

enum vcpu_state {
	VCPU_IDLE,
	VCPU_FROZEN,
	VCPU_RUNNING,
	VCPU_SLEEPING,
};

int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
    bool from_idle);
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
void vcpu_block_run(struct vm *, int);
void vcpu_unblock_run(struct vm *, int);

#ifndef __FreeBSD__
uint64_t vcpu_tsc_offset(struct vm *vm, int vcpuid);
#endif

static __inline int
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
{
	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
}

#ifdef _SYS_THREAD_H
static __inline int
vcpu_should_yield(struct vm *vm, int vcpu)
{
	/* Yield if an AST is pending on the current thread ... */
	if (curthread->t_astflag)
		return (1);
	/* ... or if the dispatcher has requested preemption of this CPU. */
	else if (CPU->cpu_runrun)
		return (1);
	else
		return (0);
}
#endif /* _SYS_THREAD_H */

typedef enum vcpu_notify {
	VCPU_NOTIFY_NONE,
	VCPU_NOTIFY_APIC,	/* Posted intr notification (if possible) */
	VCPU_NOTIFY_EXIT,	/* IPI to cause VM exit */
} vcpu_notify_t;

void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid);
void vcpu_notify_event_type(struct vm *vm, int vcpuid, vcpu_notify_t);
struct vmspace *vm_get_vmspace(struct vm *vm);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
struct vpmtmr *vm_pmtmr(struct vm *vm);
struct vrtc *vm_rtc(struct vm *vm);

/*
 * Inject exception 'vector' into the guest vcpu. This function returns 0 on
 * success and non-zero on failure.
 *
 * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
 * this function directly because they enforce the trap-like or fault-like
 * behavior of an exception.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid,
    uint32_t errcode, int restart_instruction);
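
/*
 * For instance, rather than injecting IDT_GP through this interface
 * directly, callers would typically use the wrapper:
 *
 *	vm_inject_gp(vm, vcpuid);
 *
 * which supplies the error code and instruction-restart semantics expected
 * of a fault-like exception.
 */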

/*
 * This function is called after a VM-exit that occurred during exception or
 * interrupt delivery through the IDT. The format of 'intinfo' is described
 * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
 *
 * If a VM-exit handler completes the event delivery successfully then it
 * should call vm_exit_intinfo() to extinguish the pending event. For example,
 * if task switch emulation is triggered via a task gate then it should
 * call this function with 'intinfo=0' to indicate that the external event
 * is not pending anymore.
 *
 * Return value is 0 on success and non-zero on failure.
 */
int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
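
/*
 * Illustrative sketch: a handler that has fully emulated the interrupted
 * event delivery would extinguish the pending event before resuming the
 * guest, e.g.:
 *
 *	if (delivery_emulated)
 *		(void) vm_exit_intinfo(vm, vcpuid, 0);
 *
 * where 'delivery_emulated' stands in for the handler's own determination
 * that the event no longer needs to be redelivered.
 */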

/*
 * This function is called before every VM-entry to retrieve a pending
 * event that should be injected into the guest. This function combines
 * nested events into a double or triple fault.
 *
 * Returns 0 if there are no events that need to be injected into the guest
 * and non-zero otherwise.
 */
int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
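
/*
 * Illustrative sketch of use on the VM-entry path:
 *
 *	uint64_t intinfo;
 *	if (vm_entry_intinfo(vm, vcpuid, &intinfo) != 0) {
 *		... program the event described by 'intinfo' into the
 *		hardware (e.g. the VMCS entry-interruption fields on VMX,
 *		or the VMCB EVENTINJ field on SVM) ...
 *	}
 */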

int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);

enum vm_reg_name vm_segment_name(int seg_encoding);

struct vm_copyinfo {
	uint64_t	gpa;
	size_t		len;
	void		*hva;
	void		*cookie;
};

/*
 * Set up 'copyinfo[]' to copy to/from guest linear address space starting
 * at 'gla' and extending for 'len' bytes. The 'prot' should be set to
 * PROT_READ for a copyin or PROT_WRITE for a copyout.
 *
 * retval	is_fault	Interpretation
 *   0		   0		Success
 *   0		   1		An exception was injected into the guest
 * EFAULT	  N/A		Unrecoverable error
 *
 * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
 * the return value is 0. The 'copyinfo[]' resources should be freed by
 * calling 'vm_copy_teardown()' after the copy is done.
 */
int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
    int num_copyinfo, int *is_fault);
void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
    int num_copyinfo);
void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
    void *kaddr, size_t len);
void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
    struct vm_copyinfo *copyinfo, size_t len);
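
/*
 * Illustrative copyin sequence ('N' is the caller-chosen capacity of the
 * copyinfo array and 'buf' a kernel buffer; both are examples only):
 *
 *	struct vm_copyinfo copyinfo[N];
 *	int error, fault;
 *
 *	error = vm_copy_setup(vm, vcpuid, paging, gla, len, PROT_READ,
 *	    copyinfo, N, &fault);
 *	if (error == 0 && !fault) {
 *		vm_copyin(vm, vcpuid, copyinfo, buf, len);
 *		vm_copy_teardown(vm, vcpuid, copyinfo, N);
 *	}
 */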

int vcpu_trace_exceptions(struct vm *vm, int vcpuid);

/* APIs to inject faults into the guest */
void vm_inject_fault(struct vm *vm, int vcpuid, int vector, int errcode_valid,
    int errcode);

void vm_inject_ud(struct vm *vm, int vcpuid);
void vm_inject_gp(struct vm *vm, int vcpuid);
void vm_inject_ac(struct vm *vm, int vcpuid, int errcode);
void vm_inject_ss(struct vm *vm, int vcpuid, int errcode);
void vm_inject_pf(struct vm *vm, int vcpuid, int errcode, uint64_t cr2);

/*
 * Both SVM and VMX have complex logic for injecting events such as exceptions
 * or interrupts into the guest.  Within those two backends, the progress of
 * event injection is tracked by event_inject_state, hopefully making it easier
 * to reason about.
 */
enum event_inject_state {
	EIS_CAN_INJECT	= 0, /* exception/interrupt can be injected */
	EIS_EV_EXISTING	= 1, /* blocked by existing event */
	EIS_EV_INJECTED	= 2, /* blocked by injected event */
	EIS_GI_BLOCK	= 3, /* blocked by guest interruptibility */

	/*
	 * Flag to request an immediate exit from VM context after event
	 * injection in order to perform more processing.
	 */
	EIS_REQ_EXIT	= (1 << 15),
};
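
/*
 * Illustrative sketch of how a backend might consume these values (details
 * differ between the SVM and VMX implementations; 'backend_inject_events'
 * is a placeholder, not a function declared here):
 *
 *	enum event_inject_state eis = backend_inject_events(...);
 *	if (eis & EIS_REQ_EXIT) {
 *		... arrange an immediate VM exit ...
 *		eis &= ~EIS_REQ_EXIT;
 *	}
 *	if (eis == EIS_CAN_INJECT) {
 *		... another event could still be injected ...
 *	}
 */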

#ifndef __FreeBSD__

void vmm_sol_glue_init(void);
void vmm_sol_glue_cleanup(void);

int vmm_mod_load(void);
int vmm_mod_unload(void);

void vmm_call_trap(uint64_t);

/*
 * Because of tangled headers, this is not exposed directly via the vmm_drv
 * interface, but rather mirrored as vmm_drv_iop_cb_t in vmm_drv.h.
 */
typedef int (*ioport_handler_t)(void *, bool, uint16_t, uint8_t, uint32_t *);

int vm_ioport_access(struct vm *vm, int vcpuid, bool in, uint16_t port,
    uint8_t bytes, uint32_t *val);

int vm_ioport_attach(struct vm *vm, uint16_t port, ioport_handler_t func,
    void *arg, void **cookie);
int vm_ioport_detach(struct vm *vm, void **cookie, ioport_handler_t *old_func,
    void **old_arg);

int vm_ioport_hook(struct vm *, uint16_t, ioport_handler_t, void *, void **);
void vm_ioport_unhook(struct vm *, void **);
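
/*
 * Illustrative sketch of hooking a port (the handler and the port number
 * are examples only):
 *
 *	static int
 *	sample_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
 *	    uint32_t *val)
 *	{
 *		if (in)
 *			*val = 0;
 *		return (0);
 *	}
 *
 *	void *cookie;
 *	if (vm_ioport_hook(vm, 0x510, sample_handler, arg, &cookie) == 0) {
 *		...
 *		vm_ioport_unhook(vm, &cookie);
 *	}
 */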

#endif /* __FreeBSD__ */

#endif /* _VMM_KERNEL_H_ */