1 /*-
   2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
   3  *
   4  * Copyright (c) 2011 NetApp, Inc.
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  *
  28  * $FreeBSD$
  29  */
  30 /*
  31  * This file and its contents are supplied under the terms of the
  32  * Common Development and Distribution License ("CDDL"), version 1.0.
  33  * You may only use this file in accordance with the terms of version
  34  * 1.0 of the CDDL.
  35  *
  36  * A full copy of the text of the CDDL should have accompanied this
  37  * source.  A copy of the CDDL is also available via the Internet at
  38  * http://www.illumos.org/license/CDDL.
  39  *
  40  * Copyright 2015 Pluribus Networks Inc.
  41  * Copyright 2019 Joyent, Inc.
  42  * Copyright 2020 Oxide Computer Company
  43  */
  44 
  45 #ifndef _VMM_KERNEL_H_
  46 #define _VMM_KERNEL_H_
  47 
  48 #include <sys/sdt.h>
  49 #include <x86/segments.h>
  50 
  51 SDT_PROVIDER_DECLARE(vmm);
  52 
  53 struct vm;
  54 struct vm_exception;
  55 struct seg_desc;
  56 struct vm_exit;
  57 struct vie;
  58 struct vm_run;
  59 struct vhpet;
  60 struct vioapic;
  61 struct vlapic;
  62 struct vmspace;
  63 struct vm_object;
  64 struct vm_guest_paging;
  65 struct pmap;
  66 
  67 typedef int     (*vmm_init_func_t)(int ipinum);
  68 typedef int     (*vmm_cleanup_func_t)(void);
  69 typedef void    (*vmm_resume_func_t)(void);
  70 typedef void *  (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
  71 typedef int     (*vmi_run_func_t)(void *vmi, int vcpu, uint64_t rip,
  72     struct pmap *pmap);
  73 typedef void    (*vmi_cleanup_func_t)(void *vmi);
  74 typedef int     (*vmi_get_register_t)(void *vmi, int vcpu, int num,
  75     uint64_t *retval);
  76 typedef int     (*vmi_set_register_t)(void *vmi, int vcpu, int num,
  77     uint64_t val);
  78 typedef int     (*vmi_get_desc_t)(void *vmi, int vcpu, int num,
  79     struct seg_desc *desc);
  80 typedef int     (*vmi_set_desc_t)(void *vmi, int vcpu, int num,
  81     const struct seg_desc *desc);
  82 typedef int     (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
  83 typedef int     (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
  84 typedef struct vmspace *(*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
  85 typedef void    (*vmi_vmspace_free)(struct vmspace *vmspace);
  86 typedef struct vlapic *(*vmi_vlapic_init)(void *vmi, int vcpu);
  87 typedef void    (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
  88 #ifndef __FreeBSD__
  89 typedef void    (*vmi_savectx)(void *vmi, int vcpu);
  90 typedef void    (*vmi_restorectx)(void *vmi, int vcpu);
  91 #endif
  92 
  93 struct vmm_ops {
  94         vmm_init_func_t         init;           /* module wide initialization */
  95         vmm_cleanup_func_t      cleanup;
  96         vmm_resume_func_t       resume;
  97 
  98         vmi_init_func_t         vminit;         /* vm-specific initialization */
  99         vmi_run_func_t          vmrun;
 100         vmi_cleanup_func_t      vmcleanup;
 101         vmi_get_register_t      vmgetreg;
 102         vmi_set_register_t      vmsetreg;
 103         vmi_get_desc_t          vmgetdesc;
 104         vmi_set_desc_t          vmsetdesc;
 105         vmi_get_cap_t           vmgetcap;
 106         vmi_set_cap_t           vmsetcap;
 107         vmi_vmspace_alloc       vmspace_alloc;
 108         vmi_vmspace_free        vmspace_free;
 109         vmi_vlapic_init         vlapic_init;
 110         vmi_vlapic_cleanup      vlapic_cleanup;
 111 
 112 #ifndef __FreeBSD__
 113         vmi_savectx             vmsavectx;
 114         vmi_restorectx          vmrestorectx;
 115 #endif
 116 };
 117 
 118 extern struct vmm_ops vmm_ops_intel;
 119 extern struct vmm_ops vmm_ops_amd;
 120 
 121 int vm_create(const char *name, struct vm **retvm);
 122 void vm_destroy(struct vm *vm);
 123 int vm_reinit(struct vm *vm);
 124 const char *vm_name(struct vm *vm);
 125 uint16_t vm_get_maxcpus(struct vm *vm);
 126 void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
 127     uint16_t *threads, uint16_t *maxcpus);
 128 int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
 129     uint16_t threads, uint16_t maxcpus);
 130 
 131 /*
 132  * APIs that modify the guest memory map require all vcpus to be frozen.
 133  */
 134 int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
 135     size_t len, int prot, int flags);
 136 int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
 137 void vm_free_memseg(struct vm *vm, int ident);
 138 int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 139 int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
 140 #ifdef __FreeBSD__
 141 int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
 142 int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
 143 #else
 144 int vm_assign_pptdev(struct vm *vm, int pptfd);
 145 int vm_unassign_pptdev(struct vm *vm, int pptfd);
 146 #endif /* __FreeBSD__ */
 147 
 148 /*
 149  * APIs that inspect the guest memory map require only a *single* vcpu to
 150  * be frozen. This acts like a read lock on the guest memory map since any
 151  * modification requires *all* vcpus to be frozen.
 152  */
 153 int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
 154     vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
 155 int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
 156     struct vm_object **objptr);
 157 vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
 158 void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
 159     int prot, void **cookie);
 160 void vm_gpa_release(void *cookie);
 161 bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
 162 
 163 int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
 164 int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
 165 int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
 166     struct seg_desc *ret_desc);
 167 int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 168     const struct seg_desc *desc);
 169 int vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state,
 170     uint8_t *sipi_vec);
 171 int vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state,
 172     uint8_t sipi_vec);
 173 int vm_run(struct vm *vm, int vcpuid, const struct vm_entry *);
 174 int vm_suspend(struct vm *vm, enum vm_suspend_how how);
 175 int vm_inject_nmi(struct vm *vm, int vcpu);
 176 int vm_nmi_pending(struct vm *vm, int vcpuid);
 177 void vm_nmi_clear(struct vm *vm, int vcpuid);
 178 int vm_inject_extint(struct vm *vm, int vcpu);
 179 int vm_extint_pending(struct vm *vm, int vcpuid);
 180 void vm_extint_clear(struct vm *vm, int vcpuid);
 181 int vm_inject_init(struct vm *vm, int vcpuid);
 182 int vm_inject_sipi(struct vm *vm, int vcpuid, uint8_t vec);
 183 struct vlapic *vm_lapic(struct vm *vm, int cpu);
 184 struct vioapic *vm_ioapic(struct vm *vm);
 185 struct vhpet *vm_hpet(struct vm *vm);
 186 int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
 187 int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
 188 int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
 189 int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
 190 int vm_apicid2vcpuid(struct vm *vm, int apicid);
 191 int vm_activate_cpu(struct vm *vm, int vcpu);
 192 int vm_suspend_cpu(struct vm *vm, int vcpu);
 193 int vm_resume_cpu(struct vm *vm, int vcpu);
 194 struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
 195 struct vie *vm_vie_ctx(struct vm *vm, int vcpuid);
 196 void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
 197 void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
 198 void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
 199 void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
 200 void vm_exit_run_state(struct vm *vm, int vcpuid, uint64_t rip);
 201 int vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval,
 202     int rsize);
 203 int vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval,
 204     int wsize);
 205 
 206 #ifdef _SYS__CPUSET_H_
 207 cpuset_t vm_active_cpus(struct vm *vm);
 208 cpuset_t vm_debug_cpus(struct vm *vm);
 209 cpuset_t vm_suspended_cpus(struct vm *vm);
 210 #endif  /* _SYS__CPUSET_H_ */
 211 
 212 bool vcpu_entry_bailout_checks(struct vm *vm, int vcpuid, uint64_t rip);
 213 bool vcpu_run_state_pending(struct vm *vm, int vcpuid);
 214 int vcpu_arch_reset(struct vm *vm, int vcpuid, bool init_only);
 215 
 216 /*
 217  * Return true if device indicated by bus/slot/func is supposed to be a
 218  * pci passthrough device.
 219  *
 220  * Return false otherwise.
 221  */
 222 bool vmm_is_pptdev(int bus, int slot, int func);
 223 
 224 void *vm_iommu_domain(struct vm *vm);
 225 
 226 enum vcpu_state {
 227         VCPU_IDLE,
 228         VCPU_FROZEN,
 229         VCPU_RUNNING,
 230         VCPU_SLEEPING,
 231 };
 232 
 233 int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
 234     bool from_idle);
 235 enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
 236 void vcpu_block_run(struct vm *, int);
 237 void vcpu_unblock_run(struct vm *, int);
 238 
 239 #ifndef __FreeBSD__
 240 uint64_t vcpu_tsc_offset(struct vm *vm, int vcpuid);
 241 #endif
 242 
 243 static __inline int
 244 vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
 245 {
 246         return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
 247 }
 248 
 249 #ifdef _SYS_THREAD_H
 250 static __inline int
 251 vcpu_should_yield(struct vm *vm, int vcpu)
 252 {
 253 
 254         if (curthread->t_astflag)
 255                 return (1);
 256         else if (CPU->cpu_runrun)
 257                 return (1);
 258         else
 259                 return (0);
 260 }
 261 #endif /* _SYS_THREAD_H */
 262 
 263 typedef enum vcpu_notify {
 264         VCPU_NOTIFY_NONE,
 265         VCPU_NOTIFY_APIC,       /* Posted intr notification (if possible) */
 266         VCPU_NOTIFY_EXIT,       /* IPI to cause VM exit */
 267 } vcpu_notify_t;
 268 
 269 void *vcpu_stats(struct vm *vm, int vcpu);
 270 void vcpu_notify_event(struct vm *vm, int vcpuid);
 271 void vcpu_notify_event_type(struct vm *vm, int vcpuid, vcpu_notify_t);
 272 struct vmspace *vm_get_vmspace(struct vm *vm);
 273 struct vatpic *vm_atpic(struct vm *vm);
 274 struct vatpit *vm_atpit(struct vm *vm);
 275 struct vpmtmr *vm_pmtmr(struct vm *vm);
 276 struct vrtc *vm_rtc(struct vm *vm);
 277 
 278 /*
 279  * Inject exception 'vector' into the guest vcpu. This function returns 0 on
 280  * success and non-zero on failure.
 281  *
 282  * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
 283  * this function directly because they enforce the trap-like or fault-like
 284  * behavior of an exception.
 285  *
 286  * This function should only be called in the context of the thread that is
 287  * executing this vcpu.
 288  */
 289 int vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid,
 290     uint32_t errcode, int restart_instruction);
 291 
 292 /*
 293  * This function is called after a VM-exit that occurred during exception or
 294  * interrupt delivery through the IDT. The format of 'intinfo' is described
 295  * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
 296  *
 297  * If a VM-exit handler completes the event delivery successfully then it
 298  * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
 299  * if the task switch emulation is triggered via a task gate then it should
 300  * call this function with 'intinfo=0' to indicate that the external event
 301  * is not pending anymore.
 302  *
 303  * Return value is 0 on success and non-zero on failure.
 304  */
 305 int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
 306 
 307 /*
 308  * This function is called before every VM-entry to retrieve a pending
 309  * event that should be injected into the guest. This function combines
 310  * nested events into a double or triple fault.
 311  *
 312  * Returns 0 if there are no events that need to be injected into the guest
 313  * and non-zero otherwise.
 314  */
 315 int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
 316 
 317 int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
 318 
 319 enum vm_reg_name vm_segment_name(int seg_encoding);
 320 
 321 struct vm_copyinfo {
 322         uint64_t        gpa;
 323         size_t          len;
 324         void            *hva;
 325         void            *cookie;
 326 };
 327 
 328 /*
 329  * Set up 'copyinfo[]' to copy to/from guest linear address space starting
 330  * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
 331  * a copyin or PROT_WRITE for a copyout.
 332  *
 333  * retval       is_fault        Interpretation
 334  *   0             0            Success
 335  *   0             1            An exception was injected into the guest
 336  * EFAULT         N/A           Unrecoverable error
 337  *
 338  * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
 339  * the return value is 0. The 'copyinfo[]' resources should be freed by calling
 340  * 'vm_copy_teardown()' after the copy is done.
 341  */
 342 int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
 343     uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
 344     int num_copyinfo, int *is_fault);
 345 void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
 346     int num_copyinfo);
 347 void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
 348     void *kaddr, size_t len);
 349 void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
 350     struct vm_copyinfo *copyinfo, size_t len);
 351 
 352 int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
 353 
 354 /* APIs to inject faults into the guest */
 355 void vm_inject_fault(struct vm *vm, int vcpuid, int vector, int errcode_valid,
 356     int errcode);
 357 
 358 void vm_inject_ud(struct vm *vm, int vcpuid);
 359 void vm_inject_gp(struct vm *vm, int vcpuid);
 360 void vm_inject_ac(struct vm *vm, int vcpuid, int errcode);
 361 void vm_inject_ss(struct vm *vm, int vcpuid, int errcode);
 362 void vm_inject_pf(struct vm *vm, int vcpuid, int errcode, uint64_t cr2);
 363 
 364 /*
 365  * Both SVM and VMX have complex logic for injecting events such as exceptions
 366  * or interrupts into the guest.  Within those two backends, the progress of
 367  * event injection is tracked by event_inject_state, hopefully making it easier
 368  * to reason about.
 369  */
 370 enum event_inject_state {
 371         EIS_CAN_INJECT  = 0, /* exception/interrupt can be injected */
 372         EIS_EV_EXISTING = 1, /* blocked by existing event */
 373         EIS_EV_INJECTED = 2, /* blocked by injected event */
 374         EIS_GI_BLOCK    = 3, /* blocked by guest interruptability */
 375 
 376         /*
 377          * Flag to request an immediate exit from VM context after event
 378          * injection in order to perform more processing
 379          */
 380         EIS_REQ_EXIT    = (1 << 15),
 381 };
 382 
 383 #ifndef __FreeBSD__
 384 
 385 void vmm_sol_glue_init(void);
 386 void vmm_sol_glue_cleanup(void);
 387 
 388 int vmm_mod_load(void);
 389 int vmm_mod_unload(void);
 390 
 391 void vmm_call_trap(uint64_t);
 392 
 393 /*
 394  * Because of tangled headers, this is not exposed directly via the vmm_drv
 395  * interface, but rather mirrored as vmm_drv_iop_cb_t in vmm_drv.h.
 396  */
 397 typedef int (*ioport_handler_t)(void *, bool, uint16_t, uint8_t, uint32_t *);
 398 
 399 int vm_ioport_access(struct vm *vm, int vcpuid, bool in, uint16_t port,
 400     uint8_t bytes, uint32_t *val);
 401 
 402 int vm_ioport_attach(struct vm *vm, uint16_t port, ioport_handler_t func,
 403     void *arg, void **cookie);
 404 int vm_ioport_detach(struct vm *vm, void **cookie, ioport_handler_t *old_func,
 405     void **old_arg);
 406 
 407 int vm_ioport_hook(struct vm *, uint16_t, ioport_handler_t, void *, void **);
 408 void vm_ioport_unhook(struct vm *, void **);
 409 
 410 #endif /* __FreeBSD */
 411 
 412 #endif /* _VMM_KERNEL_H_ */