1 /*-
   2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
   3  *
   4  * Copyright (c) 2011 NetApp, Inc.
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  *
  28  * $FreeBSD$
  29  */
  30 /*
  31  * This file and its contents are supplied under the terms of the
  32  * Common Development and Distribution License ("CDDL"), version 1.0.
  33  * You may only use this file in accordance with the terms of version
  34  * 1.0 of the CDDL.
  35  *
  36  * A full copy of the text of the CDDL should have accompanied this
  37  * source.  A copy of the CDDL is also available via the Internet at
  38  * http://www.illumos.org/license/CDDL.
  39  *
  40  * Copyright 2015 Pluribus Networks Inc.
  41  * Copyright 2019 Joyent, Inc.
  42  * Copyright 2020 Oxide Computer Company
  43  */
  44 
  45 #include <sys/cdefs.h>
  46 __FBSDID("$FreeBSD$");
  47 
  48 #include <sys/param.h>
  49 #include <sys/sysctl.h>
  50 #include <sys/ioctl.h>
  51 #ifdef  __FreeBSD__
  52 #include <sys/linker.h>
  53 #endif
  54 #include <sys/mman.h>
  55 #include <sys/module.h>
  56 #include <sys/_iovec.h>
  57 #include <sys/cpuset.h>
  58 
  59 #include <x86/segments.h>
  60 #include <machine/specialreg.h>
  61 
  62 #include <errno.h>
  63 #include <stdio.h>
  64 #include <stdlib.h>
  65 #include <assert.h>
  66 #include <string.h>
  67 #include <fcntl.h>
  68 #include <unistd.h>
  69 
  70 #include <libutil.h>
  71 
  72 #include <machine/vmm.h>
  73 #include <machine/vmm_dev.h>
  74 
  75 #include "vmmapi.h"
  76 
  77 #define MB      (1024 * 1024UL)
  78 #define GB      (1024 * 1024 * 1024UL)
  79 
  80 #ifndef __FreeBSD__
  81 /* shim to no-op for now */
  82 #define MAP_NOCORE              0
  83 #define MAP_ALIGNED_SUPER       0
  84 
  85 /* Rely on PROT_NONE for guard purposes */
  86 #define MAP_GUARD               (MAP_PRIVATE | MAP_ANON | MAP_NORESERVE)
  87 #endif
  88 
  89 /*
  90  * Size of the guard region before and after the virtual address space
  91  * mapping the guest physical memory. This must be a multiple of the
  92  * superpage size for performance reasons.
  93  */
  94 #define VM_MMAP_GUARD_SIZE      (4 * MB)
  95 
  96 #define PROT_RW         (PROT_READ | PROT_WRITE)
  97 #define PROT_ALL        (PROT_READ | PROT_WRITE | PROT_EXEC)
  98 
  99 struct vmctx {
 100         int     fd;
 101         uint32_t lowmem_limit;
 102         int     memflags;
 103         size_t  lowmem;
 104         size_t  highmem;
 105         char    *baseaddr;
 106         char    *name;
 107 };
 108 
 109 #ifdef  __FreeBSD__
 110 #define CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
 111 #define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
 112 #else
 113 #define CREATE(x)       vm_do_ctl(VMM_CREATE_VM, (x))
 114 #define DESTROY(x)      vm_do_ctl(VMM_DESTROY_VM, (x))
 115 
 116 static int
 117 vm_do_ctl(int cmd, const char *name)
 118 {
 119         int ctl_fd;
 120 
 121         ctl_fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR);
 122         if (ctl_fd < 0) {
 123                 return (-1);
 124         }
 125 
 126         if (ioctl(ctl_fd, cmd, name) == -1) {
 127                 int err = errno;
 128 
 129                 /* Do not lose ioctl errno through the close(2) */
 130                 (void) close(ctl_fd);
 131                 errno = err;
 132                 return (-1);
 133         }
 134         (void) close(ctl_fd);
 135 
 136         return (0);
 137 }
 138 #endif
 139 
 140 static int
 141 vm_device_open(const char *name)
 142 {
 143         int fd, len;
 144         char *vmfile;
 145 
 146         len = strlen("/dev/vmm/") + strlen(name) + 1;
 147         vmfile = malloc(len);
 148         assert(vmfile != NULL);
 149         snprintf(vmfile, len, "/dev/vmm/%s", name);
 150 
 151         /* Open the device file */
 152         fd = open(vmfile, O_RDWR, 0);
 153 
 154         free(vmfile);
 155         return (fd);
 156 }
 157 
 158 int
 159 vm_create(const char *name)
 160 {
 161 #ifdef __FreeBSD__
 162         /* Try to load vmm(4) module before creating a guest. */
 163         if (modfind("vmm") < 0)
 164                 kldload("vmm");
 165 #endif
 166         return (CREATE((char *)name));
 167 }
 168 
 169 struct vmctx *
 170 vm_open(const char *name)
 171 {
 172         struct vmctx *vm;
 173 
 174         vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
 175         assert(vm != NULL);
 176 
 177         vm->fd = -1;
 178         vm->memflags = 0;
 179         vm->lowmem_limit = 3 * GB;
 180         vm->name = (char *)(vm + 1);
 181         strcpy(vm->name, name);
 182 
 183         if ((vm->fd = vm_device_open(vm->name)) < 0)
 184                 goto err;
 185 
 186         return (vm);
 187 err:
 188         free(vm);
 189         return (NULL);
 190 }
 191 
 192 #ifndef __FreeBSD__
 193 void
 194 vm_close(struct vmctx *vm)
 195 {
 196         assert(vm != NULL);
 197         assert(vm->fd >= 0);
 198 
 199         (void) close(vm->fd);
 200 
 201         free(vm);
 202 }
 203 #endif
 204 
 205 void
 206 vm_destroy(struct vmctx *vm)
 207 {
 208         assert(vm != NULL);
 209 
 210         if (vm->fd >= 0)
 211                 close(vm->fd);
 212         DESTROY(vm->name);
 213 
 214         free(vm);
 215 }
 216 
 217 int
 218 vm_parse_memsize(const char *optarg, size_t *ret_memsize)
 219 {
 220         char *endptr;
 221         size_t optval;
 222         int error;
 223 
 224         optval = strtoul(optarg, &endptr, 0);
 225         if (*optarg != '\0' && *endptr == '\0') {
 226                 /*
 227                  * For the sake of backward compatibility if the memory size
 228                  * specified on the command line is less than a megabyte then
 229                  * it is interpreted as being in units of MB.
 230                  */
 231                 if (optval < MB)
 232                         optval *= MB;
 233                 *ret_memsize = optval;
 234                 error = 0;
 235         } else
 236                 error = expand_number(optarg, ret_memsize);
 237 
 238         return (error);
 239 }
 240 
 241 uint32_t
 242 vm_get_lowmem_limit(struct vmctx *ctx)
 243 {
 244 
 245         return (ctx->lowmem_limit);
 246 }
 247 
 248 void
 249 vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
 250 {
 251 
 252         ctx->lowmem_limit = limit;
 253 }
 254 
 255 void
 256 vm_set_memflags(struct vmctx *ctx, int flags)
 257 {
 258 
 259         ctx->memflags = flags;
 260 }
 261 
 262 int
 263 vm_get_memflags(struct vmctx *ctx)
 264 {
 265 
 266         return (ctx->memflags);
 267 }
 268 
 269 /*
 270  * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
 271  */
 272 int
 273 vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
 274     size_t len, int prot)
 275 {
 276         struct vm_memmap memmap;
 277         int error, flags;
 278 
 279         memmap.gpa = gpa;
 280         memmap.segid = segid;
 281         memmap.segoff = off;
 282         memmap.len = len;
 283         memmap.prot = prot;
 284         memmap.flags = 0;
 285 
 286         if (ctx->memflags & VM_MEM_F_WIRED)
 287                 memmap.flags |= VM_MEMMAP_F_WIRED;
 288 
 289         /*
 290          * If this mapping already exists then don't create it again. This
 291          * is the common case for SYSMEM mappings created by bhyveload(8).
 292          */
 293         error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
 294         if (error == 0 && gpa == memmap.gpa) {
 295                 if (segid != memmap.segid || off != memmap.segoff ||
 296                     prot != memmap.prot || flags != memmap.flags) {
 297                         errno = EEXIST;
 298                         return (-1);
 299                 } else {
 300                         return (0);
 301                 }
 302         }
 303 
 304         error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
 305         return (error);
 306 }
 307 
 308 int
 309 vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
 310 {
 311         struct vm_munmap munmap;
 312         int error;
 313 
 314         munmap.gpa = gpa;
 315         munmap.len = len;
 316 
 317         error = ioctl(ctx->fd, VM_MUNMAP_MEMSEG, &munmap);
 318         return (error);
 319 }
 320 
 321 int
 322 vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
 323     vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
 324 {
 325         struct vm_memmap memmap;
 326         int error;
 327 
 328         bzero(&memmap, sizeof(struct vm_memmap));
 329         memmap.gpa = *gpa;
 330         error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
 331         if (error == 0) {
 332                 *gpa = memmap.gpa;
 333                 *segid = memmap.segid;
 334                 *segoff = memmap.segoff;
 335                 *len = memmap.len;
 336                 *prot = memmap.prot;
 337                 *flags = memmap.flags;
 338         }
 339         return (error);
 340 }
 341 
 342 /*
 343  * Return 0 if the segments are identical and non-zero otherwise.
 344  *
 345  * This is slightly complicated by the fact that only device memory segments
 346  * are named.
 347  */
 348 static int
 349 cmpseg(size_t len, const char *str, size_t len2, const char *str2)
 350 {
 351 
 352         if (len == len2) {
 353                 if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
 354                         return (0);
 355         }
 356         return (-1);
 357 }
 358 
 359 static int
 360 vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
 361 {
 362         struct vm_memseg memseg;
 363         size_t n;
 364         int error;
 365 
 366         /*
 367          * If the memory segment has already been created then just return.
 368          * This is the usual case for the SYSMEM segment created by userspace
 369          * loaders like bhyveload(8).
 370          */
 371         error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
 372             sizeof(memseg.name));
 373         if (error)
 374                 return (error);
 375 
 376         if (memseg.len != 0) {
 377                 if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
 378                         errno = EINVAL;
 379                         return (-1);
 380                 } else {
 381                         return (0);
 382                 }
 383         }
 384 
 385         bzero(&memseg, sizeof(struct vm_memseg));
 386         memseg.segid = segid;
 387         memseg.len = len;
 388         if (name != NULL) {
 389                 n = strlcpy(memseg.name, name, sizeof(memseg.name));
 390                 if (n >= sizeof(memseg.name)) {
 391                         errno = ENAMETOOLONG;
 392                         return (-1);
 393                 }
 394         }
 395 
 396         error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
 397         return (error);
 398 }
 399 
 400 int
 401 vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
 402     size_t bufsize)
 403 {
 404         struct vm_memseg memseg;
 405         size_t n;
 406         int error;
 407 
 408         memseg.segid = segid;
 409         error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
 410         if (error == 0) {
 411                 *lenp = memseg.len;
 412                 n = strlcpy(namebuf, memseg.name, bufsize);
 413                 if (n >= bufsize) {
 414                         errno = ENAMETOOLONG;
 415                         error = -1;
 416                 }
 417         }
 418         return (error);
 419 }
 420 
 421 static int
 422 #ifdef __FreeBSD__
 423 setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
 424 #else
 425 setup_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len,
 426     char *base)
 427 #endif
 428 {
 429         char *ptr;
 430         int error, flags;
 431 
 432         /* Map 'len' bytes starting at 'gpa' in the guest address space */
 433 #ifdef __FreeBSD__
 434         error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
 435 #else
 436         /*
 437          * As we use two segments for lowmem/highmem the offset within the
 438          * segment is 0 on illumos.
 439          */
 440         error = vm_mmap_memseg(ctx, gpa, segid, 0, len, PROT_ALL);
 441 #endif
 442         if (error)
 443                 return (error);
 444 
 445         flags = MAP_SHARED | MAP_FIXED;
 446         if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
 447                 flags |= MAP_NOCORE;
 448 
 449         /* mmap into the process address space on the host */
 450         ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
 451         if (ptr == MAP_FAILED)
 452                 return (-1);
 453 
 454         return (0);
 455 }
 456 
 457 int
 458 vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
 459 {
 460         size_t objsize, len;
 461         vm_paddr_t gpa;
 462         char *baseaddr, *ptr;
 463         int error;
 464 
 465         assert(vms == VM_MMAP_ALL);
 466 
 467         /*
 468          * If 'memsize' cannot fit entirely in the 'lowmem' segment then
 469          * create another 'highmem' segment above 4GB for the remainder.
 470          */
 471         if (memsize > ctx->lowmem_limit) {
 472                 ctx->lowmem = ctx->lowmem_limit;
 473                 ctx->highmem = memsize - ctx->lowmem_limit;
 474                 objsize = 4*GB + ctx->highmem;
 475         } else {
 476                 ctx->lowmem = memsize;
 477                 ctx->highmem = 0;
 478                 objsize = ctx->lowmem;
 479         }
 480 
 481 #ifdef __FreeBSD__
 482         error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
 483         if (error)
 484                 return (error);
 485 #endif
 486 
 487         /*
 488          * Stake out a contiguous region covering the guest physical memory
 489          * and the adjoining guard regions.
 490          */
 491         len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
 492         ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
 493         if (ptr == MAP_FAILED)
 494                 return (-1);
 495 
 496         baseaddr = ptr + VM_MMAP_GUARD_SIZE;
 497 
 498 #ifdef __FreeBSD__
 499         if (ctx->highmem > 0) {
 500                 gpa = 4*GB;
 501                 len = ctx->highmem;
 502                 error = setup_memory_segment(ctx, gpa, len, baseaddr);
 503                 if (error)
 504                         return (error);
 505         }
 506 
 507         if (ctx->lowmem > 0) {
 508                 gpa = 0;
 509                 len = ctx->lowmem;
 510                 error = setup_memory_segment(ctx, gpa, len, baseaddr);
 511                 if (error)
 512                         return (error);
 513         }
 514 #else
 515         if (ctx->highmem > 0) {
 516                 error = vm_alloc_memseg(ctx, VM_HIGHMEM, ctx->highmem, NULL);
 517                 if (error)
 518                         return (error);
 519                 gpa = 4*GB;
 520                 len = ctx->highmem;
 521                 error = setup_memory_segment(ctx, VM_HIGHMEM, gpa, len, baseaddr);
 522                 if (error)
 523                         return (error);
 524         }
 525 
 526         if (ctx->lowmem > 0) {
 527                 error = vm_alloc_memseg(ctx, VM_LOWMEM, ctx->lowmem, NULL);
 528                 if (error)
 529                         return (error);
 530                 gpa = 0;
 531                 len = ctx->lowmem;
 532                 error = setup_memory_segment(ctx, VM_LOWMEM, gpa, len, baseaddr);
 533                 if (error)
 534                         return (error);
 535         }
 536 #endif
 537 
 538         ctx->baseaddr = baseaddr;
 539 
 540         return (0);
 541 }
 542 
 543 /*
 544  * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
 545  * the lowmem or highmem regions.
 546  *
 547  * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region.
 548  * The instruction emulation code depends on this behavior.
 549  */
 550 void *
 551 vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
 552 {
 553 
 554         if (ctx->lowmem > 0) {
 555                 if (gaddr < ctx->lowmem && len <= ctx->lowmem &&
 556                     gaddr + len <= ctx->lowmem)
 557                         return (ctx->baseaddr + gaddr);
 558         }
 559 
 560         if (ctx->highmem > 0) {
 561                 if (gaddr >= 4*GB) {
 562                         if (gaddr < 4*GB + ctx->highmem &&
 563                             len <= ctx->highmem &&
 564                             gaddr + len <= 4*GB + ctx->highmem)
 565                                 return (ctx->baseaddr + gaddr);
 566                 }
 567         }
 568 
 569         return (NULL);
 570 }
 571 
 572 size_t
 573 vm_get_lowmem_size(struct vmctx *ctx)
 574 {
 575 
 576         return (ctx->lowmem);
 577 }
 578 
 579 size_t
 580 vm_get_highmem_size(struct vmctx *ctx)
 581 {
 582 
 583         return (ctx->highmem);
 584 }
 585 
 586 #ifndef __FreeBSD__
 587 int
 588 vm_get_devmem_offset(struct vmctx *ctx, int segid, off_t *mapoff)
 589 {
 590         struct vm_devmem_offset vdo;
 591         int error;
 592 
 593         vdo.segid = segid;
 594         error = ioctl(ctx->fd, VM_DEVMEM_GETOFFSET, &vdo);
 595         if (error == 0)
 596                 *mapoff = vdo.offset;
 597 
 598         return (error);
 599 }
 600 #endif
 601 
 602 void *
 603 vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
 604 {
 605 #ifdef  __FreeBSD__
 606         char pathname[MAXPATHLEN];
 607 #endif
 608         size_t len2;
 609         char *base, *ptr;
 610         int fd, error, flags;
 611         off_t mapoff;
 612 
 613         fd = -1;
 614         ptr = MAP_FAILED;
 615         if (name == NULL || strlen(name) == 0) {
 616                 errno = EINVAL;
 617                 goto done;
 618         }
 619 
 620         error = vm_alloc_memseg(ctx, segid, len, name);
 621         if (error)
 622                 goto done;
 623 
 624 #ifdef  __FreeBSD__
 625         strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
 626         strlcat(pathname, ctx->name, sizeof(pathname));
 627         strlcat(pathname, ".", sizeof(pathname));
 628         strlcat(pathname, name, sizeof(pathname));
 629 
 630         fd = open(pathname, O_RDWR);
 631         if (fd < 0)
 632                 goto done;
 633 #else
 634         if (vm_get_devmem_offset(ctx, segid, &mapoff) != 0)
 635                 goto done;
 636 #endif
 637 
 638         /*
 639          * Stake out a contiguous region covering the device memory and the
 640          * adjoining guard regions.
 641          */
 642         len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
 643         base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1,
 644             0);
 645         if (base == MAP_FAILED)
 646                 goto done;
 647 
 648         flags = MAP_SHARED | MAP_FIXED;
 649         if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
 650                 flags |= MAP_NOCORE;
 651 
 652 #ifdef  __FreeBSD__
 653         /* mmap the devmem region in the host address space */
 654         ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
 655 #else
 656         /* mmap the devmem region in the host address space */
 657         ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, ctx->fd,
 658             mapoff);
 659 #endif
 660 done:
 661         if (fd >= 0)
 662                 close(fd);
 663         return (ptr);
 664 }
 665 
 666 int
 667 vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
 668             uint64_t base, uint32_t limit, uint32_t access)
 669 {
 670         int error;
 671         struct vm_seg_desc vmsegdesc;
 672 
 673         bzero(&vmsegdesc, sizeof(vmsegdesc));
 674         vmsegdesc.cpuid = vcpu;
 675         vmsegdesc.regnum = reg;
 676         vmsegdesc.desc.base = base;
 677         vmsegdesc.desc.limit = limit;
 678         vmsegdesc.desc.access = access;
 679 
 680         error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 681         return (error);
 682 }
 683 
 684 int
 685 vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
 686             uint64_t *base, uint32_t *limit, uint32_t *access)
 687 {
 688         int error;
 689         struct vm_seg_desc vmsegdesc;
 690 
 691         bzero(&vmsegdesc, sizeof(vmsegdesc));
 692         vmsegdesc.cpuid = vcpu;
 693         vmsegdesc.regnum = reg;
 694 
 695         error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 696         if (error == 0) {
 697                 *base = vmsegdesc.desc.base;
 698                 *limit = vmsegdesc.desc.limit;
 699                 *access = vmsegdesc.desc.access;
 700         }
 701         return (error);
 702 }
 703 
 704 int
 705 vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc)
 706 {
 707         int error;
 708 
 709         error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit,
 710             &seg_desc->access);
 711         return (error);
 712 }
 713 
 714 int
 715 vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
 716 {
 717         int error;
 718         struct vm_register vmreg;
 719 
 720         bzero(&vmreg, sizeof(vmreg));
 721         vmreg.cpuid = vcpu;
 722         vmreg.regnum = reg;
 723         vmreg.regval = val;
 724 
 725         error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
 726         return (error);
 727 }
 728 
 729 int
 730 vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
 731 {
 732         int error;
 733         struct vm_register vmreg;
 734 
 735         bzero(&vmreg, sizeof(vmreg));
 736         vmreg.cpuid = vcpu;
 737         vmreg.regnum = reg;
 738 
 739         error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
 740         *ret_val = vmreg.regval;
 741         return (error);
 742 }
 743 
 744 int
 745 vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
 746     const int *regnums, uint64_t *regvals)
 747 {
 748         int error;
 749         struct vm_register_set vmregset;
 750 
 751         bzero(&vmregset, sizeof(vmregset));
 752         vmregset.cpuid = vcpu;
 753         vmregset.count = count;
 754         vmregset.regnums = regnums;
 755         vmregset.regvals = regvals;
 756 
 757         error = ioctl(ctx->fd, VM_SET_REGISTER_SET, &vmregset);
 758         return (error);
 759 }
 760 
 761 int
 762 vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
 763     const int *regnums, uint64_t *regvals)
 764 {
 765         int error;
 766         struct vm_register_set vmregset;
 767 
 768         bzero(&vmregset, sizeof(vmregset));
 769         vmregset.cpuid = vcpu;
 770         vmregset.count = count;
 771         vmregset.regnums = regnums;
 772         vmregset.regvals = regvals;
 773 
 774         error = ioctl(ctx->fd, VM_GET_REGISTER_SET, &vmregset);
 775         return (error);
 776 }
 777 
 778 int
 779 vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry,
 780     struct vm_exit *vm_exit)
 781 {
 782         struct vm_entry entry;
 783 
 784         bcopy(vm_entry, &entry, sizeof (entry));
 785         entry.cpuid = vcpu;
 786         entry.exit_data = vm_exit;
 787 
 788         return (ioctl(ctx->fd, VM_RUN, &entry));
 789 }
 790 
 791 int
 792 vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
 793 {
 794         struct vm_suspend vmsuspend;
 795 
 796         bzero(&vmsuspend, sizeof(vmsuspend));
 797         vmsuspend.how = how;
 798         return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
 799 }
 800 
 801 int
 802 vm_reinit(struct vmctx *ctx)
 803 {
 804 
 805         return (ioctl(ctx->fd, VM_REINIT, 0));
 806 }
 807 
 808 int
 809 vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
 810     uint32_t errcode, int restart_instruction)
 811 {
 812         struct vm_exception exc;
 813 
 814         exc.cpuid = vcpu;
 815         exc.vector = vector;
 816         exc.error_code = errcode;
 817         exc.error_code_valid = errcode_valid;
 818         exc.restart_instruction = restart_instruction;
 819 
 820         return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
 821 }
 822 
 823 #ifndef __FreeBSD__
 824 void
 825 vm_inject_fault(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
 826     int errcode)
 827 {
 828         int error;
 829         struct vm_exception exc;
 830 
 831         exc.cpuid = vcpu;
 832         exc.vector = vector;
 833         exc.error_code = errcode;
 834         exc.error_code_valid = errcode_valid;
 835         exc.restart_instruction = 1;
 836         error = ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc);
 837 
 838         assert(error == 0);
 839 }
 840 #endif /* __FreeBSD__ */
 841 
 842 int
 843 vm_apicid2vcpu(struct vmctx *ctx, int apicid)
 844 {
 845         /*
 846          * The apic id associated with the 'vcpu' has the same numerical value
 847          * as the 'vcpu' itself.
 848          */
 849         return (apicid);
 850 }
 851 
 852 int
 853 vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
 854 {
 855         struct vm_lapic_irq vmirq;
 856 
 857         bzero(&vmirq, sizeof(vmirq));
 858         vmirq.cpuid = vcpu;
 859         vmirq.vector = vector;
 860 
 861         return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
 862 }
 863 
 864 int
 865 vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
 866 {
 867         struct vm_lapic_irq vmirq;
 868 
 869         bzero(&vmirq, sizeof(vmirq));
 870         vmirq.cpuid = vcpu;
 871         vmirq.vector = vector;
 872 
 873         return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq));
 874 }
 875 
 876 int
 877 vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
 878 {
 879         struct vm_lapic_msi vmmsi;
 880 
 881         bzero(&vmmsi, sizeof(vmmsi));
 882         vmmsi.addr = addr;
 883         vmmsi.msg = msg;
 884 
 885         return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
 886 }
 887 
 888 int
 889 vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
 890 {
 891         struct vm_ioapic_irq ioapic_irq;
 892 
 893         bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 894         ioapic_irq.irq = irq;
 895 
 896         return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
 897 }
 898 
 899 int
 900 vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
 901 {
 902         struct vm_ioapic_irq ioapic_irq;
 903 
 904         bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 905         ioapic_irq.irq = irq;
 906 
 907         return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
 908 }
 909 
 910 int
 911 vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
 912 {
 913         struct vm_ioapic_irq ioapic_irq;
 914 
 915         bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 916         ioapic_irq.irq = irq;
 917 
 918         return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
 919 }
 920 
 921 int
 922 vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
 923 {
 924 
 925         return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
 926 }
 927 
 928 int
 929 vm_readwrite_kernemu_device(struct vmctx *ctx, int vcpu, vm_paddr_t gpa,
 930     bool write, int size, uint64_t *value)
 931 {
 932         struct vm_readwrite_kernemu_device irp = {
 933                 .vcpuid = vcpu,
 934                 .access_width = fls(size) - 1,
 935                 .gpa = gpa,
 936                 .value = write ? *value : ~0ul,
 937         };
 938         long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV);
 939         int rc;
 940 
 941         rc = ioctl(ctx->fd, cmd, &irp);
 942         if (rc == 0 && !write)
 943                 *value = irp.value;
 944         return (rc);
 945 }
 946 
 947 int
 948 vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 949 {
 950         struct vm_isa_irq isa_irq;
 951 
 952         bzero(&isa_irq, sizeof(struct vm_isa_irq));
 953         isa_irq.atpic_irq = atpic_irq;
 954         isa_irq.ioapic_irq = ioapic_irq;
 955 
 956         return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
 957 }
 958 
 959 int
 960 vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 961 {
 962         struct vm_isa_irq isa_irq;
 963 
 964         bzero(&isa_irq, sizeof(struct vm_isa_irq));
 965         isa_irq.atpic_irq = atpic_irq;
 966         isa_irq.ioapic_irq = ioapic_irq;
 967 
 968         return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
 969 }
 970 
 971 int
 972 vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 973 {
 974         struct vm_isa_irq isa_irq;
 975 
 976         bzero(&isa_irq, sizeof(struct vm_isa_irq));
 977         isa_irq.atpic_irq = atpic_irq;
 978         isa_irq.ioapic_irq = ioapic_irq;
 979 
 980         return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
 981 }
 982 
 983 int
 984 vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
 985     enum vm_intr_trigger trigger)
 986 {
 987         struct vm_isa_irq_trigger isa_irq_trigger;
 988 
 989         bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
 990         isa_irq_trigger.atpic_irq = atpic_irq;
 991         isa_irq_trigger.trigger = trigger;
 992 
 993         return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
 994 }
 995 
 996 int
 997 vm_inject_nmi(struct vmctx *ctx, int vcpu)
 998 {
 999         struct vm_nmi vmnmi;
1000 
1001         bzero(&vmnmi, sizeof(vmnmi));
1002         vmnmi.cpuid = vcpu;
1003 
1004         return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
1005 }
1006 
1007 static const char *capstrmap[] = {
1008         [VM_CAP_HALT_EXIT]  = "hlt_exit",
1009         [VM_CAP_MTRAP_EXIT] = "mtrap_exit",
1010         [VM_CAP_PAUSE_EXIT] = "pause_exit",
1011 #ifdef __FreeBSD__
1012         [VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest",
1013 #endif
1014         [VM_CAP_ENABLE_INVPCID] = "enable_invpcid",
1015         [VM_CAP_BPT_EXIT] = "bpt_exit",
1016 };
1017 
1018 int
1019 vm_capability_name2type(const char *capname)
1020 {
1021         int i;
1022 
1023         for (i = 0; i < nitems(capstrmap); i++) {
1024                 if (strcmp(capstrmap[i], capname) == 0)
1025                         return (i);
1026         }
1027 
1028         return (-1);
1029 }
1030 
1031 const char *
1032 vm_capability_type2name(int type)
1033 {
1034         if (type >= 0 && type < nitems(capstrmap))
1035                 return (capstrmap[type]);
1036 
1037         return (NULL);
1038 }
1039 
1040 int
1041 vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
1042                   int *retval)
1043 {
1044         int error;
1045         struct vm_capability vmcap;
1046 
1047         bzero(&vmcap, sizeof(vmcap));
1048         vmcap.cpuid = vcpu;
1049         vmcap.captype = cap;
1050 
1051         error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
1052         *retval = vmcap.capval;
1053         return (error);
1054 }
1055 
1056 int
1057 vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
1058 {
1059         struct vm_capability vmcap;
1060 
1061         bzero(&vmcap, sizeof(vmcap));
1062         vmcap.cpuid = vcpu;
1063         vmcap.captype = cap;
1064         vmcap.capval = val;
1065 
1066         return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
1067 }
1068 
1069 #ifdef __FreeBSD__
1070 int
1071 vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
1072 {
1073         struct vm_pptdev pptdev;
1074 
1075         bzero(&pptdev, sizeof(pptdev));
1076         pptdev.bus = bus;
1077         pptdev.slot = slot;
1078         pptdev.func = func;
1079 
1080         return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
1081 }
1082 
1083 int
1084 vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
1085 {
1086         struct vm_pptdev pptdev;
1087 
1088         bzero(&pptdev, sizeof(pptdev));
1089         pptdev.bus = bus;
1090         pptdev.slot = slot;
1091         pptdev.func = func;
1092 
1093         return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
1094 }
1095 
1096 int
1097 vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
1098                    vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
1099 {
1100         struct vm_pptdev_mmio pptmmio;
1101 
1102         bzero(&pptmmio, sizeof(pptmmio));
1103         pptmmio.bus = bus;
1104         pptmmio.slot = slot;
1105         pptmmio.func = func;
1106         pptmmio.gpa = gpa;
1107         pptmmio.len = len;
1108         pptmmio.hpa = hpa;
1109 
1110         return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
1111 }
1112 
1113 int
1114 vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
1115                      vm_paddr_t gpa, size_t len)
1116 {
1117         struct vm_pptdev_mmio pptmmio;
1118 
1119         bzero(&pptmmio, sizeof(pptmmio));
1120         pptmmio.bus = bus;
1121         pptmmio.slot = slot;
1122         pptmmio.func = func;
1123         pptmmio.gpa = gpa;
1124         pptmmio.len = len;
1125 
1126         return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
1127 }
1128 
1129 int
1130 vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
1131     uint64_t addr, uint64_t msg, int numvec)
1132 {
1133         struct vm_pptdev_msi pptmsi;
1134 
1135         bzero(&pptmsi, sizeof(pptmsi));
1136         pptmsi.vcpu = vcpu;
1137         pptmsi.bus = bus;
1138         pptmsi.slot = slot;
1139         pptmsi.func = func;
1140         pptmsi.msg = msg;
1141         pptmsi.addr = addr;
1142         pptmsi.numvec = numvec;
1143 
1144         return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
1145 }
1146 
1147 int
1148 vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
1149     int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
1150 {
1151         struct vm_pptdev_msix pptmsix;
1152 
1153         bzero(&pptmsix, sizeof(pptmsix));
1154         pptmsix.vcpu = vcpu;
1155         pptmsix.bus = bus;
1156         pptmsix.slot = slot;
1157         pptmsix.func = func;
1158         pptmsix.idx = idx;
1159         pptmsix.msg = msg;
1160         pptmsix.addr = addr;
1161         pptmsix.vector_control = vector_control;
1162 
1163         return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
1164 }
1165 
1166 int
1167 vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func,
1168     int *msi_limit, int *msix_limit)
1169 {
1170         struct vm_pptdev_limits pptlimits;
1171         int error;
1172 
1173         bzero(&pptlimits, sizeof (pptlimits));
1174         pptlimits.bus = bus;
1175         pptlimits.slot = slot;
1176         pptlimits.func = func;
1177 
1178         error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits);
1179 
1180         *msi_limit = pptlimits.msi_limit;
1181         *msix_limit = pptlimits.msix_limit;
1182 
1183         return (error);
1184 }
1185 
1186 int
1187 vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func)
1188 {
1189         struct vm_pptdev ppt;
1190 
1191         bzero(&ppt, sizeof(ppt));
1192         ppt.bus = bus;
1193         ppt.slot = slot;
1194         ppt.func = func;
1195 
1196         return ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &ppt);
1197 }
1198 
1199 #else /* __FreeBSD__ */
1200 
1201 int
1202 vm_assign_pptdev(struct vmctx *ctx, int pptfd)
1203 {
1204         struct vm_pptdev pptdev;
1205 
1206         pptdev.pptfd = pptfd;
1207         return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
1208 }
1209 
1210 int
1211 vm_unassign_pptdev(struct vmctx *ctx, int pptfd)
1212 {
1213         struct vm_pptdev pptdev;
1214 
1215         pptdev.pptfd = pptfd;
1216         return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
1217 }
1218 
1219 int
1220 vm_map_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len,
1221     vm_paddr_t hpa)
1222 {
1223         struct vm_pptdev_mmio pptmmio;
1224 
1225         pptmmio.pptfd = pptfd;
1226         pptmmio.gpa = gpa;
1227         pptmmio.len = len;
1228         pptmmio.hpa = hpa;
1229         return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
1230 }
1231 
1232 int
1233 vm_unmap_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len)
1234 {
1235         struct vm_pptdev_mmio pptmmio;
1236 
1237         bzero(&pptmmio, sizeof(pptmmio));
1238         pptmmio.pptfd = pptfd;
1239         pptmmio.gpa = gpa;
1240         pptmmio.len = len;
1241 
1242         return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
1243 }
1244 
1245 int
1246 vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int pptfd, uint64_t addr,
1247     uint64_t msg, int numvec)
1248 {
1249         struct vm_pptdev_msi pptmsi;
1250 
1251         pptmsi.vcpu = vcpu;
1252         pptmsi.pptfd = pptfd;
1253         pptmsi.msg = msg;
1254         pptmsi.addr = addr;
1255         pptmsi.numvec = numvec;
1256         return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
1257 }
1258 
1259 int
1260 vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int pptfd, int idx,
1261     uint64_t addr, uint64_t msg, uint32_t vector_control)
1262 {
1263         struct vm_pptdev_msix pptmsix;
1264 
1265         pptmsix.vcpu = vcpu;
1266         pptmsix.pptfd = pptfd;
1267         pptmsix.idx = idx;
1268         pptmsix.msg = msg;
1269         pptmsix.addr = addr;
1270         pptmsix.vector_control = vector_control;
1271         return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
1272 }
1273 
1274 int
1275 vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit,
1276     int *msix_limit)
1277 {
1278         struct vm_pptdev_limits pptlimits;
1279         int error;
1280 
1281         bzero(&pptlimits, sizeof (pptlimits));
1282         pptlimits.pptfd = pptfd;
1283         error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits);
1284 
1285         *msi_limit = pptlimits.msi_limit;
1286         *msix_limit = pptlimits.msix_limit;
1287         return (error);
1288 }
1289 
1290 int
1291 vm_disable_pptdev_msix(struct vmctx *ctx, int pptfd)
1292 {
1293         struct vm_pptdev pptdev;
1294 
1295         pptdev.pptfd = pptfd;
1296         return (ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &pptdev));
1297 }
1298 #endif /* __FreeBSD__ */
1299 
1300 uint64_t *
1301 vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
1302              int *ret_entries)
1303 {
1304         int error;
1305 
1306         static struct vm_stats vmstats;
1307 
1308         vmstats.cpuid = vcpu;
1309 
1310         error = ioctl(ctx->fd, VM_STATS_IOC, &vmstats);
1311         if (error == 0) {
1312                 if (ret_entries)
1313                         *ret_entries = vmstats.num_entries;
1314                 if (ret_tv)
1315                         *ret_tv = vmstats.tv;
1316                 return (vmstats.statbuf);
1317         } else
1318                 return (NULL);
1319 }
1320 
1321 const char *
1322 vm_get_stat_desc(struct vmctx *ctx, int index)
1323 {
1324         static struct vm_stat_desc statdesc;
1325 
1326         statdesc.index = index;
1327         if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
1328                 return (statdesc.desc);
1329         else
1330                 return (NULL);
1331 }
1332 
1333 int
1334 vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state)
1335 {
1336         int error;
1337         struct vm_x2apic x2apic;
1338 
1339         bzero(&x2apic, sizeof(x2apic));
1340         x2apic.cpuid = vcpu;
1341 
1342         error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic);
1343         *state = x2apic.state;
1344         return (error);
1345 }
1346 
1347 int
1348 vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
1349 {
1350         int error;
1351         struct vm_x2apic x2apic;
1352 
1353         bzero(&x2apic, sizeof(x2apic));
1354         x2apic.cpuid = vcpu;
1355         x2apic.state = state;
1356 
1357         error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic);
1358 
1359         return (error);
1360 }
1361 
1362 #ifndef __FreeBSD__
1363 int
1364 vcpu_reset(struct vmctx *vmctx, int vcpu)
1365 {
1366         struct vm_vcpu_reset vvr;
1367 
1368         vvr.vcpuid = vcpu;
1369         vvr.kind = VRK_RESET;
1370 
1371         return (ioctl(vmctx->fd, VM_RESET_CPU, &vvr));
1372 }
1373 #else /* __FreeBSD__ */
1374 /*
1375  * From Intel Vol 3a:
1376  * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
1377  */
1378 int
1379 vcpu_reset(struct vmctx *vmctx, int vcpu)
1380 {
1381         int error;
1382         uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
1383         uint32_t desc_access, desc_limit;
1384         uint16_t sel;
1385 
1386         zero = 0;
1387 
1388         rflags = 0x2;
1389         error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
1390         if (error)
1391                 goto done;
1392 
1393         rip = 0xfff0;
1394         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
1395                 goto done;
1396 
1397         cr0 = CR0_NE;
1398         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
1399                 goto done;
1400 
1401         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
1402                 goto done;
1403         
1404         cr4 = 0;
1405         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
1406                 goto done;
1407 
1408         /*
1409          * CS: present, r/w, accessed, 16-bit, byte granularity, usable
1410          */
1411         desc_base = 0xffff0000;
1412         desc_limit = 0xffff;
1413         desc_access = 0x0093;
1414         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
1415                             desc_base, desc_limit, desc_access);
1416         if (error)
1417                 goto done;
1418 
1419         sel = 0xf000;
1420         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
1421                 goto done;
1422 
1423         /*
1424          * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
1425          */
1426         desc_base = 0;
1427         desc_limit = 0xffff;
1428         desc_access = 0x0093;
1429         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
1430                             desc_base, desc_limit, desc_access);
1431         if (error)
1432                 goto done;
1433 
1434         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
1435                             desc_base, desc_limit, desc_access);
1436         if (error)
1437                 goto done;
1438 
1439         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
1440                             desc_base, desc_limit, desc_access);
1441         if (error)
1442                 goto done;
1443 
1444         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
1445                             desc_base, desc_limit, desc_access);
1446         if (error)
1447                 goto done;
1448 
1449         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
1450                             desc_base, desc_limit, desc_access);
1451         if (error)
1452                 goto done;
1453 
1454         sel = 0;
1455         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
1456                 goto done;
1457         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
1458                 goto done;
1459         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
1460                 goto done;
1461         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
1462                 goto done;
1463         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
1464                 goto done;
1465 
1466         /* General purpose registers */
1467         rdx = 0xf00;
1468         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
1469                 goto done;
1470         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
1471                 goto done;
1472         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
1473                 goto done;
1474         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
1475                 goto done;
1476         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
1477                 goto done;
1478         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
1479                 goto done;
1480         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
1481                 goto done;
1482         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
1483                 goto done;
1484 
1485         /* GDTR, IDTR */
1486         desc_base = 0;
1487         desc_limit = 0xffff;
1488         desc_access = 0;
1489         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
1490                             desc_base, desc_limit, desc_access);
1491         if (error != 0)
1492                 goto done;
1493 
1494         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
1495                             desc_base, desc_limit, desc_access);
1496         if (error != 0)
1497                 goto done;
1498 
1499         /* TR */
1500         desc_base = 0;
1501         desc_limit = 0xffff;
1502         desc_access = 0x0000008b;
1503         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
1504         if (error)
1505                 goto done;
1506 
1507         sel = 0;
1508         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
1509                 goto done;
1510 
1511         /* LDTR */
1512         desc_base = 0;
1513         desc_limit = 0xffff;
1514         desc_access = 0x00000082;
1515         error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
1516                             desc_limit, desc_access);
1517         if (error)
1518                 goto done;
1519 
1520         sel = 0;
1521         if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
1522                 goto done;
1523 
1524         /* XXX cr2, debug registers */
1525 
1526         error = 0;
1527 done:
1528         return (error);
1529 }
1530 #endif /* __FreeBSD__ */
1531 
1532 int
1533 vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
1534 {
1535         int error, i;
1536         struct vm_gpa_pte gpapte;
1537 
1538         bzero(&gpapte, sizeof(gpapte));
1539         gpapte.gpa = gpa;
1540 
1541         error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);
1542 
1543         if (error == 0) {
1544                 *num = gpapte.ptenum;
1545                 for (i = 0; i < gpapte.ptenum; i++)
1546                         pte[i] = gpapte.pte[i];
1547         }
1548 
1549         return (error);
1550 }
1551 
1552 int
1553 vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
1554 {
1555         int error;
1556         struct vm_hpet_cap cap;
1557 
1558         bzero(&cap, sizeof(struct vm_hpet_cap));
1559         error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
1560         if (capabilities != NULL)
1561                 *capabilities = cap.capabilities;
1562         return (error);
1563 }
1564 
1565 int
1566 vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
1567     uint64_t gla, int prot, uint64_t *gpa, int *fault)
1568 {
1569         struct vm_gla2gpa gg;
1570         int error;
1571 
1572         bzero(&gg, sizeof(struct vm_gla2gpa));
1573         gg.vcpuid = vcpu;
1574         gg.prot = prot;
1575         gg.gla = gla;
1576         gg.paging = *paging;
1577 
1578         error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
1579         if (error == 0) {
1580                 *fault = gg.fault;
1581                 *gpa = gg.gpa;
1582         }
1583         return (error);
1584 }
1585 
1586 int
1587 vm_gla2gpa_nofault(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
1588     uint64_t gla, int prot, uint64_t *gpa, int *fault)
1589 {
1590         struct vm_gla2gpa gg;
1591         int error;
1592 
1593         bzero(&gg, sizeof(struct vm_gla2gpa));
1594         gg.vcpuid = vcpu;
1595         gg.prot = prot;
1596         gg.gla = gla;
1597         gg.paging = *paging;
1598 
1599         error = ioctl(ctx->fd, VM_GLA2GPA_NOFAULT, &gg);
1600         if (error == 0) {
1601                 *fault = gg.fault;
1602                 *gpa = gg.gpa;
1603         }
1604         return (error);
1605 }
1606 
#ifndef min
/* Smaller of two values; each argument may be evaluated more than once. */
#define min(x, y)       (((x) < (y)) ? (x) : (y))
#endif
1610 
1611 int
1612 vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
1613     uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
1614     int *fault)
1615 {
1616         void *va;
1617         uint64_t gpa;
1618         int error, i, n, off;
1619 
1620         for (i = 0; i < iovcnt; i++) {
1621                 iov[i].iov_base = 0;
1622                 iov[i].iov_len = 0;
1623         }
1624 
1625         while (len) {
1626                 assert(iovcnt > 0);
1627                 error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault);
1628                 if (error || *fault)
1629                         return (error);
1630 
1631                 off = gpa & PAGE_MASK;
1632                 n = min(len, PAGE_SIZE - off);
1633 
1634                 va = vm_map_gpa(ctx, gpa, n);
1635                 if (va == NULL)
1636                         return (EFAULT);
1637 
1638                 iov->iov_base = va;
1639                 iov->iov_len = n;
1640                 iov++;
1641                 iovcnt--;
1642 
1643                 gla += n;
1644                 len -= n;
1645         }
1646         return (0);
1647 }
1648 
/*
 * Counterpart to vm_copy_setup().  Nothing needs to be undone, so this does
 * no work and leaves every argument untouched.
 */
void
vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
{
        /* Intentionally empty. */
}
1655 
/*
 * Gather 'len' bytes from the buffers described by 'iov' into the flat
 * buffer 'vp'.
 *
 * Entries are consumed in order, each contributing at most its iov_len
 * bytes.  An entry with a zero iov_len trips an assert().  'ctx' and 'vcpu'
 * are not used.
 */
void
vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
{
        char *out = vp;
        size_t chunk;

        for (; len != 0; iov++) {
                assert(iov->iov_len);

                chunk = (len < iov->iov_len) ? len : iov->iov_len;
                bcopy(iov->iov_base, out, chunk);

                out += chunk;
                len -= chunk;
        }
}
1675 
/*
 * Scatter 'len' bytes from the flat buffer 'vp' into the buffers described
 * by 'iov'.
 *
 * Entries are filled in order, each receiving at most its iov_len bytes.
 * An entry with a zero iov_len trips an assert().  'ctx' and 'vcpu' are not
 * used.
 */
void
vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
    size_t len)
{
        const char *in = vp;
        size_t chunk;

        for (; len != 0; iov++) {
                assert(iov->iov_len);

                chunk = (len < iov->iov_len) ? len : iov->iov_len;
                bcopy(in, iov->iov_base, chunk);

                in += chunk;
                len -= chunk;
        }
}
1696 
1697 static int
1698 vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
1699 {
1700         struct vm_cpuset vm_cpuset;
1701         int error;
1702 
1703         bzero(&vm_cpuset, sizeof(struct vm_cpuset));
1704         vm_cpuset.which = which;
1705         vm_cpuset.cpusetsize = sizeof(cpuset_t);
1706         vm_cpuset.cpus = cpus;
1707 
1708         error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset);
1709         return (error);
1710 }
1711 
1712 int
1713 vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
1714 {
1715 
1716         return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus));
1717 }
1718 
1719 int
1720 vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
1721 {
1722 
1723         return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus));
1724 }
1725 
1726 int
1727 vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
1728 {
1729 
1730         return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
1731 }
1732 
1733 int
1734 vm_activate_cpu(struct vmctx *ctx, int vcpu)
1735 {
1736         struct vm_activate_cpu ac;
1737         int error;
1738 
1739         bzero(&ac, sizeof(struct vm_activate_cpu));
1740         ac.vcpuid = vcpu;
1741         error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
1742         return (error);
1743 }
1744 
1745 int
1746 vm_suspend_cpu(struct vmctx *ctx, int vcpu)
1747 {
1748         struct vm_activate_cpu ac;
1749         int error;
1750 
1751         bzero(&ac, sizeof(struct vm_activate_cpu));
1752         ac.vcpuid = vcpu;
1753         error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
1754         return (error);
1755 }
1756 
1757 int
1758 vm_resume_cpu(struct vmctx *ctx, int vcpu)
1759 {
1760         struct vm_activate_cpu ac;
1761         int error;
1762 
1763         bzero(&ac, sizeof(struct vm_activate_cpu));
1764         ac.vcpuid = vcpu;
1765         error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
1766         return (error);
1767 }
1768 
1769 int
1770 vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
1771 {
1772         struct vm_intinfo vmii;
1773         int error;
1774 
1775         bzero(&vmii, sizeof(struct vm_intinfo));
1776         vmii.vcpuid = vcpu;
1777         error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii);
1778         if (error == 0) {
1779                 *info1 = vmii.info1;
1780                 *info2 = vmii.info2;
1781         }
1782         return (error);
1783 }
1784 
1785 int
1786 vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
1787 {
1788         struct vm_intinfo vmii;
1789         int error;
1790 
1791         bzero(&vmii, sizeof(struct vm_intinfo));
1792         vmii.vcpuid = vcpu;
1793         vmii.info1 = info1;
1794         error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
1795         return (error);
1796 }
1797 
1798 int
1799 vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
1800 {
1801         struct vm_rtc_data rtcdata;
1802         int error;
1803 
1804         bzero(&rtcdata, sizeof(struct vm_rtc_data));
1805         rtcdata.offset = offset;
1806         rtcdata.value = value;
1807         error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
1808         return (error);
1809 }
1810 
1811 int
1812 vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
1813 {
1814         struct vm_rtc_data rtcdata;
1815         int error;
1816 
1817         bzero(&rtcdata, sizeof(struct vm_rtc_data));
1818         rtcdata.offset = offset;
1819         error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
1820         if (error == 0)
1821                 *retval = rtcdata.value;
1822         return (error);
1823 }
1824 
1825 int
1826 vm_rtc_settime(struct vmctx *ctx, time_t secs)
1827 {
1828         struct vm_rtc_time rtctime;
1829         int error;
1830 
1831         bzero(&rtctime, sizeof(struct vm_rtc_time));
1832         rtctime.secs = secs;
1833         error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
1834         return (error);
1835 }
1836 
1837 int
1838 vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
1839 {
1840         struct vm_rtc_time rtctime;
1841         int error;
1842 
1843         bzero(&rtctime, sizeof(struct vm_rtc_time));
1844         error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
1845         if (error == 0)
1846                 *secs = rtctime.secs;
1847         return (error);
1848 }
1849 
1850 int
1851 vm_restart_instruction(void *arg, int vcpu)
1852 {
1853         struct vmctx *ctx = arg;
1854 
1855         return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
1856 }
1857 
1858 int
1859 vm_set_topology(struct vmctx *ctx,
1860     uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
1861 {
1862         struct vm_cpu_topology topology;
1863 
1864         bzero(&topology, sizeof (struct vm_cpu_topology));
1865         topology.sockets = sockets;
1866         topology.cores = cores;
1867         topology.threads = threads;
1868         topology.maxcpus = maxcpus;
1869         return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
1870 }
1871 
1872 int
1873 vm_get_topology(struct vmctx *ctx,
1874     uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
1875 {
1876         struct vm_cpu_topology topology;
1877         int error;
1878 
1879         bzero(&topology, sizeof (struct vm_cpu_topology));
1880         error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
1881         if (error == 0) {
1882                 *sockets = topology.sockets;
1883                 *cores = topology.cores;
1884                 *threads = topology.threads;
1885                 *maxcpus = topology.maxcpus;
1886         }
1887         return (error);
1888 }
1889 
1890 int
1891 vm_get_device_fd(struct vmctx *ctx)
1892 {
1893 
1894         return (ctx->fd);
1895 }
1896 
1897 #ifndef __FreeBSD__
1898 int
1899 vm_pmtmr_set_location(struct vmctx *ctx, uint16_t ioport)
1900 {
1901         return (ioctl(ctx->fd, VM_PMTMR_LOCATE, ioport));
1902 }
1903 
1904 int
1905 vm_wrlock_cycle(struct vmctx *ctx)
1906 {
1907         if (ioctl(ctx->fd, VM_WRLOCK_CYCLE, 0) != 0) {
1908                 return (errno);
1909         }
1910         return (0);
1911 }
1912 
1913 int
1914 vm_get_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state *state,
1915     uint8_t *sipi_vector)
1916 {
1917         struct vm_run_state data;
1918 
1919         data.vcpuid = vcpu;
1920         if (ioctl(ctx->fd, VM_GET_RUN_STATE, &data) != 0) {
1921                 return (errno);
1922         }
1923 
1924         *state = data.state;
1925         *sipi_vector = data.sipi_vector;
1926         return (0);
1927 }
1928 
1929 int
1930 vm_set_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state state,
1931     uint8_t sipi_vector)
1932 {
1933         struct vm_run_state data;
1934 
1935         data.vcpuid = vcpu;
1936         data.state = state;
1937         data.sipi_vector = sipi_vector;
1938         if (ioctl(ctx->fd, VM_SET_RUN_STATE, &data) != 0) {
1939                 return (errno);
1940         }
1941 
1942         return (0);
1943 }
1944 
1945 int
1946 vm_arc_resv(struct vmctx *ctx, size_t len)
1947 {
1948         if (ioctl(ctx->fd, VM_ARC_RESV, (uint64_t)len) != 0) {
1949                 return (errno);
1950         }
1951         return (0);
1952 }
1953 #endif /* __FreeBSD__ */
1954 
1955 #ifdef __FreeBSD__
1956 const cap_ioctl_t *
1957 vm_get_ioctls(size_t *len)
1958 {
1959         cap_ioctl_t *cmds;
1960         /* keep in sync with machine/vmm_dev.h */
1961         static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
1962             VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG,
1963             VM_MMAP_GETNEXT, VM_MUNMAP_MEMSEG, VM_SET_REGISTER, VM_GET_REGISTER,
1964             VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
1965             VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
1966             VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
1967             VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
1968             VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
1969             VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
1970             VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
1971             VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
1972             VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
1973             VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
1974             VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
1975             VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
1976             VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
1977             VM_GLA2GPA_NOFAULT,
1978             VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
1979             VM_SET_INTINFO, VM_GET_INTINFO,
1980             VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
1981             VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
1982 
1983         if (len == NULL) {
1984                 cmds = malloc(sizeof(vm_ioctl_cmds));
1985                 if (cmds == NULL)
1986                         return (NULL);
1987                 bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds));
1988                 return (cmds);
1989         }
1990 
1991         *len = nitems(vm_ioctl_cmds);
1992         return (NULL);
1993 }
1994 #endif /* __FreeBSD__ */