Revert "OS-8005 bhyve memory pressure needs to target ARC better (#354)"
This reverts commit a6033573eedd94118d2b9e65f45deca0bf4b42f7.
    
      
    
--- old/usr/src/lib/libvmmapi/common/vmmapi.c
+++ new/usr/src/lib/libvmmapi/common/vmmapi.c
   1    1  /*-
   2    2   * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
   3    3   *
   4    4   * Copyright (c) 2011 NetApp, Inc.
   5    5   * All rights reserved.
   6    6   *
   7    7   * Redistribution and use in source and binary forms, with or without
   8    8   * modification, are permitted provided that the following conditions
   9    9   * are met:
  10   10   * 1. Redistributions of source code must retain the above copyright
  11   11   *    notice, this list of conditions and the following disclaimer.
  12   12   * 2. Redistributions in binary form must reproduce the above copyright
  13   13   *    notice, this list of conditions and the following disclaimer in the
  14   14   *    documentation and/or other materials provided with the distribution.
  15   15   *
  16   16   * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  17   17   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18   18   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19   19   * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  20   20   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21   21   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22   22   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23   23   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24   24   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25   25   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26   26   * SUCH DAMAGE.
  27   27   *
  28   28   * $FreeBSD$
  29   29   */
  30   30  /*
  31   31   * This file and its contents are supplied under the terms of the
  32   32   * Common Development and Distribution License ("CDDL"), version 1.0.
  33   33   * You may only use this file in accordance with the terms of version
  34   34   * 1.0 of the CDDL.
  35   35   *
  36   36   * A full copy of the text of the CDDL should have accompanied this
  37   37   * source.  A copy of the CDDL is also available via the Internet at
  38   38   * http://www.illumos.org/license/CDDL.
  39   39   *
  40   40   * Copyright 2015 Pluribus Networks Inc.
  41   41   * Copyright 2019 Joyent, Inc.
  42   42   * Copyright 2020 Oxide Computer Company
  43   43   */
  44   44  
  45   45  #include <sys/cdefs.h>
  46   46  __FBSDID("$FreeBSD$");
  47   47  
  48   48  #include <sys/param.h>
  49   49  #include <sys/sysctl.h>
  50   50  #include <sys/ioctl.h>
  51   51  #ifdef  __FreeBSD__
  52   52  #include <sys/linker.h>
  53   53  #endif
  54   54  #include <sys/mman.h>
  55   55  #include <sys/module.h>
  56   56  #include <sys/_iovec.h>
  57   57  #include <sys/cpuset.h>
  58   58  
  59   59  #include <x86/segments.h>
  60   60  #include <machine/specialreg.h>
  61   61  
  62   62  #include <errno.h>
  63   63  #include <stdio.h>
  64   64  #include <stdlib.h>
  65   65  #include <assert.h>
  66   66  #include <string.h>
  67   67  #include <fcntl.h>
  68   68  #include <unistd.h>
  69   69  
  70   70  #include <libutil.h>
  71   71  
  72   72  #include <machine/vmm.h>
  73   73  #include <machine/vmm_dev.h>
  74   74  
  75   75  #include "vmmapi.h"
  76   76  
  77   77  #define MB      (1024 * 1024UL)
  78   78  #define GB      (1024 * 1024 * 1024UL)
  79   79  
  80   80  #ifndef __FreeBSD__
  81   81  /* shim to no-op for now */
  82   82  #define MAP_NOCORE              0
  83   83  #define MAP_ALIGNED_SUPER       0
  84   84  
  85   85  /* Rely on PROT_NONE for guard purposes */
  86   86  #define MAP_GUARD               (MAP_PRIVATE | MAP_ANON | MAP_NORESERVE)
  87   87  #endif
  88   88  
  89   89  /*
  90   90   * Size of the guard region before and after the virtual address space
  91   91   * mapping the guest physical memory. This must be a multiple of the
  92   92   * superpage size for performance reasons.
  93   93   */
  94   94  #define VM_MMAP_GUARD_SIZE      (4 * MB)
  95   95  
  96   96  #define PROT_RW         (PROT_READ | PROT_WRITE)
  97   97  #define PROT_ALL        (PROT_READ | PROT_WRITE | PROT_EXEC)
  98   98  
  99   99  struct vmctx {
 100  100          int     fd;
 101  101          uint32_t lowmem_limit;
 102  102          int     memflags;
 103  103          size_t  lowmem;
 104  104          size_t  highmem;
 105  105          char    *baseaddr;
 106  106          char    *name;
 107  107  };
 108  108  
 109  109  #ifdef  __FreeBSD__
 110  110  #define CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
 111  111  #define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
 112  112  #else
 113  113  #define CREATE(x)       vm_do_ctl(VMM_CREATE_VM, (x))
 114  114  #define DESTROY(x)      vm_do_ctl(VMM_DESTROY_VM, (x))
 115  115  
 116  116  static int
 117  117  vm_do_ctl(int cmd, const char *name)
 118  118  {
 119  119          int ctl_fd;
 120  120  
 121  121          ctl_fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR);
 122  122          if (ctl_fd < 0) {
 123  123                  return (-1);
 124  124          }
 125  125  
 126  126          if (ioctl(ctl_fd, cmd, name) == -1) {
 127  127                  int err = errno;
 128  128  
 129  129                  /* Do not lose ioctl errno through the close(2) */
 130  130                  (void) close(ctl_fd);
 131  131                  errno = err;
 132  132                  return (-1);
 133  133          }
 134  134          (void) close(ctl_fd);
 135  135  
 136  136          return (0);
 137  137  }
 138  138  #endif
 139  139  
 140  140  static int
 141  141  vm_device_open(const char *name)
 142  142  {
 143  143          int fd, len;
 144  144          char *vmfile;
 145  145  
 146  146          len = strlen("/dev/vmm/") + strlen(name) + 1;
 147  147          vmfile = malloc(len);
 148  148          assert(vmfile != NULL);
 149  149          snprintf(vmfile, len, "/dev/vmm/%s", name);
 150  150  
 151  151          /* Open the device file */
 152  152          fd = open(vmfile, O_RDWR, 0);
 153  153  
 154  154          free(vmfile);
 155  155          return (fd);
 156  156  }
 157  157  
 158  158  int
 159  159  vm_create(const char *name)
 160  160  {
 161  161  #ifdef __FreeBSD__
 162  162          /* Try to load vmm(4) module before creating a guest. */
 163  163          if (modfind("vmm") < 0)
 164  164                  kldload("vmm");
 165  165  #endif
 166  166          return (CREATE((char *)name));
 167  167  }
 168  168  
 169  169  struct vmctx *
 170  170  vm_open(const char *name)
 171  171  {
 172  172          struct vmctx *vm;
 173  173  
 174  174          vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
 175  175          assert(vm != NULL);
 176  176  
 177  177          vm->fd = -1;
 178  178          vm->memflags = 0;
 179  179          vm->lowmem_limit = 3 * GB;
 180  180          vm->name = (char *)(vm + 1);
 181  181          strcpy(vm->name, name);
 182  182  
 183  183          if ((vm->fd = vm_device_open(vm->name)) < 0)
 184  184                  goto err;
 185  185  
 186  186          return (vm);
 187  187  err:
 188  188          free(vm);
 189  189          return (NULL);
 190  190  }
 191  191  
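[Note: taken together, vm_create(), vm_open() and vm_destroy() form the basic lifecycle for a guest context. A minimal consumer sketch, assuming a libvmmapi build environment and a hypothetical VM name "testvm":

    #include <err.h>
    #include <vmmapi.h>

    struct vmctx *ctx;

    if (vm_create("testvm") != 0)
            err(1, "vm_create");
    if ((ctx = vm_open("testvm")) == NULL)
            err(1, "vm_open");
    /* ... configure memory and vCPUs, run the guest ... */
    vm_destroy(ctx);            /* closes the fd, then destroys the VM */

The sketches that follow assume such an open struct vmctx *ctx.]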
 192  192  #ifndef __FreeBSD__
 193  193  void
 194  194  vm_close(struct vmctx *vm)
 195  195  {
 196  196          assert(vm != NULL);
 197  197          assert(vm->fd >= 0);
 198  198  
 199  199          (void) close(vm->fd);
 200  200  
 201  201          free(vm);
 202  202  }
 203  203  #endif
 204  204  
 205  205  void
 206  206  vm_destroy(struct vmctx *vm)
 207  207  {
 208  208          assert(vm != NULL);
 209  209  
 210  210          if (vm->fd >= 0)
 211  211                  close(vm->fd);
 212  212          DESTROY(vm->name);
 213  213  
 214  214          free(vm);
 215  215  }
 216  216  
 217  217  int
 218  218  vm_parse_memsize(const char *optarg, size_t *ret_memsize)
 219  219  {
 220  220          char *endptr;
 221  221          size_t optval;
 222  222          int error;
 223  223  
 224  224          optval = strtoul(optarg, &endptr, 0);
 225  225          if (*optarg != '\0' && *endptr == '\0') {
 226  226                  /*
 227  227                   * For the sake of backward compatibility if the memory size
 228  228                   * specified on the command line is less than a megabyte then
 229  229                   * it is interpreted as being in units of MB.
 230  230                   */
 231  231                  if (optval < MB)
 232  232                          optval *= MB;
 233  233                  *ret_memsize = optval;
 234  234                  error = 0;
 235  235          } else
 236  236                  error = expand_number(optarg, ret_memsize);
 237  237  
 238  238          return (error);
 239  239  }
 240  240  
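[Note: the backward-compatibility rule above means a plain number below 1048576 is scaled to megabytes, while anything else, including suffixed values, goes through expand_number(3). A sketch, return values unchecked for brevity:

    size_t sz;

    vm_parse_memsize("2048", &sz);          /* 2048 < 1 MB, so 2048 MB */
    vm_parse_memsize("3221225472", &sz);    /* >= 1 MB: taken as bytes (3 GB) */
    vm_parse_memsize("2G", &sz);            /* suffix handled by expand_number(3) */
]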
 241  241  uint32_t
 242  242  vm_get_lowmem_limit(struct vmctx *ctx)
 243  243  {
 244  244  
 245  245          return (ctx->lowmem_limit);
 246  246  }
 247  247  
 248  248  void
 249  249  vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
 250  250  {
 251  251  
 252  252          ctx->lowmem_limit = limit;
 253  253  }
 254  254  
 255  255  void
 256  256  vm_set_memflags(struct vmctx *ctx, int flags)
 257  257  {
 258  258  
 259  259          ctx->memflags = flags;
 260  260  }
 261  261  
 262  262  int
 263  263  vm_get_memflags(struct vmctx *ctx)
 264  264  {
 265  265  
 266  266          return (ctx->memflags);
 267  267  }
 268  268  
 269  269  /*
 270  270   * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
 271  271   */
 272  272  int
 273  273  vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
 274  274      size_t len, int prot)
 275  275  {
 276  276          struct vm_memmap memmap;
 277  277          int error, flags;
 278  278  
 279  279          memmap.gpa = gpa;
 280  280          memmap.segid = segid;
 281  281          memmap.segoff = off;
 282  282          memmap.len = len;
 283  283          memmap.prot = prot;
 284  284          memmap.flags = 0;
 285  285  
 286  286          if (ctx->memflags & VM_MEM_F_WIRED)
 287  287                  memmap.flags |= VM_MEMMAP_F_WIRED;
 288  288  
 289  289          /*
 290  290           * If this mapping already exists then don't create it again. This
 291  291           * is the common case for SYSMEM mappings created by bhyveload(8).
 292  292           */
 293  293          error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
 294  294          if (error == 0 && gpa == memmap.gpa) {
 295  295                  if (segid != memmap.segid || off != memmap.segoff ||
 296  296                      prot != memmap.prot || flags != memmap.flags) {
 297  297                          errno = EEXIST;
 298  298                          return (-1);
 299  299                  } else {
 300  300                          return (0);
 301  301                  }
 302  302          }
 303  303  
 304  304          error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
 305  305          return (error);
 306  306  }
 307  307  
 308  308  int
 309  309  vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
 310  310  {
 311  311          struct vm_munmap munmap;
 312  312          int error;
 313  313  
 314  314          munmap.gpa = gpa;
 315  315          munmap.len = len;
 316  316  
 317  317          error = ioctl(ctx->fd, VM_MUNMAP_MEMSEG, &munmap);
 318  318          return (error);
 319  319  }
 320  320  
 321  321  int
 322  322  vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
 323  323      vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
 324  324  {
 325  325          struct vm_memmap memmap;
 326  326          int error;
 327  327  
 328  328          bzero(&memmap, sizeof(struct vm_memmap));
 329  329          memmap.gpa = *gpa;
 330  330          error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
 331  331          if (error == 0) {
 332  332                  *gpa = memmap.gpa;
 333  333                  *segid = memmap.segid;
 334  334                  *segoff = memmap.segoff;
 335  335                  *len = memmap.len;
 336  336                  *prot = memmap.prot;
 337  337                  *flags = memmap.flags;
 338  338          }
 339  339          return (error);
 340  340  }
 341  341  
 342  342  /*
 343  343   * Return 0 if the segments are identical and non-zero otherwise.
 344  344   *
 345  345   * This is slightly complicated by the fact that only device memory segments
 346  346   * are named.
 347  347   */
 348  348  static int
 349  349  cmpseg(size_t len, const char *str, size_t len2, const char *str2)
 350  350  {
 351  351  
 352  352          if (len == len2) {
 353  353                  if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
 354  354                          return (0);
 355  355          }
 356  356          return (-1);
 357  357  }
 358  358  
 359  359  static int
 360  360  vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
 361  361  {
 362  362          struct vm_memseg memseg;
 363  363          size_t n;
 364  364          int error;
 365  365  
 366  366          /*
 367  367           * If the memory segment has already been created then just return.
 368  368           * This is the usual case for the SYSMEM segment created by userspace
 369  369           * loaders like bhyveload(8).
 370  370           */
 371  371          error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
 372  372              sizeof(memseg.name));
 373  373          if (error)
 374  374                  return (error);
 375  375  
 376  376          if (memseg.len != 0) {
 377  377                  if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
 378  378                          errno = EINVAL;
 379  379                          return (-1);
 380  380                  } else {
 381  381                          return (0);
 382  382                  }
 383  383          }
 384  384  
 385  385          bzero(&memseg, sizeof(struct vm_memseg));
 386  386          memseg.segid = segid;
 387  387          memseg.len = len;
 388  388          if (name != NULL) {
 389  389                  n = strlcpy(memseg.name, name, sizeof(memseg.name));
 390  390                  if (n >= sizeof(memseg.name)) {
 391  391                          errno = ENAMETOOLONG;
 392  392                          return (-1);
 393  393                  }
 394  394          }
 395  395  
 396  396          error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
 397  397          return (error);
 398  398  }
 399  399  
 400  400  int
 401  401  vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
 402  402      size_t bufsize)
 403  403  {
 404  404          struct vm_memseg memseg;
 405  405          size_t n;
 406  406          int error;
 407  407  
 408  408          memseg.segid = segid;
 409  409          error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
 410  410          if (error == 0) {
 411  411                  *lenp = memseg.len;
 412  412                  n = strlcpy(namebuf, memseg.name, bufsize);
 413  413                  if (n >= bufsize) {
 414  414                          errno = ENAMETOOLONG;
 415  415                          error = -1;
 416  416                  }
 417  417          }
 418  418          return (error);
 419  419  }
 420  420  
 421  421  static int
 422  422  #ifdef __FreeBSD__
 423  423  setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
 424  424  #else
 425  425  setup_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len,
 426  426      char *base)
 427  427  #endif
 428  428  {
 429  429          char *ptr;
 430  430          int error, flags;
 431  431  
 432  432          /* Map 'len' bytes starting at 'gpa' in the guest address space */
 433  433  #ifdef __FreeBSD__
 434  434          error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
 435  435  #else
 436  436          /*
 437  437           * As we use two segments for lowmem/highmem the offset within the
 438  438           * segment is 0 on illumos.
 439  439           */
 440  440          error = vm_mmap_memseg(ctx, gpa, segid, 0, len, PROT_ALL);
 441  441  #endif
 442  442          if (error)
 443  443                  return (error);
 444  444  
 445  445          flags = MAP_SHARED | MAP_FIXED;
 446  446          if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
 447  447                  flags |= MAP_NOCORE;
 448  448  
 449  449          /* mmap into the process address space on the host */
 450  450          ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
 451  451          if (ptr == MAP_FAILED)
 452  452                  return (-1);
 453  453  
 454  454          return (0);
 455  455  }
 456  456  
 457  457  int
 458  458  vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
 459  459  {
 460  460          size_t objsize, len;
 461  461          vm_paddr_t gpa;
 462  462          char *baseaddr, *ptr;
 463  463          int error;
 464  464  
 465  465          assert(vms == VM_MMAP_ALL);
 466  466  
 467  467          /*
 468  468           * If 'memsize' cannot fit entirely in the 'lowmem' segment then
 469  469           * create another 'highmem' segment above 4GB for the remainder.
 470  470           */
 471  471          if (memsize > ctx->lowmem_limit) {
 472  472                  ctx->lowmem = ctx->lowmem_limit;
 473  473                  ctx->highmem = memsize - ctx->lowmem_limit;
 474  474                  objsize = 4*GB + ctx->highmem;
 475  475          } else {
 476  476                  ctx->lowmem = memsize;
 477  477                  ctx->highmem = 0;
 478  478                  objsize = ctx->lowmem;
 479  479          }
 480  480  
 481  481  #ifdef __FreeBSD__
 482  482          error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
 483  483          if (error)
 484  484                  return (error);
 485  485  #endif
 486  486  
 487  487          /*
 488  488           * Stake out a contiguous region covering the guest physical memory
 489  489           * and the adjoining guard regions.
 490  490           */
 491  491          len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
 492  492          ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
 493  493          if (ptr == MAP_FAILED)
 494  494                  return (-1);
 495  495  
 496  496          baseaddr = ptr + VM_MMAP_GUARD_SIZE;
 497  497  
 498  498  #ifdef __FreeBSD__
 499  499          if (ctx->highmem > 0) {
 500  500                  gpa = 4*GB;
 501  501                  len = ctx->highmem;
 502  502                  error = setup_memory_segment(ctx, gpa, len, baseaddr);
 503  503                  if (error)
 504  504                          return (error);
 505  505          }
 506  506  
 507  507          if (ctx->lowmem > 0) {
 508  508                  gpa = 0;
 509  509                  len = ctx->lowmem;
 510  510                  error = setup_memory_segment(ctx, gpa, len, baseaddr);
 511  511                  if (error)
 512  512                          return (error);
 513  513          }
 514  514  #else
 515  515          if (ctx->highmem > 0) {
 516  516                  error = vm_alloc_memseg(ctx, VM_HIGHMEM, ctx->highmem, NULL);
 517  517                  if (error)
 518  518                          return (error);
 519  519                  gpa = 4*GB;
 520  520                  len = ctx->highmem;
 521  521                  error = setup_memory_segment(ctx, VM_HIGHMEM, gpa, len, baseaddr);
 522  522                  if (error)
 523  523                          return (error);
 524  524          }
 525  525  
 526  526          if (ctx->lowmem > 0) {
 527  527                  error = vm_alloc_memseg(ctx, VM_LOWMEM, ctx->lowmem, NULL);
 528  528                  if (error)
 529  529                          return (error);
 530  530                  gpa = 0;
 531  531                  len = ctx->lowmem;
 532  532                  error = setup_memory_segment(ctx, VM_LOWMEM, gpa, len, baseaddr);
 533  533                  if (error)
 534  534                          return (error);
 535  535          }
 536  536  #endif
 537  537  
 538  538          ctx->baseaddr = baseaddr;
 539  539  
 540  540          return (0);
 541  541  }
 542  542  
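[Note: a worked example of the lowmem/highmem split. With the default 3 GB lowmem_limit, an 8 GB guest gets lowmem = 3 GB at guest physical [0, 3G), highmem = 5 GB at [4G, 9G), and objsize = 4G + 5G = 9 GB; the [3G, 4G) hole is left for 32-bit MMIO. The whole span, including the two 4 MB guard regions, is reserved with one PROT_NONE mapping before the segments are mapped into it. As a sketch:

    /* An 8 GB guest with the default 3 GB lowmem limit. */
    if (vm_setup_memory(ctx, 8UL << 30, VM_MMAP_ALL) != 0)
            err(1, "vm_setup_memory");
    /*
     * Host layout from the initial reservation:
     *   [4 MB guard][0..3G lowmem][3G..4G hole][4G..9G highmem][4 MB guard]
     * The hole stays PROT_NONE, so stray accesses fault.
     */
]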
 543  543  /*
 544  544   * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
 545  545   * the lowmem or highmem regions.
 546  546   *
 547  547   * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region.
 548  548   * The instruction emulation code depends on this behavior.
 549  549   */
 550  550  void *
 551  551  vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
 552  552  {
 553  553  
 554  554          if (ctx->lowmem > 0) {
 555  555                  if (gaddr < ctx->lowmem && len <= ctx->lowmem &&
 556  556                      gaddr + len <= ctx->lowmem)
 557  557                          return (ctx->baseaddr + gaddr);
 558  558          }
 559  559  
 560  560          if (ctx->highmem > 0) {
 561  561                  if (gaddr >= 4*GB) {
 562  562                          if (gaddr < 4*GB + ctx->highmem &&
 563  563                              len <= ctx->highmem &&
 564  564                              gaddr + len <= 4*GB + ctx->highmem)
 565  565                                  return (ctx->baseaddr + gaddr);
 566  566                  }
 567  567          }
 568  568  
 569  569          return (NULL);
 570  570  }
 571  571  
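[Note: vm_map_gpa() is the translation the instruction-emulation path relies on: any range that strays into the [lowmem, 4G) hole returns NULL rather than a host pointer. A sketch with hypothetical addresses:

    uint32_t *p;

    p = vm_map_gpa(ctx, 0x1000, sizeof (uint32_t));
    if (p != NULL)
            *p = 0xdeadbeef;    /* direct access to guest RAM */
    /*
     * An address in the MMIO hole, e.g. 0xc0000000 with a 3 GB lowmem
     * limit, returns NULL and must be emulated instead.
     */
]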
 572  572  size_t
 573  573  vm_get_lowmem_size(struct vmctx *ctx)
 574  574  {
 575  575  
 576  576          return (ctx->lowmem);
 577  577  }
 578  578  
 579  579  size_t
 580  580  vm_get_highmem_size(struct vmctx *ctx)
 581  581  {
 582  582  
 583  583          return (ctx->highmem);
 584  584  }
 585  585  
 586  586  #ifndef __FreeBSD__
 587  587  int
 588  588  vm_get_devmem_offset(struct vmctx *ctx, int segid, off_t *mapoff)
 589  589  {
 590  590          struct vm_devmem_offset vdo;
 591  591          int error;
 592  592  
 593  593          vdo.segid = segid;
 594  594          error = ioctl(ctx->fd, VM_DEVMEM_GETOFFSET, &vdo);
 595  595          if (error == 0)
 596  596                  *mapoff = vdo.offset;
 597  597  
 598  598          return (error);
 599  599  }
 600  600  #endif
 601  601  
 602  602  void *
 603  603  vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
 604  604  {
 605  605  #ifdef  __FreeBSD__
 606  606          char pathname[MAXPATHLEN];
 607  607  #endif
 608  608          size_t len2;
 609  609          char *base, *ptr;
 610  610          int fd, error, flags;
 611  611          off_t mapoff;
 612  612  
 613  613          fd = -1;
 614  614          ptr = MAP_FAILED;
 615  615          if (name == NULL || strlen(name) == 0) {
 616  616                  errno = EINVAL;
 617  617                  goto done;
 618  618          }
 619  619  
 620  620          error = vm_alloc_memseg(ctx, segid, len, name);
 621  621          if (error)
 622  622                  goto done;
 623  623  
 624  624  #ifdef  __FreeBSD__
 625  625          strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
 626  626          strlcat(pathname, ctx->name, sizeof(pathname));
 627  627          strlcat(pathname, ".", sizeof(pathname));
 628  628          strlcat(pathname, name, sizeof(pathname));
 629  629  
 630  630          fd = open(pathname, O_RDWR);
 631  631          if (fd < 0)
 632  632                  goto done;
 633  633  #else
 634  634          if (vm_get_devmem_offset(ctx, segid, &mapoff) != 0)
 635  635                  goto done;
 636  636  #endif
 637  637  
 638  638          /*
 639  639           * Stake out a contiguous region covering the device memory and the
 640  640           * adjoining guard regions.
 641  641           */
 642  642          len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
 643  643          base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1,
 644  644              0);
 645  645          if (base == MAP_FAILED)
 646  646                  goto done;
 647  647  
 648  648          flags = MAP_SHARED | MAP_FIXED;
 649  649          if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
 650  650                  flags |= MAP_NOCORE;
 651  651  
 652  652  #ifdef  __FreeBSD__
 653  653          /* mmap the devmem region in the host address space */
 654  654          ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
 655  655  #else
 656  656          /* mmap the devmem region in the host address space */
 657  657          ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, ctx->fd,
 658  658              mapoff);
 659  659  #endif
 660  660  done:
 661  661          if (fd >= 0)
 662  662                  close(fd);
 663  663          return (ptr);
 664  664  }
 665  665  
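[Note: on illumos the device-memory segment is reached through the VM fd at the offset reported by VM_DEVMEM_GETOFFSET, rather than through a /dev/vmm.io/<vm>.<name> node as on FreeBSD. A typical consumer is the boot ROM code; a sketch assuming the VM_BOOTROM segment id from vmmapi.h and <sys/mman.h> for MAP_FAILED:

    char *romptr;

    romptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", 16 << 20);
    if (romptr == MAP_FAILED)
            err(1, "vm_create_devmem");
    /*
     * romptr now addresses 16 MB of named device memory in the host;
     * make it visible to the guest with vm_mmap_memseg() at the
     * desired gpa.
     */
]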
 666  666  int
 667  667  vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
 668  668              uint64_t base, uint32_t limit, uint32_t access)
 669  669  {
 670  670          int error;
 671  671          struct vm_seg_desc vmsegdesc;
 672  672  
 673  673          bzero(&vmsegdesc, sizeof(vmsegdesc));
 674  674          vmsegdesc.cpuid = vcpu;
 675  675          vmsegdesc.regnum = reg;
 676  676          vmsegdesc.desc.base = base;
 677  677          vmsegdesc.desc.limit = limit;
 678  678          vmsegdesc.desc.access = access;
 679  679  
 680  680          error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 681  681          return (error);
 682  682  }
 683  683  
 684  684  int
 685  685  vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
 686  686              uint64_t *base, uint32_t *limit, uint32_t *access)
 687  687  {
 688  688          int error;
 689  689          struct vm_seg_desc vmsegdesc;
 690  690  
 691  691          bzero(&vmsegdesc, sizeof(vmsegdesc));
 692  692          vmsegdesc.cpuid = vcpu;
 693  693          vmsegdesc.regnum = reg;
 694  694  
 695  695          error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 696  696          if (error == 0) {
 697  697                  *base = vmsegdesc.desc.base;
 698  698                  *limit = vmsegdesc.desc.limit;
 699  699                  *access = vmsegdesc.desc.access;
 700  700          }
 701  701          return (error);
 702  702  }
 703  703  
 704  704  int
 705  705  vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc)
 706  706  {
 707  707          int error;
 708  708  
 709  709          error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit,
 710  710              &seg_desc->access);
 711  711          return (error);
 712  712  }
 713  713  
 714  714  int
 715  715  vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
 716  716  {
 717  717          int error;
 718  718          struct vm_register vmreg;
 719  719  
 720  720          bzero(&vmreg, sizeof(vmreg));
 721  721          vmreg.cpuid = vcpu;
 722  722          vmreg.regnum = reg;
 723  723          vmreg.regval = val;
 724  724  
 725  725          error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
 726  726          return (error);
 727  727  }
 728  728  
 729  729  int
 730  730  vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
 731  731  {
 732  732          int error;
 733  733          struct vm_register vmreg;
 734  734  
 735  735          bzero(&vmreg, sizeof(vmreg));
 736  736          vmreg.cpuid = vcpu;
 737  737          vmreg.regnum = reg;
 738  738  
 739  739          error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
 740  740          *ret_val = vmreg.regval;
 741  741          return (error);
 742  742  }
 743  743  
 744  744  int
 745  745  vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
 746  746      const int *regnums, uint64_t *regvals)
 747  747  {
 748  748          int error;
 749  749          struct vm_register_set vmregset;
 750  750  
 751  751          bzero(&vmregset, sizeof(vmregset));
 752  752          vmregset.cpuid = vcpu;
 753  753          vmregset.count = count;
 754  754          vmregset.regnums = regnums;
 755  755          vmregset.regvals = regvals;
 756  756  
 757  757          error = ioctl(ctx->fd, VM_SET_REGISTER_SET, &vmregset);
 758  758          return (error);
 759  759  }
 760  760  
 761  761  int
 762  762  vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
 763  763      const int *regnums, uint64_t *regvals)
 764  764  {
 765  765          int error;
 766  766          struct vm_register_set vmregset;
 767  767  
 768  768          bzero(&vmregset, sizeof(vmregset));
 769  769          vmregset.cpuid = vcpu;
 770  770          vmregset.count = count;
 771  771          vmregset.regnums = regnums;
 772  772          vmregset.regvals = regvals;
 773  773  
 774  774          error = ioctl(ctx->fd, VM_GET_REGISTER_SET, &vmregset);
 775  775          return (error);
 776  776  }
 777  777  
 778  778  int
 779  779  vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry,
 780  780      struct vm_exit *vm_exit)
 781  781  {
 782  782          struct vm_entry entry;
 783  783  
 784  784          bcopy(vm_entry, &entry, sizeof (entry));
 785  785          entry.cpuid = vcpu;
 786  786          entry.exit_data = vm_exit;
 787  787  
 788  788          return (ioctl(ctx->fd, VM_RUN, &entry));
 789  789  }
 790  790  
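[Note: unlike the FreeBSD version, this vm_run() takes a separate vm_entry describing how the vCPU re-enters the guest and wires the caller's vm_exit buffer into it before issuing VM_RUN. The usual shape of a vCPU loop, as a sketch with field names per machine/vmm_dev.h and exit dispatch elided:

    struct vm_entry entry = { 0 };
    struct vm_exit vexit = { 0 };

    for (;;) {
            if (vm_run(ctx, vcpu, &entry, &vexit) != 0)
                    err(1, "vm_run");
            /*
             * Dispatch on vexit.exitcode, then fill in entry.cmd and
             * entry.u as required before re-entering the guest.
             */
    }
]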
 791  791  int
 792  792  vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
 793  793  {
 794  794          struct vm_suspend vmsuspend;
 795  795  
 796  796          bzero(&vmsuspend, sizeof(vmsuspend));
 797  797          vmsuspend.how = how;
 798  798          return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
 799  799  }
 800  800  
 801  801  int
 802  802  vm_reinit(struct vmctx *ctx)
 803  803  {
 804  804  
 805  805          return (ioctl(ctx->fd, VM_REINIT, 0));
 806  806  }
 807  807  
 808  808  int
 809  809  vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
 810  810      uint32_t errcode, int restart_instruction)
 811  811  {
 812  812          struct vm_exception exc;
 813  813  
 814  814          exc.cpuid = vcpu;
 815  815          exc.vector = vector;
 816  816          exc.error_code = errcode;
 817  817          exc.error_code_valid = errcode_valid;
 818  818          exc.restart_instruction = restart_instruction;
 819  819  
 820  820          return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
 821  821  }
 822  822  
 823  823  #ifndef __FreeBSD__
 824  824  void
 825  825  vm_inject_fault(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
 826  826      int errcode)
 827  827  {
 828  828          int error;
 829  829          struct vm_exception exc;
 830  830  
 831  831          exc.cpuid = vcpu;
 832  832          exc.vector = vector;
 833  833          exc.error_code = errcode;
 834  834          exc.error_code_valid = errcode_valid;
 835  835          exc.restart_instruction = 1;
 836  836          error = ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc);
 837  837  
 838  838          assert(error == 0);
 839  839  }
 840  840  #endif /* __FreeBSD__ */
 841  841  
 842  842  int
 843  843  vm_apicid2vcpu(struct vmctx *ctx, int apicid)
 844  844  {
 845  845          /*
 846  846           * The apic id associated with the 'vcpu' has the same numerical value
 847  847           * as the 'vcpu' itself.
 848  848           */
 849  849          return (apicid);
 850  850  }
 851  851  
 852  852  int
 853  853  vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
 854  854  {
 855  855          struct vm_lapic_irq vmirq;
 856  856  
 857  857          bzero(&vmirq, sizeof(vmirq));
 858  858          vmirq.cpuid = vcpu;
 859  859          vmirq.vector = vector;
 860  860  
 861  861          return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
 862  862  }
 863  863  
 864  864  int
 865  865  vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
 866  866  {
 867  867          struct vm_lapic_irq vmirq;
 868  868  
 869  869          bzero(&vmirq, sizeof(vmirq));
 870  870          vmirq.cpuid = vcpu;
 871  871          vmirq.vector = vector;
 872  872  
 873  873          return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq));
 874  874  }
 875  875  
 876  876  int
 877  877  vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
 878  878  {
 879  879          struct vm_lapic_msi vmmsi;
 880  880  
 881  881          bzero(&vmmsi, sizeof(vmmsi));
 882  882          vmmsi.addr = addr;
 883  883          vmmsi.msg = msg;
 884  884  
 885  885          return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
 886  886  }
 887  887  
 888  888  int
 889  889  vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
 890  890  {
 891  891          struct vm_ioapic_irq ioapic_irq;
 892  892  
 893  893          bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 894  894          ioapic_irq.irq = irq;
 895  895  
 896  896          return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
 897  897  }
 898  898  
 899  899  int
 900  900  vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
 901  901  {
 902  902          struct vm_ioapic_irq ioapic_irq;
 903  903  
 904  904          bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 905  905          ioapic_irq.irq = irq;
 906  906  
 907  907          return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
 908  908  }
 909  909  
 910  910  int
 911  911  vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
 912  912  {
 913  913          struct vm_ioapic_irq ioapic_irq;
 914  914  
 915  915          bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 916  916          ioapic_irq.irq = irq;
 917  917  
 918  918          return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
 919  919  }
 920  920  
 921  921  int
 922  922  vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
 923  923  {
 924  924  
 925  925          return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
 926  926  }
 927  927  
 928  928  int
 929  929  vm_readwrite_kernemu_device(struct vmctx *ctx, int vcpu, vm_paddr_t gpa,
 930  930      bool write, int size, uint64_t *value)
 931  931  {
 932  932          struct vm_readwrite_kernemu_device irp = {
 933  933                  .vcpuid = vcpu,
 934  934                  .access_width = fls(size) - 1,
 935  935                  .gpa = gpa,
 936  936                  .value = write ? *value : ~0ul,
 937  937          };
 938  938          long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV);
 939  939          int rc;
 940  940  
 941  941          rc = ioctl(ctx->fd, cmd, &irp);
 942  942          if (rc == 0 && !write)
 943  943                  *value = irp.value;
 944  944          return (rc);
 945  945  }
 946  946  
 947  947  int
 948  948  vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 949  949  {
 950  950          struct vm_isa_irq isa_irq;
 951  951  
 952  952          bzero(&isa_irq, sizeof(struct vm_isa_irq));
 953  953          isa_irq.atpic_irq = atpic_irq;
 954  954          isa_irq.ioapic_irq = ioapic_irq;
 955  955  
 956  956          return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
 957  957  }
 958  958  
 959  959  int
 960  960  vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 961  961  {
 962  962          struct vm_isa_irq isa_irq;
 963  963  
 964  964          bzero(&isa_irq, sizeof(struct vm_isa_irq));
 965  965          isa_irq.atpic_irq = atpic_irq;
 966  966          isa_irq.ioapic_irq = ioapic_irq;
 967  967  
 968  968          return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
 969  969  }
 970  970  
 971  971  int
 972  972  vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 973  973  {
 974  974          struct vm_isa_irq isa_irq;
 975  975  
 976  976          bzero(&isa_irq, sizeof(struct vm_isa_irq));
 977  977          isa_irq.atpic_irq = atpic_irq;
 978  978          isa_irq.ioapic_irq = ioapic_irq;
 979  979  
 980  980          return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
 981  981  }
 982  982  
 983  983  int
 984  984  vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
 985  985      enum vm_intr_trigger trigger)
 986  986  {
 987  987          struct vm_isa_irq_trigger isa_irq_trigger;
 988  988  
 989  989          bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
 990  990          isa_irq_trigger.atpic_irq = atpic_irq;
 991  991          isa_irq_trigger.trigger = trigger;
 992  992  
 993  993          return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
 994  994  }
 995  995  
 996  996  int
 997  997  vm_inject_nmi(struct vmctx *ctx, int vcpu)
 998  998  {
 999  999          struct vm_nmi vmnmi;
1000 1000  
1001 1001          bzero(&vmnmi, sizeof(vmnmi));
1002 1002          vmnmi.cpuid = vcpu;
1003 1003  
1004 1004          return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
1005 1005  }
1006 1006  
1007 1007  static const char *capstrmap[] = {
1008 1008          [VM_CAP_HALT_EXIT]  = "hlt_exit",
1009 1009          [VM_CAP_MTRAP_EXIT] = "mtrap_exit",
1010 1010          [VM_CAP_PAUSE_EXIT] = "pause_exit",
1011 1011  #ifdef __FreeBSD__
1012 1012          [VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest",
1013 1013  #endif
1014 1014          [VM_CAP_ENABLE_INVPCID] = "enable_invpcid",
1015 1015          [VM_CAP_BPT_EXIT] = "bpt_exit",
1016 1016  };
1017 1017  
1018 1018  int
1019 1019  vm_capability_name2type(const char *capname)
1020 1020  {
1021 1021          int i;
1022 1022  
1023 1023          for (i = 0; i < nitems(capstrmap); i++) {
1024 1024                  if (strcmp(capstrmap[i], capname) == 0)
1025 1025                          return (i);
1026 1026          }
1027 1027  
1028 1028          return (-1);
1029 1029  }
1030 1030  
1031 1031  const char *
1032 1032  vm_capability_type2name(int type)
1033 1033  {
1034 1034          if (type >= 0 && type < nitems(capstrmap))
1035 1035                  return (capstrmap[type]);
1036 1036  
1037 1037          return (NULL);
1038 1038  }
1039 1039  
1040 1040  int
1041 1041  vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
1042 1042                    int *retval)
1043 1043  {
1044 1044          int error;
1045 1045          struct vm_capability vmcap;
1046 1046  
1047 1047          bzero(&vmcap, sizeof(vmcap));
1048 1048          vmcap.cpuid = vcpu;
1049 1049          vmcap.captype = cap;
1050 1050  
1051 1051          error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
1052 1052          *retval = vmcap.capval;
1053 1053          return (error);
1054 1054  }
1055 1055  
1056 1056  int
1057 1057  vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
1058 1058  {
1059 1059          struct vm_capability vmcap;
1060 1060  
1061 1061          bzero(&vmcap, sizeof(vmcap));
1062 1062          vmcap.cpuid = vcpu;
1063 1063          vmcap.captype = cap;
1064 1064          vmcap.capval = val;
1065 1065  
1066 1066          return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
1067 1067  }
1068 1068  
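[Note: capstrmap makes capability names and types interchangeable; a common pattern is to look a capability up by name and enable it for a vCPU. A sketch:

    int cap;

    cap = vm_capability_name2type("hlt_exit");      /* VM_CAP_HALT_EXIT */
    if (cap >= 0 && vm_set_capability(ctx, 0, cap, 1) == 0) {
            /* vcpu 0 now exits to userspace on HLT */
    }
]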
1069 1069  #ifdef __FreeBSD__
1070 1070  int
1071 1071  vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
1072 1072  {
1073 1073          struct vm_pptdev pptdev;
1074 1074  
1075 1075          bzero(&pptdev, sizeof(pptdev));
1076 1076          pptdev.bus = bus;
1077 1077          pptdev.slot = slot;
1078 1078          pptdev.func = func;
1079 1079  
1080 1080          return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
1081 1081  }
1082 1082  
1083 1083  int
1084 1084  vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
1085 1085  {
1086 1086          struct vm_pptdev pptdev;
1087 1087  
1088 1088          bzero(&pptdev, sizeof(pptdev));
1089 1089          pptdev.bus = bus;
1090 1090          pptdev.slot = slot;
1091 1091          pptdev.func = func;
1092 1092  
1093 1093          return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
1094 1094  }
1095 1095  
1096 1096  int
1097 1097  vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
1098 1098                     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
1099 1099  {
1100 1100          struct vm_pptdev_mmio pptmmio;
1101 1101  
1102 1102          bzero(&pptmmio, sizeof(pptmmio));
1103 1103          pptmmio.bus = bus;
1104 1104          pptmmio.slot = slot;
1105 1105          pptmmio.func = func;
1106 1106          pptmmio.gpa = gpa;
1107 1107          pptmmio.len = len;
1108 1108          pptmmio.hpa = hpa;
1109 1109  
1110 1110          return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
1111 1111  }
1112 1112  
1113 1113  int
1114 1114  vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
1115 1115                       vm_paddr_t gpa, size_t len)
1116 1116  {
1117 1117          struct vm_pptdev_mmio pptmmio;
1118 1118  
1119 1119          bzero(&pptmmio, sizeof(pptmmio));
1120 1120          pptmmio.bus = bus;
1121 1121          pptmmio.slot = slot;
1122 1122          pptmmio.func = func;
1123 1123          pptmmio.gpa = gpa;
1124 1124          pptmmio.len = len;
1125 1125  
1126 1126          return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
1127 1127  }
1128 1128  
1129 1129  int
1130 1130  vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
1131 1131      uint64_t addr, uint64_t msg, int numvec)
1132 1132  {
1133 1133          struct vm_pptdev_msi pptmsi;
1134 1134  
1135 1135          bzero(&pptmsi, sizeof(pptmsi));
1136 1136          pptmsi.vcpu = vcpu;
1137 1137          pptmsi.bus = bus;
1138 1138          pptmsi.slot = slot;
1139 1139          pptmsi.func = func;
1140 1140          pptmsi.msg = msg;
1141 1141          pptmsi.addr = addr;
1142 1142          pptmsi.numvec = numvec;
1143 1143  
1144 1144          return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
1145 1145  }
1146 1146  
1147 1147  int
1148 1148  vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
1149 1149      int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
1150 1150  {
1151 1151          struct vm_pptdev_msix pptmsix;
1152 1152  
1153 1153          bzero(&pptmsix, sizeof(pptmsix));
1154 1154          pptmsix.vcpu = vcpu;
1155 1155          pptmsix.bus = bus;
1156 1156          pptmsix.slot = slot;
1157 1157          pptmsix.func = func;
1158 1158          pptmsix.idx = idx;
1159 1159          pptmsix.msg = msg;
1160 1160          pptmsix.addr = addr;
1161 1161          pptmsix.vector_control = vector_control;
1162 1162  
1163 1163          return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
1164 1164  }
1165 1165  
1166 1166  int
1167 1167  vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func,
1168 1168      int *msi_limit, int *msix_limit)
1169 1169  {
1170 1170          struct vm_pptdev_limits pptlimits;
1171 1171          int error;
1172 1172  
1173 1173          bzero(&pptlimits, sizeof (pptlimits));
1174 1174          pptlimits.bus = bus;
1175 1175          pptlimits.slot = slot;
1176 1176          pptlimits.func = func;
1177 1177  
1178 1178          error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits);
1179 1179  
1180 1180          *msi_limit = pptlimits.msi_limit;
1181 1181          *msix_limit = pptlimits.msix_limit;
1182 1182  
1183 1183          return (error);
1184 1184  }
1185 1185  
1186 1186  int
1187 1187  vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func)
1188 1188  {
1189 1189          struct vm_pptdev ppt;
1190 1190  
1191 1191          bzero(&ppt, sizeof(ppt));
1192 1192          ppt.bus = bus;
1193 1193          ppt.slot = slot;
1194 1194          ppt.func = func;
1195 1195  
1196 1196          return ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &ppt);
1197 1197  }
1198 1198  
1199 1199  #else /* __FreeBSD__ */
1200 1200  
1201 1201  int
1202 1202  vm_assign_pptdev(struct vmctx *ctx, int pptfd)
1203 1203  {
1204 1204          struct vm_pptdev pptdev;
1205 1205  
1206 1206          pptdev.pptfd = pptfd;
1207 1207          return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
1208 1208  }
1209 1209  
1210 1210  int
1211 1211  vm_unassign_pptdev(struct vmctx *ctx, int pptfd)
1212 1212  {
1213 1213          struct vm_pptdev pptdev;
1214 1214  
1215 1215          pptdev.pptfd = pptfd;
1216 1216          return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
1217 1217  }
1218 1218  
1219 1219  int
1220 1220  vm_map_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len,
1221 1221      vm_paddr_t hpa)
1222 1222  {
1223 1223          struct vm_pptdev_mmio pptmmio;
1224 1224  
1225 1225          pptmmio.pptfd = pptfd;
1226 1226          pptmmio.gpa = gpa;
1227 1227          pptmmio.len = len;
1228 1228          pptmmio.hpa = hpa;
1229 1229          return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
1230 1230  }
1231 1231  
1232 1232  int
1233 1233  vm_unmap_pptdev_mmio(struct vmctx *ctx, int pptfd, vm_paddr_t gpa, size_t len)
1234 1234  {
1235 1235          struct vm_pptdev_mmio pptmmio;
1236 1236  
1237 1237          bzero(&pptmmio, sizeof(pptmmio));
1238 1238          pptmmio.pptfd = pptfd;
1239 1239          pptmmio.gpa = gpa;
1240 1240          pptmmio.len = len;
1241 1241  
1242 1242          return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
1243 1243  }
1244 1244  
1245 1245  int
1246 1246  vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int pptfd, uint64_t addr,
1247 1247      uint64_t msg, int numvec)
1248 1248  {
1249 1249          struct vm_pptdev_msi pptmsi;
1250 1250  
1251 1251          pptmsi.vcpu = vcpu;
1252 1252          pptmsi.pptfd = pptfd;
1253 1253          pptmsi.msg = msg;
1254 1254          pptmsi.addr = addr;
1255 1255          pptmsi.numvec = numvec;
1256 1256          return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
1257 1257  }
1258 1258  
1259 1259  int
1260 1260  vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int pptfd, int idx,
1261 1261      uint64_t addr, uint64_t msg, uint32_t vector_control)
1262 1262  {
1263 1263          struct vm_pptdev_msix pptmsix;
1264 1264  
1265 1265          pptmsix.vcpu = vcpu;
1266 1266          pptmsix.pptfd = pptfd;
1267 1267          pptmsix.idx = idx;
1268 1268          pptmsix.msg = msg;
1269 1269          pptmsix.addr = addr;
1270 1270          pptmsix.vector_control = vector_control;
1271 1271          return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
1272 1272  }
1273 1273  
1274 1274  int
1275 1275  vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit,
1276 1276      int *msix_limit)
1277 1277  {
1278 1278          struct vm_pptdev_limits pptlimits;
1279 1279          int error;
1280 1280  
1281 1281          bzero(&pptlimits, sizeof (pptlimits));
1282 1282          pptlimits.pptfd = pptfd;
1283 1283          error = ioctl(ctx->fd, VM_GET_PPTDEV_LIMITS, &pptlimits);
1284 1284  
1285 1285          *msi_limit = pptlimits.msi_limit;
1286 1286          *msix_limit = pptlimits.msix_limit;
1287 1287          return (error);
1288 1288  }
1289 1289  
1290 1290  int
1291 1291  vm_disable_pptdev_msix(struct vmctx *ctx, int pptfd)
1292 1292  {
1293 1293          struct vm_pptdev pptdev;
1294 1294  
1295 1295          pptdev.pptfd = pptfd;
1296 1296          return (ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &pptdev));
1297 1297  }
1298 1298  #endif /* __FreeBSD__ */
1299 1299  
1300 1300  uint64_t *
1301 1301  vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
1302 1302               int *ret_entries)
1303 1303  {
1304 1304          int error;
1305 1305  
1306 1306          static struct vm_stats vmstats;
1307 1307  
1308 1308          vmstats.cpuid = vcpu;
1309 1309  
1310 1310          error = ioctl(ctx->fd, VM_STATS_IOC, &vmstats);
1311 1311          if (error == 0) {
1312 1312                  if (ret_entries)
1313 1313                          *ret_entries = vmstats.num_entries;
1314 1314                  if (ret_tv)
1315 1315                          *ret_tv = vmstats.tv;
1316 1316                  return (vmstats.statbuf);
1317 1317          } else
1318 1318                  return (NULL);
1319 1319  }
1320 1320  
1321 1321  const char *
1322 1322  vm_get_stat_desc(struct vmctx *ctx, int index)
1323 1323  {
1324 1324          static struct vm_stat_desc statdesc;
1325 1325  
1326 1326          statdesc.index = index;
1327 1327          if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
1328 1328                  return (statdesc.desc);
1329 1329          else
1330 1330                  return (NULL);
1331 1331  }
1332 1332  
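[Note: vm_get_stats() returns a pointer into a static vm_stats buffer, so it is neither reentrant nor thread-safe, and each slot's descriptor string is available via vm_get_stat_desc(). A sketch that dumps all statistics for vcpu 0, assuming <stdio.h>:

    int i, nstats;
    uint64_t *stats;

    stats = vm_get_stats(ctx, 0, NULL, &nstats);
    if (stats != NULL) {
            for (i = 0; i < nstats; i++)
                    printf("%s: %llu\n", vm_get_stat_desc(ctx, i),
                        (unsigned long long)stats[i]);
    }
]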
1333 1333  int
1334 1334  vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state)
1335 1335  {
1336 1336          int error;
1337 1337          struct vm_x2apic x2apic;
1338 1338  
1339 1339          bzero(&x2apic, sizeof(x2apic));
1340 1340          x2apic.cpuid = vcpu;
1341 1341  
1342 1342          error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic);
1343 1343          *state = x2apic.state;
1344 1344          return (error);
1345 1345  }
1346 1346  
1347 1347  int
1348 1348  vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
1349 1349  {
1350 1350          int error;
1351 1351          struct vm_x2apic x2apic;
1352 1352  
1353 1353          bzero(&x2apic, sizeof(x2apic));
1354 1354          x2apic.cpuid = vcpu;
1355 1355          x2apic.state = state;
1356 1356  
1357 1357          error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic);
1358 1358  
1359 1359          return (error);
1360 1360  }
1361 1361  
1362 1362  #ifndef __FreeBSD__
1363 1363  int
1364 1364  vcpu_reset(struct vmctx *vmctx, int vcpu)
1365 1365  {
1366 1366          struct vm_vcpu_reset vvr;
1367 1367  
1368 1368          vvr.vcpuid = vcpu;
1369 1369          vvr.kind = VRK_RESET;
1370 1370  
1371 1371          return (ioctl(vmctx->fd, VM_RESET_CPU, &vvr));
1372 1372  }
1373 1373  #else /* __FreeBSD__ */
1374 1374  /*
1375 1375   * From Intel Vol 3a:
1376 1376   * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
1377 1377   */
1378 1378  int
1379 1379  vcpu_reset(struct vmctx *vmctx, int vcpu)
1380 1380  {
1381 1381          int error;
1382 1382          uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
1383 1383          uint32_t desc_access, desc_limit;
1384 1384          uint16_t sel;
1385 1385  
1386 1386          zero = 0;
1387 1387  
1388 1388          rflags = 0x2;
1389 1389          error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
1390 1390          if (error)
1391 1391                  goto done;
1392 1392  
1393 1393          rip = 0xfff0;
1394 1394          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
1395 1395                  goto done;
1396 1396  
1397 1397          cr0 = CR0_NE;
1398 1398          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
1399 1399                  goto done;
1400 1400  
1401 1401          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
1402 1402                  goto done;
1403 1403          
1404 1404          cr4 = 0;
1405 1405          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
1406 1406                  goto done;
1407 1407  
1408 1408          /*
1409 1409           * CS: present, r/w, accessed, 16-bit, byte granularity, usable
1410 1410           */
1411 1411          desc_base = 0xffff0000;
1412 1412          desc_limit = 0xffff;
1413 1413          desc_access = 0x0093;
1414 1414          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
1415 1415                              desc_base, desc_limit, desc_access);
1416 1416          if (error)
1417 1417                  goto done;
1418 1418  
1419 1419          sel = 0xf000;
1420 1420          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
1421 1421                  goto done;
1422 1422  
1423 1423          /*
1424 1424           * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
1425 1425           */
1426 1426          desc_base = 0;
1427 1427          desc_limit = 0xffff;
1428 1428          desc_access = 0x0093;
1429 1429          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
1430 1430                              desc_base, desc_limit, desc_access);
1431 1431          if (error)
1432 1432                  goto done;
1433 1433  
1434 1434          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
1435 1435                              desc_base, desc_limit, desc_access);
1436 1436          if (error)
1437 1437                  goto done;
1438 1438  
1439 1439          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
1440 1440                              desc_base, desc_limit, desc_access);
1441 1441          if (error)
1442 1442                  goto done;
1443 1443  
1444 1444          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
1445 1445                              desc_base, desc_limit, desc_access);
1446 1446          if (error)
1447 1447                  goto done;
1448 1448  
1449 1449          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
1450 1450                              desc_base, desc_limit, desc_access);
1451 1451          if (error)
1452 1452                  goto done;
1453 1453  
1454 1454          sel = 0;
1455 1455          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
1456 1456                  goto done;
1457 1457          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
1458 1458                  goto done;
1459 1459          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
1460 1460                  goto done;
1461 1461          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
1462 1462                  goto done;
1463 1463          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
1464 1464                  goto done;
1465 1465  
1466 1466          /* General purpose registers */
1467 1467          rdx = 0xf00;
1468 1468          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
1469 1469                  goto done;
1470 1470          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
1471 1471                  goto done;
1472 1472          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
1473 1473                  goto done;
1474 1474          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
1475 1475                  goto done;
1476 1476          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
1477 1477                  goto done;
1478 1478          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
1479 1479                  goto done;
1480 1480          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
1481 1481                  goto done;
1482 1482          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
1483 1483                  goto done;
1484 1484  
1485 1485          /* GDTR, IDTR */
1486 1486          desc_base = 0;
1487 1487          desc_limit = 0xffff;
1488 1488          desc_access = 0;
1489 1489          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
1490 1490                              desc_base, desc_limit, desc_access);
1491 1491          if (error != 0)
1492 1492                  goto done;
1493 1493  
1494 1494          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
1495 1495                              desc_base, desc_limit, desc_access);
1496 1496          if (error != 0)
1497 1497                  goto done;
1498 1498  
1499 1499          /* TR */
1500 1500          desc_base = 0;
1501 1501          desc_limit = 0xffff;
1502 1502          desc_access = 0x0000008b;
1503 1503          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
1504 1504          if (error)
1505 1505                  goto done;
1506 1506  
1507 1507          sel = 0;
1508 1508          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
1509 1509                  goto done;
1510 1510  
1511 1511          /* LDTR */
1512 1512          desc_base = 0;
1513 1513          desc_limit = 0xffff;
1514 1514          desc_access = 0x00000082;
1515 1515          error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
1516 1516                              desc_limit, desc_access);
1517 1517          if (error)
1518 1518                  goto done;
1519 1519  
1520 1520          sel = 0;
1521 1521          if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
1522 1522                  goto done;
1523 1523  
1524 1524          /* XXX cr2, debug registers */
1525 1525  
1526 1526          error = 0;
1527 1527  done:
1528 1528          return (error);
1529 1529  }
1530 1530  #endif /* __FreeBSD__ */
1531 1531  
1532 1532  int
1533 1533  vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
1534 1534  {
1535 1535          int error, i;
1536 1536          struct vm_gpa_pte gpapte;
1537 1537  
1538 1538          bzero(&gpapte, sizeof(gpapte));
1539 1539          gpapte.gpa = gpa;
1540 1540  
1541 1541          error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);
1542 1542  
1543 1543          if (error == 0) {
1544 1544                  *num = gpapte.ptenum;
1545 1545                  for (i = 0; i < gpapte.ptenum; i++)
1546 1546                          pte[i] = gpapte.pte[i];
1547 1547          }
1548 1548  
1549 1549          return (error);
1550 1550  }
1551 1551  
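vm_get_gpa_pmap() copies out the page-table entries that currently map a
guest physical address. A minimal caller might look like the sketch below;
it assumes a context from vm_open() (declared in vmmapi.h, outside this
hunk), <stdio.h> for output, and that at most four entries come back,
matching the four-level amd64 page walk:

static void
dump_gpa_pmap(struct vmctx *ctx, uint64_t gpa)
{
        uint64_t pte[4];        /* assumes at most a four-level walk */
        int i, num;

        if (vm_get_gpa_pmap(ctx, gpa, pte, &num) == 0) {
                for (i = 0; i < num; i++)
                        printf("level %d PTE: 0x%lx\n", i,
                            (unsigned long)pte[i]);
        }
}
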
1552 1552  int
1553 1553  vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
1554 1554  {
1555 1555          int error;
1556 1556          struct vm_hpet_cap cap;
1557 1557  
1558 1558          bzero(&cap, sizeof(struct vm_hpet_cap));
1559 1559          error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
1560 1560          if (capabilities != NULL)
1561 1561                  *capabilities = cap.capabilities;
1562 1562          return (error);
1563 1563  }
1564 1564  
1565 1565  int
1566 1566  vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
1567 1567      uint64_t gla, int prot, uint64_t *gpa, int *fault)
1568 1568  {
1569 1569          struct vm_gla2gpa gg;
1570 1570          int error;
1571 1571  
1572 1572          bzero(&gg, sizeof(struct vm_gla2gpa));
1573 1573          gg.vcpuid = vcpu;
1574 1574          gg.prot = prot;
1575 1575          gg.gla = gla;
1576 1576          gg.paging = *paging;
1577 1577  
1578 1578          error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
1579 1579          if (error == 0) {
1580 1580                  *fault = gg.fault;
1581 1581                  *gpa = gg.gpa;
1582 1582          }
1583 1583          return (error);
1584 1584  }
1585 1585  
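vm_gla2gpa() has three distinct outcomes that callers must keep apart: a
nonzero return means the VM_GLA2GPA ioctl itself failed (errno is set); a
zero return with *fault set means the translation faulted in the guest and
no GPA was produced; only a zero return with *fault clear makes *gpa valid.
A sketch of a wrapper encoding that convention (PROT_READ comes from
<sys/mman.h>, which this file already includes):

static int
translate_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, uint64_t *gpa)
{
        int error, fault;

        error = vm_gla2gpa(ctx, vcpu, paging, gla, PROT_READ, gpa, &fault);
        if (error != 0)
                return (-1);    /* ioctl failure; errno is set */
        if (fault != 0)
                return (1);     /* guest fault; no translation produced */
        return (0);             /* *gpa is valid */
}
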
1586 1586  int
1587 1587  vm_gla2gpa_nofault(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
1588 1588      uint64_t gla, int prot, uint64_t *gpa, int *fault)
1589 1589  {
1590 1590          struct vm_gla2gpa gg;
1591 1591          int error;
1592 1592  
1593 1593          bzero(&gg, sizeof(struct vm_gla2gpa));
1594 1594          gg.vcpuid = vcpu;
1595 1595          gg.prot = prot;
1596 1596          gg.gla = gla;
1597 1597          gg.paging = *paging;
1598 1598  
1599 1599          error = ioctl(ctx->fd, VM_GLA2GPA_NOFAULT, &gg);
1600 1600          if (error == 0) {
1601 1601                  *fault = gg.fault;
1602 1602                  *gpa = gg.gpa;
1603 1603          }
1604 1604          return (error);
1605 1605  }
1606 1606  
1607 1607  #ifndef min
1608 1608  #define min(a,b)        (((a) < (b)) ? (a) : (b))
1609 1609  #endif
1610 1610  
1611 1611  int
1612 1612  vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
1613 1613      uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
1614 1614      int *fault)
1615 1615  {
1616 1616          void *va;
1617 1617          uint64_t gpa;
1618 1618          int error, i, n, off;
1619 1619  
1620 1620          for (i = 0; i < iovcnt; i++) {
1621 1621                  iov[i].iov_base = 0;
1622 1622                  iov[i].iov_len = 0;
1623 1623          }
1624 1624  
1625 1625          while (len) {
1626 1626                  assert(iovcnt > 0);
1627 1627                  error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault);
1628 1628                  if (error || *fault)
1629 1629                          return (error);
1630 1630  
1631 1631                  off = gpa & PAGE_MASK;
1632 1632                  n = min(len, PAGE_SIZE - off);
1633 1633  
1634 1634                  va = vm_map_gpa(ctx, gpa, n);
1635 1635                  if (va == NULL)
1636 1636                          return (EFAULT);
1637 1637  
1638 1638                  iov->iov_base = va;
1639 1639                  iov->iov_len = n;
1640 1640                  iov++;
1641 1641                  iovcnt--;
1642 1642  
1643 1643                  gla += n;
1644 1644                  len -= n;
1645 1645          }
1646 1646          return (0);
1647 1647  }
1648 1648  
1649 1649  void
1650 1650  vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
1651 1651  {
1652 1652  
1653 1653          return;
1654 1654  }
1655 1655  
1656 1656  void
1657 1657  vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
1658 1658  {
1659 1659          const char *src;
1660 1660          char *dst;
1661 1661          size_t n;
1662 1662  
1663 1663          dst = vp;
1664 1664          while (len) {
1665 1665                  assert(iov->iov_len);
1666 1666                  n = min(len, iov->iov_len);
1667 1667                  src = iov->iov_base;
1668 1668                  bcopy(src, dst, n);
1669 1669  
1670 1670                  iov++;
1671 1671                  dst += n;
1672 1672                  len -= n;
1673 1673          }
1674 1674  }
1675 1675  
1676 1676  void
1677 1677  vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
1678 1678      size_t len)
1679 1679  {
1680 1680          const char *src;
1681 1681          char *dst;
1682 1682          size_t n;
1683 1683  
1684 1684          src = vp;
1685 1685          while (len) {
1686 1686                  assert(iov->iov_len);
1687 1687                  n = min(len, iov->iov_len);
1688 1688                  dst = iov->iov_base;
1689 1689                  bcopy(src, dst, n);
1690 1690  
1691 1691                  iov++;
1692 1692                  src += n;
1693 1693                  len -= n;
1694 1694          }
1695 1695  }
1696 1696  
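Taken together, vm_copy_setup(), vm_copyin()/vm_copyout(), and
vm_copy_teardown() provide scatter-gather access to guest memory: setup
translates a guest-linear range page by page into host-mapped iovecs (a
linear range may span discontiguous physical pages), and the copy routines
then walk those iovecs. Note the assert() in vm_copy_setup(): the caller
must pass enough iovec slots to cover every page the range touches. A
sketch of reading a guest buffer under those rules:

static int
read_guest(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, void *buf, size_t len)
{
        struct iovec iov[8];    /* enough slots for the pages `len` spans */
        int error, fault;

        error = vm_copy_setup(ctx, vcpu, paging, gla, len, PROT_READ,
            iov, nitems(iov), &fault);
        if (error != 0 || fault != 0)
                return (-1);
        vm_copyin(ctx, vcpu, iov, buf, len);
        vm_copy_teardown(ctx, vcpu, iov, nitems(iov));
        return (0);
}
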
1697 1697  static int
1698 1698  vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
1699 1699  {
1700 1700          struct vm_cpuset vm_cpuset;
1701 1701          int error;
1702 1702  
1703 1703          bzero(&vm_cpuset, sizeof(struct vm_cpuset));
1704 1704          vm_cpuset.which = which;
1705 1705          vm_cpuset.cpusetsize = sizeof(cpuset_t);
1706 1706          vm_cpuset.cpus = cpus;
1707 1707  
1708 1708          error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset);
1709 1709          return (error);
1710 1710  }
1711 1711  
1712 1712  int
1713 1713  vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
1714 1714  {
1715 1715  
1716 1716          return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus));
1717 1717  }
1718 1718  
1719 1719  int
1720 1720  vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
1721 1721  {
1722 1722  
1723 1723          return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus));
1724 1724  }
1725 1725  
1726 1726  int
1727 1727  vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
1728 1728  {
1729 1729  
1730 1730          return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
1731 1731  }
1732 1732  
1733 1733  int
1734 1734  vm_activate_cpu(struct vmctx *ctx, int vcpu)
1735 1735  {
1736 1736          struct vm_activate_cpu ac;
1737 1737          int error;
1738 1738  
1739 1739          bzero(&ac, sizeof(struct vm_activate_cpu));
1740 1740          ac.vcpuid = vcpu;
1741 1741          error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
1742 1742          return (error);
1743 1743  }
1744 1744  
1745 1745  int
1746 1746  vm_suspend_cpu(struct vmctx *ctx, int vcpu)
1747 1747  {
1748 1748          struct vm_activate_cpu ac;
1749 1749          int error;
1750 1750  
1751 1751          bzero(&ac, sizeof(struct vm_activate_cpu));
1752 1752          ac.vcpuid = vcpu;
1753 1753          error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
1754 1754          return (error);
1755 1755  }
1756 1756  
1757 1757  int
1758 1758  vm_resume_cpu(struct vmctx *ctx, int vcpu)
1759 1759  {
1760 1760          struct vm_activate_cpu ac;
1761 1761          int error;
1762 1762  
1763 1763          bzero(&ac, sizeof(struct vm_activate_cpu));
1764 1764          ac.vcpuid = vcpu;
1765 1765          error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
1766 1766          return (error);
1767 1767  }
1768 1768  
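The per-vCPU activate/suspend/resume calls pair naturally with the cpuset
queries above. A sketch that suspends a vCPU and confirms the result via
vm_suspended_cpus(), using CPU_ISSET() from <sys/cpuset.h> (already
included by this file) and <stdio.h> for output:

static void
suspend_and_verify(struct vmctx *ctx, int vcpu)
{
        cpuset_t cpus;

        if (vm_suspend_cpu(ctx, vcpu) != 0)
                return;         /* ioctl failed; errno is set */
        if (vm_suspended_cpus(ctx, &cpus) == 0 && CPU_ISSET(vcpu, &cpus))
                printf("vcpu %d is now suspended\n", vcpu);
}
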
1769 1769  int
1770 1770  vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
1771 1771  {
1772 1772          struct vm_intinfo vmii;
1773 1773          int error;
1774 1774  
1775 1775          bzero(&vmii, sizeof(struct vm_intinfo));
1776 1776          vmii.vcpuid = vcpu;
1777 1777          error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii);
1778 1778          if (error == 0) {
1779 1779                  *info1 = vmii.info1;
1780 1780                  *info2 = vmii.info2;
1781 1781          }
1782 1782          return (error);
1783 1783  }
1784 1784  
1785 1785  int
1786 1786  vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
1787 1787  {
1788 1788          struct vm_intinfo vmii;
1789 1789          int error;
1790 1790  
1791 1791          bzero(&vmii, sizeof(struct vm_intinfo));
1792 1792          vmii.vcpuid = vcpu;
1793 1793          vmii.info1 = info1;
1794 1794          error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
1795 1795          return (error);
1796 1796  }
1797 1797  
1798 1798  int
1799 1799  vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
1800 1800  {
1801 1801          struct vm_rtc_data rtcdata;
1802 1802          int error;
1803 1803  
1804 1804          bzero(&rtcdata, sizeof(struct vm_rtc_data));
1805 1805          rtcdata.offset = offset;
1806 1806          rtcdata.value = value;
1807 1807          error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
1808 1808          return (error);
1809 1809  }
1810 1810  
1811 1811  int
1812 1812  vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
1813 1813  {
1814 1814          struct vm_rtc_data rtcdata;
1815 1815          int error;
1816 1816  
1817 1817          bzero(&rtcdata, sizeof(struct vm_rtc_data));
1818 1818          rtcdata.offset = offset;
1819 1819          error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
1820 1820          if (error == 0)
1821 1821                  *retval = rtcdata.value;
1822 1822          return (error);
1823 1823  }
1824 1824  
1825 1825  int
1826 1826  vm_rtc_settime(struct vmctx *ctx, time_t secs)
1827 1827  {
1828 1828          struct vm_rtc_time rtctime;
1829 1829          int error;
1830 1830  
1831 1831          bzero(&rtctime, sizeof(struct vm_rtc_time));
1832 1832          rtctime.secs = secs;
1833 1833          error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
1834 1834          return (error);
1835 1835  }
1836 1836  
1837 1837  int
1838 1838  vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
1839 1839  {
1840 1840          struct vm_rtc_time rtctime;
1841 1841          int error;
1842 1842  
1843 1843          bzero(&rtctime, sizeof(struct vm_rtc_time));
1844 1844          error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
1845 1845          if (error == 0)
1846 1846                  *secs = rtctime.secs;
1847 1847          return (error);
1848 1848  }
1849 1849  
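The four RTC helpers mirror the emulated MC146818: vm_rtc_read() and
vm_rtc_write() access individual CMOS registers by offset, while
vm_rtc_settime()/vm_rtc_gettime() exchange a UNIX timestamp with the
device model. Seeding the guest clock from the host, as a sketch (time()
and ctime() come from <time.h>, which is not included in this hunk):

static void
sync_rtc_to_host(struct vmctx *ctx)
{
        time_t secs;

        if (vm_rtc_settime(ctx, time(NULL)) != 0)
                return;         /* ioctl failed; errno is set */
        if (vm_rtc_gettime(ctx, &secs) == 0)
                printf("guest RTC now: %s", ctime(&secs));
}
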
1850 1850  int
1851 1851  vm_restart_instruction(void *arg, int vcpu)
1852 1852  {
1853 1853          struct vmctx *ctx = arg;
1854 1854  
1855 1855          return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
1856 1856  }
1857 1857  
1858 1858  int
1859 1859  vm_set_topology(struct vmctx *ctx,
1860 1860      uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
1861 1861  {
1862 1862          struct vm_cpu_topology topology;
1863 1863  
1864 1864          bzero(&topology, sizeof (struct vm_cpu_topology));
1865 1865          topology.sockets = sockets;
1866 1866          topology.cores = cores;
1867 1867          topology.threads = threads;
1868 1868          topology.maxcpus = maxcpus;
1869 1869          return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
1870 1870  }
1871 1871  
1872 1872  int
1873 1873  vm_get_topology(struct vmctx *ctx,
1874 1874      uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
1875 1875  {
1876 1876          struct vm_cpu_topology topology;
1877 1877          int error;
1878 1878  
1879 1879          bzero(&topology, sizeof (struct vm_cpu_topology));
1880 1880          error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
1881 1881          if (error == 0) {
1882 1882                  *sockets = topology.sockets;
1883 1883                  *cores = topology.cores;
1884 1884                  *threads = topology.threads;
1885 1885                  *maxcpus = topology.maxcpus;
1886 1886          }
1887 1887          return (error);
1888 1888  }
1889 1889  
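vm_set_topology() and vm_get_topology() are symmetric: the setter marshals
the four fields into a vm_cpu_topology, and the getter unpacks them only
when the ioctl succeeds. A sketch configuring one socket of two
two-threaded cores; treating a maxcpus of 0 as "keep the kernel default"
is an assumption of this sketch, not something this file establishes:

static void
configure_topology(struct vmctx *ctx)
{
        uint16_t sockets, cores, threads, maxcpus;

        /* 1 socket x 2 cores x 2 threads; maxcpus 0 assumed = default */
        if (vm_set_topology(ctx, 1, 2, 2, 0) != 0)
                return;         /* ioctl failed; errno is set */
        if (vm_get_topology(ctx, &sockets, &cores, &threads, &maxcpus) == 0)
                printf("%hu socket(s) x %hu core(s) x %hu thread(s), "
                    "maxcpus %hu\n", sockets, cores, threads, maxcpus);
}
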
1890 1890  int
1891 1891  vm_get_device_fd(struct vmctx *ctx)
1892 1892  {
1893 1893  
1894 1894          return (ctx->fd);
1895 1895  }
1896 1896  
1897 1897  #ifndef __FreeBSD__
1898 1898  int
1899 1899  vm_pmtmr_set_location(struct vmctx *ctx, uint16_t ioport)
1900 1900  {
1901 1901          return (ioctl(ctx->fd, VM_PMTMR_LOCATE, ioport));
1902 1902  }
1903 1903  
1904 1904  int
1905 1905  vm_wrlock_cycle(struct vmctx *ctx)
1906 1906  {
1907 1907          if (ioctl(ctx->fd, VM_WRLOCK_CYCLE, 0) != 0) {
1908 1908                  return (errno);
1909 1909          }
1910 1910          return (0);
1911 1911  }
1912 1912  
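Note the error convention in this non-FreeBSD block: vm_wrlock_cycle()
(and the run-state calls below) return the errno value itself on failure,
whereas most functions above return the raw ioctl() result and leave the
code in errno. Callers check them accordingly; a sketch, with strerror()
from <string.h> and fprintf() from <stdio.h>, both already included:

        int error;

        if ((error = vm_wrlock_cycle(ctx)) != 0)
                fprintf(stderr, "wrlock cycle: %s\n", strerror(error));
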
1913 1913  int
1914 1914  vm_get_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state *state,
1915 1915      uint8_t *sipi_vector)
1916 1916  {
1917 1917          struct vm_run_state data;
1918 1918  
1919 1919          data.vcpuid = vcpu;
1920 1920          if (ioctl(ctx->fd, VM_GET_RUN_STATE, &data) != 0) {
1921 1921                  return (errno);
1922 1922          }
1923 1923  
1924 1924          *state = data.state;
1925 1925          *sipi_vector = data.sipi_vector;
1926 1926          return (0);
1927 1927  }
1928 1928  
1929 1929  int
1930 1930  vm_set_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state state,
1931 1931      uint8_t sipi_vector)
1932 1932  {
1933 1933          struct vm_run_state data;
1934 1934  
1935 1935          data.vcpuid = vcpu;
1936 1936          data.state = state;
1937 1937          data.sipi_vector = sipi_vector;
1938 1938          if (ioctl(ctx->fd, VM_SET_RUN_STATE, &data) != 0) {
1939 1939                  return (errno);
1940 1940          }
1941 1941  
1942 1942          return (0);
1943 1943  }
1944 1944  
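vm_get_run_state() and vm_set_run_state() wrap the illumos-specific
VM_GET_RUN_STATE/VM_SET_RUN_STATE ioctls; the sipi_vector argument marks
them as capturing a vCPU's position in the INIT/SIPI startup handshake.
A round-trip sketch (enum vcpu_run_state is defined in the vmm headers
outside this hunk):

static int
save_restore_run_state(struct vmctx *ctx, int vcpu)
{
        enum vcpu_run_state state;
        uint8_t sipi;
        int error;

        if ((error = vm_get_run_state(ctx, vcpu, &state, &sipi)) != 0)
                return (error);         /* already an errno value */
        /* ... later, reinstate exactly what was observed ... */
        return (vm_set_run_state(ctx, vcpu, state, sipi));
}
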
1945      -int
1946      -vm_arc_resv(struct vmctx *ctx, size_t len)
1947      -{
1948      -        if (ioctl(ctx->fd, VM_ARC_RESV, (uint64_t)len) != 0) {
1949      -                return (errno);
1950      -        }
1951      -        return (0);
1952      -}
1953 1945  #endif /* __FreeBSD__ */
1954 1946  
1955 1947  #ifdef __FreeBSD__
1956 1948  const cap_ioctl_t *
1957 1949  vm_get_ioctls(size_t *len)
1958 1950  {
1959 1951          cap_ioctl_t *cmds;
1960 1952          /* keep in sync with machine/vmm_dev.h */
1961 1953          static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
1962 1954      VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG,
1963 1955              VM_MMAP_GETNEXT, VM_MUNMAP_MEMSEG, VM_SET_REGISTER, VM_GET_REGISTER,
1964 1956              VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
1965 1957              VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
1966 1958              VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
1967 1959              VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
1968 1960              VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
1969 1961              VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
1970 1962              VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
1971 1963              VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
1972 1964              VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
1973 1965              VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
1974 1966              VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
1975 1967              VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
1976 1968              VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
1977 1969              VM_GLA2GPA_NOFAULT,
1978 1970              VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
1979 1971              VM_SET_INTINFO, VM_GET_INTINFO,
1980 1972              VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
1981 1973              VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
1982 1974  
1983 1975          if (len == NULL) {
1984 1976                  cmds = malloc(sizeof(vm_ioctl_cmds));
1985 1977                  if (cmds == NULL)
1986 1978                          return (NULL);
1987 1979                  bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds));
1988 1980                  return (cmds);
1989 1981          }
1990 1982  
1991 1983          *len = nitems(vm_ioctl_cmds);
1992 1984          return (NULL);
1993 1985  }
1994 1986  #endif /* __FreeBSD__ */
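vm_get_ioctls() has a dual calling convention: with a non-NULL len it only
stores the command count and returns NULL, while with len == NULL it
returns a heap-allocated copy of the whitelist that the caller must free.
Its natural consumer is Capsicum sandboxing on FreeBSD, roughly as in the
sketch below (cap_ioctls_limit() is FreeBSD's Capsicum API from
<sys/capsicum.h>, and err() is from <err.h>; neither is part of this file,
and tolerating ENOSYS on non-Capsicum kernels is a choice of the sketch):

static void
limit_vm_ioctls(struct vmctx *ctx)
{
        const cap_ioctl_t *cmds;
        size_t ncmds;

        (void) vm_get_ioctls(&ncmds);           /* query the count only */
        if ((cmds = vm_get_ioctls(NULL)) == NULL)
                err(1, "vm_get_ioctls");
        if (cap_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) != 0 &&
            errno != ENOSYS)
                err(1, "cap_ioctls_limit");
        free((void *)cmds);
}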