Print this page
    
re #13140 rb4270 hvm_sd module missing dependencies on scsi and cmlb
re #13166 rb4270 Check for Xen HVM even if CPUID signature returns Microsoft Hv
re #13187 rb4270 Fix Xen HVM related warnings
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c
          +++ new/usr/src/uts/i86pc/i86hvm/io/xpv/xpv_support.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26   26  #include <sys/modctl.h>
  27   27  #include <sys/types.h>
  28   28  #include <sys/archsystm.h>
  29   29  #include <sys/machsystm.h>
  30   30  #include <sys/sunndi.h>
  31   31  #include <sys/sunddi.h>
  32   32  #include <sys/ddi_subrdefs.h>
  33   33  #include <sys/xpv_support.h>
  34   34  #include <sys/xen_errno.h>
  35   35  #include <sys/hypervisor.h>
  36   36  #include <sys/gnttab.h>
  37   37  #include <sys/xenbus_comms.h>
  38   38  #include <sys/xenbus_impl.h>
  39   39  #include <xen/sys/xendev.h>
  40   40  #include <sys/sysmacros.h>
  41   41  #include <sys/x86_archext.h>
  42   42  #include <sys/mman.h>
  43   43  #include <sys/stat.h>
  44   44  #include <sys/conf.h>
  45   45  #include <sys/devops.h>
  46   46  #include <sys/pc_mmu.h>
  47   47  #include <sys/cmn_err.h>
  48   48  #include <sys/cpr.h>
  49   49  #include <sys/ddi.h>
  50   50  #include <vm/seg_kmem.h>
  51   51  #include <vm/as.h>
  52   52  #include <vm/hat_pte.h>
  53   53  #include <vm/hat_i86.h>
  54   54  
  55   55  #define XPV_MINOR 0
  56   56  #define XPV_BUFSIZE 128
  57   57  
  58   58  /* virtual addr for the store_mfn page */
  59   59  caddr_t xb_addr;
  60   60  
  61   61  dev_info_t *xpv_dip;
  62   62  static dev_info_t *xpvd_dip;
  63   63  
  64   64  #ifdef DEBUG
  65   65  int xen_suspend_debug;
  66   66  
  67   67  #define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf
  68   68  #else
  69   69  #define SUSPEND_DEBUG(...)
  70   70  #endif
  71   71  
  72   72  /*
  73   73   * Forward declarations
  74   74   */
  75   75  static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
  76   76  static int xpv_attach(dev_info_t *, ddi_attach_cmd_t);
  77   77  static int xpv_detach(dev_info_t *, ddi_detach_cmd_t);
  78   78  static int xpv_open(dev_t *, int, int, cred_t *);
  79   79  static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
  80   80  
  81   81  static struct cb_ops xpv_cb_ops = {
  82   82          xpv_open,
  83   83          nulldev,        /* close */
  84   84          nodev,          /* strategy */
  85   85          nodev,          /* print */
  86   86          nodev,          /* dump */
  87   87          nodev,          /* read */
  88   88          nodev,          /* write */
  89   89          xpv_ioctl,      /* ioctl */
  90   90          nodev,          /* devmap */
  91   91          nodev,          /* mmap */
  92   92          nodev,          /* segmap */
  93   93          nochpoll,       /* poll */
  94   94          ddi_prop_op,
  95   95          NULL,
  96   96          D_MP,
  97   97          CB_REV,
  98   98          NULL,
  99   99          NULL
 100  100  };
 101  101  
 102  102  static struct dev_ops xpv_dv_ops = {
 103  103          DEVO_REV,
 104  104          0,
 105  105          xpv_getinfo,
 106  106          nulldev,        /* identify */
 107  107          nulldev,        /* probe */
 108  108          xpv_attach,
 109  109          xpv_detach,
 110  110          nodev,          /* reset */
 111  111          &xpv_cb_ops,
 112  112          NULL,           /* struct bus_ops */
 113  113          NULL,           /* power */
 114  114          ddi_quiesce_not_supported,      /* devo_quiesce */
 115  115  };
 116  116  
 117  117  static struct modldrv modldrv = {
 118  118          &mod_driverops,
 119  119          "xpv driver",
 120  120          &xpv_dv_ops
 121  121  };
 122  122  
 123  123  static struct modlinkage modl = {
 124  124          MODREV_1,
 125  125          {
 126  126                  (void *)&modldrv,
 127  127                  NULL            /* null termination */
 128  128          }
 129  129  };
 130  130  
 131  131  static ddi_dma_attr_t xpv_dma_attr = {
 132  132          DMA_ATTR_V0,            /* version of this structure */
 133  133          0,                      /* lowest usable address */
 134  134          0xffffffffffffffffULL,  /* highest usable address */
 135  135          0x7fffffff,             /* maximum DMAable byte count */
 136  136          MMU_PAGESIZE,           /* alignment in bytes */
 137  137          0x7ff,                  /* bitmap of burst sizes */
 138  138          1,                      /* minimum transfer */
 139  139          0xffffffffU,            /* maximum transfer */
 140  140          0x7fffffffULL,          /* maximum segment length */
 141  141          1,                      /* maximum number of segments */
 142  142          1,                      /* granularity */
 143  143          0,                      /* flags (reserved) */
 144  144  };
 145  145  
 146  146  static ddi_device_acc_attr_t xpv_accattr = {
 147  147          DDI_DEVICE_ATTR_V0,
 148  148          DDI_NEVERSWAP_ACC,
 149  149          DDI_STRICTORDER_ACC
 150  150  };
 151  151  
 152  152  #define MAX_ALLOCATIONS 10
 153  153  static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS];
 154  154  static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS];
 155  155  static int xen_alloc_cnt = 0;
 156  156  
 157  157  void *
 158  158  xen_alloc_pages(pgcnt_t cnt)
 159  159  {
 160  160          size_t len;
 161  161          int a = xen_alloc_cnt++;
 162  162          caddr_t addr;
 163  163  
 164  164          ASSERT(xen_alloc_cnt < MAX_ALLOCATIONS);
 165  165          if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0,
 166  166              &xpv_dma_handle[a]) != DDI_SUCCESS)
 167  167                  return (NULL);
 168  168  
 169  169          if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt,
 170  170              &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0,
 171  171              &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) {
 172  172                  ddi_dma_free_handle(&xpv_dma_handle[a]);
 173  173                  cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices");
 174  174                  return (NULL);
 175  175          }
 176  176          return (addr);
 177  177  }
 178  178  
 179  179  /*
 180  180   * This function is invoked twice, first time with reprogram=0 to set up
 181  181   * the xpvd portion of the device tree. The second time it is ignored.
 182  182   */
 183  183  static void
 184  184  xpv_enumerate(int reprogram)
 185  185  {
 186  186          dev_info_t *dip;
 187  187  
 188  188          if (reprogram != 0)
 189  189                  return;
 190  190  
 191  191          ndi_devi_alloc_sleep(ddi_root_node(), "xpvd",
 192  192              (pnode_t)DEVI_SID_NODEID, &dip);
 193  193  
 194  194          (void) ndi_devi_bind_driver(dip, 0);
 195  195  
 196  196          /*
 197  197           * Too early to enumerate split device drivers in domU
 198  198           * since we need to create taskq thread during enumeration.
 199  199           * So, we only enumerate softdevs and console here.
 200  200           */
 201  201          xendev_enum_all(dip, B_TRUE);
 202  202  }
 203  203  
 204  204  /*
 205  205   * Translate a hypervisor errcode to a Solaris error code.
 206  206   */
 207  207  int
 208  208  xen_xlate_errcode(int error)
 209  209  {
 210  210  #define CASE(num)       case X_##num: error = num; break
 211  211  
 212  212          switch (-error) {
 213  213                  CASE(EPERM);    CASE(ENOENT);   CASE(ESRCH);
 214  214                  CASE(EINTR);    CASE(EIO);      CASE(ENXIO);
 215  215                  CASE(E2BIG);    CASE(ENOMEM);   CASE(EACCES);
 216  216                  CASE(EFAULT);   CASE(EBUSY);    CASE(EEXIST);
 217  217                  CASE(ENODEV);   CASE(EISDIR);   CASE(EINVAL);
 218  218                  CASE(ENOSPC);   CASE(ESPIPE);   CASE(EROFS);
 219  219                  CASE(ENOSYS);   CASE(ENOTEMPTY); CASE(EISCONN);
 220  220                  CASE(ENODATA);
 221  221                  default:
 222  222                  panic("xen_xlate_errcode: unknown error %d", error);
 223  223          }
 224  224          return (error);
 225  225  #undef CASE
 226  226  }
 227  227  
 228  228  /*PRINTFLIKE1*/
 229  229  void
 230  230  xen_printf(const char *fmt, ...)
 231  231  {
 232  232          va_list adx;
 233  233  
 234  234          va_start(adx, fmt);
 235  235          printf(fmt, adx);
 236  236          va_end(adx);
 237  237  }
 238  238  
 239  239  /*
 240  240   * Stub functions to get the FE drivers to build, and to catch drivers that
 241  241   * misbehave in HVM domains.
 242  242   */
 243  243  /*ARGSUSED*/
 244  244  void
 245  245  xen_release_pfn(pfn_t pfn)
 246  246  {
 247  247          panic("xen_release_pfn() is not supported in HVM domains");
 248  248  }
 249  249  
 250  250  /*ARGSUSED*/
 251  251  void
 252  252  reassign_pfn(pfn_t pfn, mfn_t mfn)
 253  253  {
 254  254          panic("reassign_pfn() is not supported in HVM domains");
 255  255  }
 256  256  
 257  257  /*ARGSUSED*/
 258  258  long
 259  259  balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
 260  260  {
 261  261          panic("balloon_free_pages() is not supported in HVM domains");
 262  262          return (0);
 263  263  }
 264  264  
 265  265  /*ARGSUSED*/
 266  266  void
 267  267  balloon_drv_added(int64_t delta)
 268  268  {
 269  269          panic("balloon_drv_added() is not supported in HVM domains");
 270  270  }
 271  271  
 272  272  /*
 273  273   * Add a mapping for the machine page at the given virtual address.
 274  274   */
 275  275  void
 276  276  kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
 277  277  {
 278  278          ASSERT(level == 0);
 279  279  
 280  280          hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE,
 281  281              mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD);
 282  282  }
 283  283  
 284  284  /*ARGSUSED*/
 285  285  int
 286  286  xen_map_gref(uint_t cmd, gnttab_map_grant_ref_t *mapop, uint_t count,
 287  287      boolean_t uvaddr)
 288  288  {
 289  289          long rc;
 290  290  
 291  291          ASSERT(cmd == GNTTABOP_map_grant_ref);
 292  292          rc = HYPERVISOR_grant_table_op(cmd, mapop, count);
 293  293  
 294  294          return (rc);
 295  295  }
 296  296  
 297  297  static struct xenbus_watch shutdown_watch;
 298  298  taskq_t *xen_shutdown_tq;
 299  299  
 300  300  #define SHUTDOWN_INVALID        -1
 301  301  #define SHUTDOWN_POWEROFF       0
 302  302  #define SHUTDOWN_REBOOT         1
 303  303  #define SHUTDOWN_SUSPEND        2
 304  304  #define SHUTDOWN_HALT           3
 305  305  #define SHUTDOWN_MAX            4
 306  306  
 307  307  #define SHUTDOWN_TIMEOUT_SECS (60 * 5)
 308  308  
 309  309  int
 310  310  xen_suspend_devices(dev_info_t *dip)
 311  311  {
 312  312          int error;
 313  313          char buf[XPV_BUFSIZE];
 314  314  
 315  315          SUSPEND_DEBUG("xen_suspend_devices\n");
 316  316  
 317  317          for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
 318  318                  if (xen_suspend_devices(ddi_get_child(dip)))
 319  319                          return (ENXIO);
 320  320                  if (ddi_get_driver(dip) == NULL)
 321  321                          continue;
 322  322                  SUSPEND_DEBUG("Suspending device %s\n", ddi_deviname(dip, buf));
 323  323                  ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0);
 324  324  
 325  325  
 326  326                  if (!i_ddi_devi_attached(dip)) {
 327  327                          error = DDI_FAILURE;
 328  328                  } else {
 329  329                          error = devi_detach(dip, DDI_SUSPEND);
 330  330                  }
 331  331  
 332  332                  if (error == DDI_SUCCESS) {
 333  333                          DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED;
 334  334                  } else {
 335  335                          SUSPEND_DEBUG("WARNING: Unable to suspend device %s\n",
 336  336                              ddi_deviname(dip, buf));
 337  337                          cmn_err(CE_WARN, "Unable to suspend device %s.",
 338  338                              ddi_deviname(dip, buf));
 339  339                          cmn_err(CE_WARN, "Device is busy or does not "
 340  340                              "support suspend/resume.");
 341  341                                  return (ENXIO);
 342  342                  }
 343  343          }
 344  344          return (0);
 345  345  }
 346  346  
 347  347  int
 348  348  xen_resume_devices(dev_info_t *start, int resume_failed)
 349  349  {
 350  350          dev_info_t *dip, *next, *last = NULL;
 351  351          int did_suspend;
 352  352          int error = resume_failed;
 353  353          char buf[XPV_BUFSIZE];
 354  354  
 355  355          SUSPEND_DEBUG("xen_resume_devices\n");
 356  356  
 357  357          while (last != start) {
 358  358                  dip = start;
 359  359                  next = ddi_get_next_sibling(dip);
 360  360                  while (next != last) {
 361  361                          dip = next;
 362  362                          next = ddi_get_next_sibling(dip);
 363  363                  }
 364  364  
 365  365                  /*
 366  366                   * cpr is the only one that uses this field and the device
 367  367                   * itself hasn't resumed yet, there is no need to use a
 368  368                   * lock, even though kernel threads are active by now.
 369  369                   */
 370  370                  did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED;
 371  371                  if (did_suspend)
 372  372                          DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED;
 373  373  
 374  374                  /*
 375  375                   * There may be background attaches happening on devices
 376  376                   * that were not originally suspended by cpr, so resume
 377  377                   * only devices that were suspended by cpr. Also, stop
 378  378                   * resuming after the first resume failure, but traverse
 379  379                   * the entire tree to clear the suspend flag.
 380  380                   */
 381  381                  if (did_suspend && !error) {
 382  382                          SUSPEND_DEBUG("Resuming device %s\n",
 383  383                              ddi_deviname(dip, buf));
 384  384                          /*
 385  385                           * If a device suspended by cpr gets detached during
 386  386                           * the resume process (for example, due to hotplugging)
 387  387                           * before cpr gets around to issuing it a DDI_RESUME,
 388  388                           * we'll have problems.
 389  389                           */
 390  390                          if (!i_ddi_devi_attached(dip)) {
 391  391                                  cmn_err(CE_WARN, "Skipping %s, device "
 392  392                                      "not ready for resume",
 393  393                                      ddi_deviname(dip, buf));
 394  394                          } else {
 395  395                                  if (devi_attach(dip, DDI_RESUME) !=
 396  396                                      DDI_SUCCESS) {
 397  397                                          error = ENXIO;
 398  398                                  }
 399  399                          }
 400  400                  }
 401  401  
 402  402                  if (error == ENXIO) {
 403  403                          cmn_err(CE_WARN, "Unable to resume device %s",
 404  404                              ddi_deviname(dip, buf));
 405  405                  }
 406  406  
 407  407                  error = xen_resume_devices(ddi_get_child(dip), error);
 408  408                  last = dip;
 409  409          }
 410  410  
 411  411          return (error);
 412  412  }
 413  413  
 414  414  /*ARGSUSED*/
 415  415  static int
 416  416  check_xpvd(dev_info_t *dip, void *arg)
 417  417  {
 418  418          char *name;
 419  419  
 420  420          name = ddi_node_name(dip);
 421  421          if (name == NULL || strcmp(name, "xpvd")) {
 422  422                  return (DDI_WALK_CONTINUE);
 423  423          } else {
 424  424                  xpvd_dip = dip;
 425  425                  return (DDI_WALK_TERMINATE);
 426  426          }
 427  427  }
 428  428  
 429  429  /*
 430  430   * Top level routine to direct suspend/resume of a domain.
 431  431   */
 432  432  void
 433  433  xen_suspend_domain(void)
 434  434  {
 435  435          extern void rtcsync(void);
 436  436          extern void ec_resume(void);
 437  437          extern kmutex_t ec_lock;
 438  438          struct xen_add_to_physmap xatp;
 439  439          ulong_t flags;
 440  440          int err;
 441  441  
 442  442          cmn_err(CE_NOTE, "Domain suspending for save/migrate");
 443  443  
 444  444          SUSPEND_DEBUG("xen_suspend_domain\n");
 445  445  
 446  446          /*
 447  447           * We only want to suspend the PV devices, since the emulated devices
 448  448           * are suspended by saving the emulated device state.  The PV devices
 449  449           * are all children of the xpvd nexus device.  So we search the
 450  450           * device tree for the xpvd node to use as the root of the tree to
 451  451           * be suspended.
 452  452           */
 453  453          if (xpvd_dip == NULL)
 454  454                  ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);
 455  455  
 456  456          /*
 457  457           * suspend interrupts and devices
 458  458           */
 459  459          if (xpvd_dip != NULL)
 460  460                  (void) xen_suspend_devices(ddi_get_child(xpvd_dip));
 461  461          else
 462  462                  cmn_err(CE_WARN, "No PV devices found to suspend");
 463  463          SUSPEND_DEBUG("xenbus_suspend\n");
 464  464          xenbus_suspend();
 465  465  
 466  466          mutex_enter(&cpu_lock);
 467  467  
 468  468          /*
 469  469           * Suspend on vcpu 0
 470  470           */
 471  471          thread_affinity_set(curthread, 0);
 472  472          kpreempt_disable();
 473  473  
 474  474          if (ncpus > 1)
 475  475                  pause_cpus(NULL, NULL);
 476  476          /*
 477  477           * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
 478  478           * any holder would have dropped it to get through pause_cpus().
 479  479           */
 480  480          mutex_enter(&ec_lock);
 481  481  
 482  482          /*
 483  483           * From here on in, we can't take locks.
 484  484           */
 485  485  
 486  486          flags = intr_clear();
 487  487  
 488  488          SUSPEND_DEBUG("HYPERVISOR_suspend\n");
 489  489          /*
 490  490           * At this point we suspend and sometime later resume.
 491  491           * Note that this call may return with an indication of a cancelled
 492  492           * for now no matter ehat the return we do a full resume of all
 493  493           * suspended drivers, etc.
 494  494           */
 495  495          (void) HYPERVISOR_shutdown(SHUTDOWN_suspend);
 496  496  
 497  497          /*
 498  498           * Point HYPERVISOR_shared_info to the proper place.
 499  499           */
 500  500          xatp.domid = DOMID_SELF;
 501  501          xatp.idx = 0;
 502  502          xatp.space = XENMAPSPACE_shared_info;
 503  503          xatp.gpfn = xen_shared_info_frame;
 504  504          if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
 505  505                  panic("Could not set shared_info page. error: %d", err);
 506  506  
 507  507          SUSPEND_DEBUG("gnttab_resume\n");
 508  508          gnttab_resume();
 509  509  
 510  510          SUSPEND_DEBUG("ec_resume\n");
 511  511          ec_resume();
 512  512  
 513  513          intr_restore(flags);
 514  514  
 515  515          if (ncpus > 1)
 516  516                  start_cpus();
 517  517  
 518  518          mutex_exit(&ec_lock);
 519  519          mutex_exit(&cpu_lock);
 520  520  
 521  521          /*
 522  522           * Now we can take locks again.
 523  523           */
 524  524  
 525  525          rtcsync();
 526  526  
 527  527          SUSPEND_DEBUG("xenbus_resume\n");
 528  528          xenbus_resume();
 529  529          SUSPEND_DEBUG("xen_resume_devices\n");
 530  530          if (xpvd_dip != NULL)
 531  531                  (void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);
 532  532  
 533  533          thread_affinity_clear(curthread);
 534  534          kpreempt_enable();
 535  535  
 536  536          SUSPEND_DEBUG("finished xen_suspend_domain\n");
 537  537  
 538  538          cmn_err(CE_NOTE, "domain restore/migrate completed");
 539  539  }
 540  540  
 541  541  static void
 542  542  xen_dirty_shutdown(void *arg)
 543  543  {
 544  544          int cmd = (uintptr_t)arg;
 545  545  
 546  546          cmn_err(CE_WARN, "Externally requested shutdown failed or "
 547  547              "timed out.\nShutting down.\n");
 548  548  
 549  549          switch (cmd) {
 550  550          case SHUTDOWN_HALT:
 551  551          case SHUTDOWN_POWEROFF:
 552  552                  (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
 553  553                  break;
 554  554          case SHUTDOWN_REBOOT:
 555  555                  (void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
 556  556                  break;
 557  557          }
 558  558  }
 559  559  
 560  560  static void
 561  561  xen_shutdown(void *arg)
 562  562  {
 563  563          int cmd = (uintptr_t)arg;
 564  564          proc_t *initpp;
 565  565  
 566  566          ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);
 567  567  
 568  568          if (cmd == SHUTDOWN_SUSPEND) {
 569  569                  xen_suspend_domain();
 570  570                  return;
 571  571          }
 572  572  
 573  573          switch (cmd) {
 574  574          case SHUTDOWN_POWEROFF:
 575  575                  force_shutdown_method = AD_POWEROFF;
 576  576                  break;
 577  577          case SHUTDOWN_HALT:
 578  578                  force_shutdown_method = AD_HALT;
 579  579                  break;
 580  580          case SHUTDOWN_REBOOT:
 581  581                  force_shutdown_method = AD_BOOT;
 582  582                  break;
 583  583          }
 584  584  
 585  585  
 586  586          /*
 587  587           * If we're still booting and init(1) isn't set up yet, simply halt.
 588  588           */
 589  589          mutex_enter(&pidlock);
 590  590          initpp = prfind(P_INITPID);
 591  591          mutex_exit(&pidlock);
 592  592          if (initpp == NULL) {
 593  593                  extern void halt(char *);
 594  594                  halt("Power off the System");   /* just in case */
 595  595          }
 596  596  
 597  597          /*
 598  598           * else, graceful shutdown with inittab and all getting involved
 599  599           */
 600  600          psignal(initpp, SIGPWR);
 601  601  
 602  602          (void) timeout(xen_dirty_shutdown, arg,
 603  603              SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
 604  604  }
 605  605  
 606  606  /*ARGSUSED*/
 607  607  static void
 608  608  xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
 609  609          unsigned int len)
 610  610  {
 611  611          char *str;
 612  612          xenbus_transaction_t xbt;
 613  613          int err, shutdown_code = SHUTDOWN_INVALID;
 614  614          unsigned int slen;
 615  615  
 616  616  again:
 617  617          err = xenbus_transaction_start(&xbt);
 618  618          if (err)
 619  619                  return;
 620  620          if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
 621  621                  (void) xenbus_transaction_end(xbt, 1);
 622  622                  return;
 623  623          }
 624  624  
 625  625          SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);
 626  626  
 627  627          /*
 628  628           * If this is a watch fired from our write below, check out early to
 629  629           * avoid an infinite loop.
 630  630           */
 631  631          if (strcmp(str, "") == 0) {
 632  632                  (void) xenbus_transaction_end(xbt, 0);
 633  633                  kmem_free(str, slen);
 634  634                  return;
 635  635          } else if (strcmp(str, "poweroff") == 0) {
 636  636                  shutdown_code = SHUTDOWN_POWEROFF;
 637  637          } else if (strcmp(str, "reboot") == 0) {
 638  638                  shutdown_code = SHUTDOWN_REBOOT;
 639  639          } else if (strcmp(str, "suspend") == 0) {
 640  640                  shutdown_code = SHUTDOWN_SUSPEND;
 641  641          } else if (strcmp(str, "halt") == 0) {
 642  642                  shutdown_code = SHUTDOWN_HALT;
 643  643          } else {
 644  644                  printf("Ignoring shutdown request: %s\n", str);
 645  645          }
 646  646  
 647  647          (void) xenbus_write(xbt, "control", "shutdown", "");
 648  648          err = xenbus_transaction_end(xbt, 0);
 649  649          if (err == EAGAIN) {
 650  650                  SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
 651  651                  kmem_free(str, slen);
 652  652                  goto again;
 653  653          }
 654  654  
 655  655          kmem_free(str, slen);
 656  656          if (shutdown_code != SHUTDOWN_INVALID) {
 657  657                  (void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
 658  658                      (void *)(intptr_t)shutdown_code, 0);
 659  659          }
 660  660  }
 661  661  
 662  662  static int
 663  663  xpv_drv_init(void)
 664  664  {
 665  665          if (xpv_feature(XPVF_HYPERCALLS) < 0 ||
 666  666              xpv_feature(XPVF_SHARED_INFO) < 0)
 667  667                  return (-1);
 668  668  
 669  669          /* Set up the grant tables.  */
 670  670          gnttab_init();
 671  671  
 672  672          /* Set up event channel support */
 673  673          if (ec_init() != 0)
 674  674                  return (-1);
 675  675  
 676  676          /* Set up xenbus */
 677  677          xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
 678  678          xs_early_init();
 679  679          xs_domu_init();
 680  680  
 681  681          /* Set up for suspend/resume/migrate */
 682  682          xen_shutdown_tq = taskq_create("shutdown_taskq", 1,
 683  683              maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE);
 684  684          shutdown_watch.node = "control/shutdown";
 685  685          shutdown_watch.callback = xen_shutdown_handler;
 686  686          if (register_xenbus_watch(&shutdown_watch))
 687  687                  cmn_err(CE_WARN, "Failed to set shutdown watcher");
 688  688  
 689  689          return (0);
 690  690  }
 691  691  
 692  692  static void
 693  693  xen_pv_fini()
 694  694  {
 695  695          ec_fini();
 696  696  }
 697  697  
 698  698  /*ARGSUSED*/
 699  699  static int
 700  700  xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
 701  701  {
 702  702          if (getminor((dev_t)arg) != XPV_MINOR)
 703  703                  return (DDI_FAILURE);
 704  704  
 705  705          switch (cmd) {
 706  706          case DDI_INFO_DEVT2DEVINFO:
 707  707                  *result = xpv_dip;
 708  708                  break;
 709  709          case DDI_INFO_DEVT2INSTANCE:
 710  710                  *result = 0;
 711  711                  break;
 712  712          default:
 713  713                  return (DDI_FAILURE);
 714  714          }
 715  715  
 716  716          return (DDI_SUCCESS);
 717  717  }
 718  718  
 719  719  static int
 720  720  xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 721  721  {
 722  722          if (cmd != DDI_ATTACH)
 723  723                  return (DDI_FAILURE);
 724  724  
 725  725          if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
 726  726              ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
 727  727                  return (DDI_FAILURE);
 728  728  
 729  729          xpv_dip = dip;
 730  730  
 731  731          if (xpv_drv_init() != 0)
  
    | 
      ↓ open down ↓ | 
    731 lines elided | 
    
      ↑ open up ↑ | 
  
 732  732                  return (DDI_FAILURE);
 733  733  
 734  734          ddi_report_dev(dip);
 735  735  
 736  736          /*
 737  737           * If the memscrubber attempts to scrub the pages we hand to Xen,
 738  738           * the domain will panic.
 739  739           */
 740  740          memscrub_disable();
 741  741  
 742      -        /*
 743      -         * Report our version to dom0.
 744      -         */
 745      -        if (xenbus_printf(XBT_NULL, "guest/xpv", "version", "%d",
 746      -            HVMPV_XPV_VERS))
 747      -                cmn_err(CE_WARN, "xpv: couldn't write version\n");
      742 +        /* Report our version to dom0 */
      743 +        (void) xenbus_printf(XBT_NULL, "guest/xpv", "version", "%d",
      744 +            HVMPV_XPV_VERS);
 748  745  
 749  746          return (DDI_SUCCESS);
 750  747  }
 751  748  
 752  749  /*
 753  750   * Attempts to reload the PV driver plumbing hang on Intel platforms, so
 754  751   * we don't want to unload the framework by accident.
 755  752   */
 756  753  int xpv_allow_detach = 0;
 757  754  
 758  755  static int
 759  756  xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 760  757  {
 761  758          if (cmd != DDI_DETACH || xpv_allow_detach == 0)
 762  759                  return (DDI_FAILURE);
 763  760  
 764  761          if (xpv_dip != NULL) {
 765  762                  xen_pv_fini();
 766  763                  ddi_remove_minor_node(dip, NULL);
 767  764                  xpv_dip = NULL;
 768  765          }
 769  766  
 770  767          return (DDI_SUCCESS);
 771  768  }
 772  769  
 773  770  /*ARGSUSED1*/
 774  771  static int
 775  772  xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr)
 776  773  {
 777  774          return (getminor(*dev) == XPV_MINOR ? 0 : ENXIO);
 778  775  }
 779  776  
 780  777  /*ARGSUSED*/
 781  778  static int
 782  779  xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr,
 783  780      int *rval_p)
 784  781  {
 785  782          return (EINVAL);
 786  783  }
 787  784  
 788  785  int
 789  786  _init(void)
 790  787  {
 791  788          int err;
 792  789  
 793  790          if ((err = mod_install(&modl)) != 0)
 794  791                  return (err);
 795  792  
 796  793          impl_bus_add_probe(xpv_enumerate);
 797  794          return (0);
 798  795  }
 799  796  
 800  797  int
 801  798  _fini(void)
 802  799  {
 803  800          int err;
 804  801  
 805  802          if ((err = mod_remove(&modl)) != 0)
 806  803                  return (err);
 807  804  
 808  805          impl_bus_delete_probe(xpv_enumerate);
 809  806          return (0);
 810  807  }
 811  808  
 812  809  int
 813  810  _info(struct modinfo *modinfop)
 814  811  {
 815  812          return (mod_info(&modl, modinfop));
 816  813  }
  
    | 
      ↓ open down ↓ | 
    59 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX