Print this page
    
OS-5192 need faster clock_gettime
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Ryan Zezeski <ryan@zinascii.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/i86xpv/os/xpv_timestamp.c
          +++ new/usr/src/uts/i86xpv/os/xpv_timestamp.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
       25 + * Copyright 2016 Joyent, Inc.
  25   26   */
  26   27  
  27   28  #include <sys/types.h>
  28   29  #include <sys/clock.h>
  29   30  #include <sys/panic.h>
  30   31  #include <sys/atomic.h>
  31   32  #include <sys/hypervisor.h>
  32   33  
  33   34  #include <sys/archsystm.h>
  34   35  
  35   36  /*
  36   37   * On the hypervisor, we have a virtualized system time based upon the
  37   38   * information provided for each VCPU, which is updated every time it is
  38   39   * scheduled onto a real CPU.  Thus, none of the traditional code in
  39   40   * i86pc/os/timestamp.c applies, our gethrtime() implementation is run through
  40   41   * the PSM, and there is no scaling step to apply.
  41   42   *
  42   43   * However, the platform does not guarantee monotonicity; thus we have to fake
  43   44   * this up, which is a deeply unpleasant thing to have to do.
  44   45   *
  45   46   * Note that the virtualized interface still relies on the current TSC to
  46   47   * calculate the time in nanoseconds since the VCPU was scheduled, and is thus
  47   48   * subject to all the problems with that.  For the most part, the hypervisor is
  48   49   * supposed to deal with them.
  49   50   *
  50   51   * Another wrinkle involves suspend/resume/migration.  If we come back and time
  51   52   * is apparently less, we may have resumed on a different machine or on the
  52   53   * same machine after a reboot.  In this case we need to maintain an addend to
  53   54   * ensure time continues reasonably.  Otherwise we could end up taking a very
  54   55   * long time to expire cyclics in the heap.  Thus we have two functions:
  55   56   *
  56   57   * xpv_getsystime()
  57   58   *
  58   59   *      The unadulterated system time from the hypervisor.  This is only to be
  59   60   *      used when programming the hypervisor (setting a timer or calculating
  60   61   *      the TOD).
  61   62   *
  62   63   * xpv_gethrtime()
  63   64   *
  
    | 
      ↓ open down ↓ | 
    29 lines elided | 
    
      ↑ open up ↑ | 
  
  64   65   *      This is the monotonic hrtime counter to be used by everything else such
  65   66   *      as the cyclic subsystem.  We should never pass an hrtime directly into
  66   67   *      a hypervisor interface, as hrtime_addend may well be non-zero.
  67   68   */
  68   69  
  69   70  int hrtime_fake_mt = 1;      /* non-zero: dtrace_xpv_gethrtime() clamps to hrtime_last */
           /* Value most recently published by dtrace_xpv_gethrtime() via atomic_cas_64(). */
  70   71  static volatile hrtime_t hrtime_last;
           /* xpv_getsystime() sample recorded by xpv_time_suspend(). */
  71   72  static hrtime_t hrtime_suspend_time;
           /*
            * Offset added to the raw system time by dtrace_xpv_gethrtime();
            * increased by xpv_time_resume() when system time reads earlier than it
            * did at suspend.
            */
  72   73  static hrtime_t hrtime_addend;
  73   74  
           /*
            * The four definitions below are marked as additions in this diff and are
            * not referenced anywhere in the visible portion of this file.
            * NOTE(review): presumably they back the common hrestime/clock code that
            * expects these symbols from the platform -- confirm against the callers.
            */
       75 +volatile uint32_t hres_lock;
       76 +hrtime_t hres_last_tick;
       77 +int64_t hrestime_adj;
       78 +volatile timestruc_t hrestime;
       79 +
  74   80  /*
  75   81   * These functions are used in DTrace probe context, and must be removed from
  76   82   * fbt consideration.  Currently fbt ignores all weak symbols, so this will
  77   83   * achieve that.
  78   84   */
  79   85  #pragma weak xpv_gethrtime = dtrace_xpv_gethrtime
  80   86  #pragma weak xpv_getsystime = dtrace_xpv_getsystime
           /*
            * dtrace_gethrtime and tsc_read are likewise aliases for the monotonic
            * hrtime routine, not for the raw system time.
            */
  81   87  #pragma weak dtrace_gethrtime = dtrace_xpv_gethrtime
  82   88  #pragma weak tsc_read = dtrace_xpv_gethrtime
  83   89  
  84   90  hrtime_t
  85   91  dtrace_xpv_getsystime(void)
  86   92  {
                   /*
                    * Return the hypervisor's system time as seen from the current CPU:
                    * the system_time from a consistent snapshot of that CPU's
                    * vcpu_time_info, plus the TSC ticks elapsed since the snapshot's
                    * tsc_timestamp, scaled by tsc_shift and tsc_to_system_mul.  No
                    * hrtime_addend is applied here.  Also reachable through the weak
                    * alias xpv_getsystime.
                    */
  87   93          vcpu_time_info_t *src;
  88   94          vcpu_time_info_t __vti, *dst = &__vti;
  89   95          uint64_t tsc_delta;
  90   96          uint64_t tsc;
  91   97          hrtime_t result;
  92   98          uint32_t stamp;
  93   99  
  94  100          src = &CPU->cpu_m.mcpu_vcpu_info->time;
  95  101  
  96  102          /*
  97  103           * Loop until version has not been changed during our update, and a Xen
  98  104           * update is not under way (lowest bit is set).
  99  105           */
 100  106          do {
                           /*
                            * Record the version and the per-CPU mcpu_istamp before
                            * copying, so that a change in either can be detected by
                            * the loop condition below.
                            */
 101  107                  dst->version = src->version;
 102  108                  stamp = CPU->cpu_m.mcpu_istamp;
 103  109  
                           /*
                            * Keep the loads above from being reordered after the
                            * field loads that follow.
                            */
 104  110                  membar_consumer();
 105  111  
 106  112                  dst->tsc_timestamp = src->tsc_timestamp;
 107  113                  dst->system_time = src->system_time;
 108  114                  dst->tsc_to_system_mul = src->tsc_to_system_mul;
 109  115                  dst->tsc_shift = src->tsc_shift;
 110  116  
 111  117                  /*
 112  118                   * Note that this use of the -actual- TSC register
 113  119                   * should probably be the SOLE one in the system on this
 114  120                   * paravirtualized platform.
 115  121                   */
 116  122                  tsc = __rdtsc_insn();
 117  123                  tsc_delta = tsc - dst->tsc_timestamp;
 118  124  
                           /*
                            * Keep the field and TSC reads above ahead of the
                            * re-read of version and mcpu_istamp in the condition.
                            */
 119  125                  membar_consumer();
 120  126  
                   /*
                    * Retry when the low bit of version is set, when version differs
                    * from the value copied at the top of the loop, or when
                    * mcpu_istamp no longer matches the value sampled there.
                    * NOTE(review): mcpu_istamp presumably changes when this CPU takes
                    * an interrupt during the copy -- confirm where it is updated.
                    */
 121  127          } while (((src->version & 1) | (dst->version ^ src->version)) ||
 122  128              CPU->cpu_m.mcpu_istamp != stamp);
 123  129  
                   /*
                    * tsc_shift is signed: a non-negative value shifts the delta left,
                    * a negative value shifts it right by its magnitude.  The second
                    * test is always true when it is reached.
                    */
 124  130          if (dst->tsc_shift >= 0)
 125  131                  tsc_delta <<= dst->tsc_shift;
 126  132          else if (dst->tsc_shift < 0)
 127  133                  tsc_delta >>= -dst->tsc_shift;
 128  134  
                   /*
                    * The multiplier is applied as a fixed-point value with 32
                    * fractional bits (hence the >> 32).
                    * NOTE(review): the product is formed in 64 bits, so its high bits
                    * are lost once tsc_delta exceeds 2^64 / tsc_to_system_mul --
                    * confirm the snapshot is refreshed often enough for that not to
                    * happen.
                    */
 129  135          result = dst->system_time +
 130  136              ((uint64_t)(tsc_delta * (uint64_t)dst->tsc_to_system_mul) >> 32);
 131  137  
 132  138          return (result);
 133  139  }
 134  140  
 135  141  hrtime_t
 136  142  dtrace_xpv_gethrtime(void)
 137  143  {
                   /*
                    * Return xpv_getsystime() plus hrtime_addend.  When hrtime_fake_mt
                    * is non-zero the value is additionally clamped against
                    * hrtime_last so that callers never observe a value lower than one
                    * already published.  Also reachable through the weak aliases
                    * xpv_gethrtime, dtrace_gethrtime and tsc_read.
                    */
 138  144          hrtime_t result = xpv_getsystime() + hrtime_addend;
 139  145  
 140  146          if (hrtime_fake_mt) {
 141  147                  hrtime_t last;
                           /*
                            * If a later time has already been published, hand out
                            * one past it instead.  A result equal to hrtime_last is
                            * returned unchanged, so the sequence is non-decreasing
                            * rather than strictly increasing.  The compare-and-swap
                            * publishes the result only if hrtime_last still holds
                            * the value read at the top of the loop; otherwise the
                            * check is repeated against the new value.
                            */
 142  148                  do {
 143  149                          last = hrtime_last;
 144  150                          if (result < last)
 145  151                                  result = last + 1;
 146  152                  } while (atomic_cas_64((volatile uint64_t *)&hrtime_last,
 147  153                      last, result) != last);
 148  154          }
 149  155  
 150  156          return (result);
 151  157  }
 152  158  
 153  159  void
 154  160  xpv_time_suspend(void)
 155  161  {
                   /*
                    * Record the raw hypervisor system time (no addend) so that
                    * xpv_time_resume() can compare against it.
                    */
 156  162          hrtime_suspend_time = xpv_getsystime();
 157  163  }
 158  164  
 159  165  void
 160  166  xpv_time_resume(void)
 161  167  {
                   /*
                    * Compare the raw system time now with the sample taken by
                    * xpv_time_suspend().
                    */
 162  168          hrtime_t delta = xpv_getsystime() - hrtime_suspend_time;
 163  169  
                   /*
                    * A negative delta means system time now reads earlier than it did
                    * at suspend; grow hrtime_addend by the shortfall so that
                    * xpv_getsystime() + hrtime_addend is at least the suspend-time
                    * value.  A non-negative delta leaves the addend untouched.
                    */
 164  170          if (delta < 0)
 165  171                  hrtime_addend += -delta;
 166  172  }
  
    | 
      ↓ open down ↓ | 
    83 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX