1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/clock.h>
  29 #include <sys/panic.h>
  30 #include <sys/atomic.h>
  31 #include <sys/hypervisor.h>
  32 
  33 #include <sys/archsystm.h>
  34 
  35 /*
  36  * On the hypervisor, we have a virtualized system time based upon the
  37  * information provided for each VCPU, which is updated every time it is
  38  * scheduled onto a real CPU.  Thus, none of the traditional code in
  39  * i86pc/os/timestamp.c applies, our gethrtime() implementation is run through
  40  * the PSM, and there is no scaling step to apply.
  41  *
  42  * However, the platform does not guarantee monotonicity; thus we have to fake
  43  * this up, which is a deeply unpleasant thing to have to do.
  44  *
  45  * Note that the virtualized interface still relies on the current TSC to
  46  * calculate the time in nanoseconds since the VCPU was scheduled, and is thus
  47  * subject to all the problems with that.  For the most part, the hypervisor is
  48  * supposed to deal with them.
  49  *
  50  * Another wrinkle involves suspend/resume/migration.  If we come back and time
  51  * is apparently less, we may have resumed on a different machine or on the
  52  * same machine after a reboot.  In this case we need to maintain an addend to
  53  * ensure time continues reasonably.  Otherwise we could end up taking a very
  54  * long time to expire cyclics in the heap.  Thus we have two functions:
  55  *
  56  * xpv_getsystime()
  57  *
  58  *      The unadulterated system time from the hypervisor.  This is only to be
  59  *      used when programming the hypervisor (setting a timer or calculating
  60  *      the TOD).
  61  *
  62  * xpv_gethrtime()
  63  *
  64  *      This is the monotonic hrtime counter to be used by everything else such
  65  *      as the cyclic subsystem.  We should never pass an hrtime directly into
  66  *      a hypervisor interface, as hrtime_addend may well be non-zero.
  67  */
  68 
  69 int hrtime_fake_mt = 1;
  70 static volatile hrtime_t hrtime_last;
  71 static hrtime_t hrtime_suspend_time;
  72 static hrtime_t hrtime_addend;
  73 
  74 /*
  75  * These functions are used in DTrace probe context, and must be removed from
  76  * fbt consideration.  Currently fbt ignores all weak symbols, so this will
  77  * achieve that.
  78  */
  79 #pragma weak xpv_gethrtime = dtrace_xpv_gethrtime
  80 #pragma weak xpv_getsystime = dtrace_xpv_getsystime
  81 #pragma weak dtrace_gethrtime = dtrace_xpv_gethrtime
  82 #pragma weak tsc_read = dtrace_xpv_gethrtime
  83 
  84 hrtime_t
  85 dtrace_xpv_getsystime(void)
  86 {
  87         vcpu_time_info_t *src;
  88         vcpu_time_info_t __vti, *dst = &__vti;
  89         uint64_t tsc_delta;
  90         uint64_t tsc;
  91         hrtime_t result;
  92         uint32_t stamp;
  93 
  94         src = &CPU->cpu_m.mcpu_vcpu_info->time;
  95 
  96         /*
  97          * Loop until version has not been changed during our update, and a Xen
  98          * update is not under way (lowest bit is set).
  99          */
 100         do {
 101                 dst->version = src->version;
 102                 stamp = CPU->cpu_m.mcpu_istamp;
 103 
 104                 membar_consumer();
 105 
 106                 dst->tsc_timestamp = src->tsc_timestamp;
 107                 dst->system_time = src->system_time;
 108                 dst->tsc_to_system_mul = src->tsc_to_system_mul;
 109                 dst->tsc_shift = src->tsc_shift;
 110 
 111                 /*
 112                  * Note that this use of the -actual- TSC register
 113                  * should probably be the SOLE one in the system on this
 114                  * paravirtualized platform.
 115                  */
 116                 tsc = __rdtsc_insn();
 117                 tsc_delta = tsc - dst->tsc_timestamp;
 118 
 119                 membar_consumer();
 120 
 121         } while (((src->version & 1) | (dst->version ^ src->version)) ||
 122             CPU->cpu_m.mcpu_istamp != stamp);
 123 
 124         if (dst->tsc_shift >= 0)
 125                 tsc_delta <<= dst->tsc_shift;
 126         else if (dst->tsc_shift < 0)
 127                 tsc_delta >>= -dst->tsc_shift;
 128 
 129         result = dst->system_time +
 130             ((uint64_t)(tsc_delta * (uint64_t)dst->tsc_to_system_mul) >> 32);
 131 
 132         return (result);
 133 }
 134 
 135 hrtime_t
 136 dtrace_xpv_gethrtime(void)
 137 {
 138         hrtime_t result = xpv_getsystime() + hrtime_addend;
 139 
 140         if (hrtime_fake_mt) {
 141                 hrtime_t last;
 142                 do {
 143                         last = hrtime_last;
 144                         if (result < last)
 145                                 result = last + 1;
 146                 } while (atomic_cas_64((volatile uint64_t *)&hrtime_last,
 147                     last, result) != last);
 148         }
 149 
 150         return (result);
 151 }
 152 
 153 void
 154 xpv_time_suspend(void)
 155 {
 156         hrtime_suspend_time = xpv_getsystime();
 157 }
 158 
 159 void
 160 xpv_time_resume(void)
 161 {
 162         hrtime_t delta = xpv_getsystime() - hrtime_suspend_time;
 163 
 164         if (delta < 0)
 165                 hrtime_addend += -delta;
 166 }