Print this page
OS-5192 need faster clock_gettime
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Ryan Zezeski <ryan@zinascii.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/i86pc/os/timestamp.c
          +++ new/usr/src/uts/i86pc/os/timestamp.c
↓ open down ↓ 17 lines elided ↑ open up ↑
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   *
  26   26   * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  27   27   * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
       28 + * Copyright 2016 Joyent, Inc.
  28   29   */
  29   30  
  30   31  #include <sys/types.h>
  31   32  #include <sys/param.h>
  32   33  #include <sys/systm.h>
  33   34  #include <sys/disp.h>
  34   35  #include <sys/var.h>
  35   36  #include <sys/cmn_err.h>
  36   37  #include <sys/debug.h>
  37   38  #include <sys/x86_archext.h>
↓ open down ↓ 2 lines elided ↑ open up ↑
  40   41  #include <sys/psm_defs.h>
  41   42  #include <sys/clock.h>
  42   43  #include <sys/atomic.h>
  43   44  #include <sys/lockstat.h>
  44   45  #include <sys/smp_impldefs.h>
  45   46  #include <sys/dtrace.h>
  46   47  #include <sys/time.h>
  47   48  #include <sys/panic.h>
  48   49  #include <sys/cpu.h>
  49   50  #include <sys/sdt.h>
       51 +#include <sys/comm_page.h>
  50   52  
  51   53  /*
  52   54   * Using the Pentium's TSC register for gethrtime()
  53   55   * ------------------------------------------------
  54   56   *
  55   57   * The Pentium family, like many chip architectures, has a high-resolution
  56   58   * timestamp counter ("TSC") which increments once per CPU cycle.  The contents
  57   59   * of the timestamp counter are read with the RDTSC instruction.
  58   60   *
  59   61   * As with its UltraSPARC equivalent (the %tick register), TSC's cycle count
↓ open down ↓ 32 lines elided ↑ open up ↑
  92   94   * kept in sync.  We implement this by having a routine, tsc_tick(), which
  93   95   * is called once per second from the interrupt which drives time-of-day.
  94   96   *
  95   97   * Note that the hrtime base for gethrtime, tsc_hrtime_base, is modified
  96   98   * atomically with nsec_scale under CLOCK_LOCK.  This ensures that time
  97   99   * monotonically increases.
  98  100   */
  99  101  
 100  102  #define NSEC_SHIFT 5
 101  103  
 102      -static uint_t nsec_scale;
 103  104  static uint_t nsec_unscale;
 104  105  
 105  106  /*
 106  107   * These two variables used to be grouped together inside of a structure that
 107  108   * lived on a single cache line. A regression (bug ID 4623398) caused the
 108  109   * compiler to emit code that "optimized" away the while-loops below. The
 109  110   * result was that no synchronization between the onlining and onlined CPUs
 110  111   * took place.
 111  112   */
 112  113  static volatile int tsc_ready;
↓ open down ↓ 20 lines elided ↑ open up ↑
 133  134  }
 134  135  
 135  136  #define TSC_CONVERT(tsc, hrt, scale) {                  \
 136  137          unsigned int *_l = (unsigned int *)&(tsc);      \
 137  138          (hrt) = mul32(_l[1], scale) << NSEC_SHIFT;      \
 138  139          (hrt) += mul32(_l[0], scale) >> (32 - NSEC_SHIFT); \
 139  140  }
 140  141  
 141  142  int tsc_master_slave_sync_needed = 1;
 142  143  
 143      -static int      tsc_max_delta;
 144      -static hrtime_t tsc_sync_tick_delta[NCPU];
 145  144  typedef struct tsc_sync {
 146  145          volatile hrtime_t master_tsc, slave_tsc;
 147  146  } tsc_sync_t;
 148  147  static tsc_sync_t *tscp;
 149  148  static hrtime_t largest_tsc_delta = 0;
 150  149  static ulong_t shortest_write_time = ~0UL;
 151  150  
 152      -static hrtime_t tsc_last = 0;
 153  151  static hrtime_t tsc_last_jumped = 0;
 154      -static hrtime_t tsc_hrtime_base = 0;
 155  152  static int      tsc_jumped = 0;
 156  153  static uint32_t tsc_wayback = 0;
 157  154  /*
 158  155   * The cap of 1 second was chosen since it is the frequency at which the
 159  156   * tsc_tick() function runs, which means that when gethrtime() is called it
 160  157   * should never be more than 1 second since tsc_last was updated.
 161  158   */
 162      -static hrtime_t tsc_resume_cap;
 163  159  static hrtime_t tsc_resume_cap_ns = NANOSEC;     /* 1s */
 164  160  
 165  161  static hrtime_t shadow_tsc_hrtime_base;
 166  162  static hrtime_t shadow_tsc_last;
 167  163  static uint_t   shadow_nsec_scale;
 168  164  static uint32_t shadow_hres_lock;
 169  165  int get_tsc_ready();
 170  166  
 171  167  static inline
 172  168  hrtime_t tsc_protect(hrtime_t a) {
↓ open down ↓ 366 lines elided ↑ open up ↑
 539  535                  largest_tsc_delta = tdelta;
 540  536          if (min_write_time < shortest_write_time)
 541  537                  shortest_write_time = min_write_time;
 542  538          /*
 543  539           * Enable delta variants of tsc functions if the largest of all chosen
 544  540           * deltas exceeds the shortest write time.
 545  541           */
 546  542          if (largest_tsc_delta > shortest_write_time) {
 547  543                  gethrtimef = tsc_gethrtime_delta;
 548  544                  gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
      545 +                tsc_ncpu = NCPU;
 549  546          }
 550  547          restore_int_flag(flags);
 551  548  }
 552  549  
 553  550  /*
 554  551   * Called by a CPU which has just been onlined.  It is expected that the CPU
 555  552   * performing the online operation will call tsc_sync_master().
 556  553   *
 557  554   * TSC sync is disabled in the context of virtualization. See comments
 558  555   * above tsc_sync_master.
↓ open down ↓ 122 lines elided ↑ open up ↑
 681  678          (void) tsc_gethrtime();
 682  679          tsc_max_delta = tsc_read() - tsc;
 683  680          restore_int_flag(flags);
 684  681          gethrtimef = tsc_gethrtime;
 685  682          gethrtimeunscaledf = tsc_gethrtimeunscaled;
 686  683          scalehrtimef = tsc_scalehrtime;
 687  684          unscalehrtimef = tsc_unscalehrtime;
 688  685          hrtime_tick = tsc_tick;
 689  686          gethrtime_hires = 1;
 690  687          /*
      688 +         * Being part of the comm page, tsc_ncpu communicates the published
      689 +         * length of the tsc_sync_tick_delta array.  It is kept at zero while
      690 +         * the TSCs are in sync so that the absent delta data is ignored.
      691 +         */
      692 +        tsc_ncpu = 0;
      693 +        /*
 691  694           * Allocate memory for the structure used in the tsc sync logic.
 692  695           * This structure should be aligned on a multiple of cache line size.
 693  696           */
 694  697          tscp = kmem_zalloc(PAGESIZE, KM_SLEEP);
 695  698  
 696  699          /*
 697  700           * Convert the TSC resume cap ns value into its unscaled TSC value.
 698  701           * See tsc_gethrtime().
 699  702           */
 700  703          if (tsc_resume_cap == 0)
↓ open down ↓ 18 lines elided ↑ open up ↑
 719  722  tsc_adjust_delta(hrtime_t tdelta)
 720  723  {
 721  724          int             i;
 722  725  
 723  726          for (i = 0; i < NCPU; i++) {
 724  727                  tsc_sync_tick_delta[i] += tdelta;
 725  728          }
 726  729  
 727  730          gethrtimef = tsc_gethrtime_delta;
 728  731          gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
      732 +        tsc_ncpu = NCPU;
 729  733  }
 730  734  
 731  735  /*
 732  736   * Functions to manage TSC and high-res time on suspend and resume.
 733  737   */
 734  738  
 735  739  /*
 736  740   * declarations needed for time adjustment
 737  741   */
 738  742  extern void     rtcsync(void);
↓ open down ↓ 123 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX