1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2019 Joyent, Inc.
  24  */
  25 
  26 /*
  27  * x86-specific routines used by the CPU Performance counter driver.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/time.h>
  32 #include <sys/atomic.h>
  33 #include <sys/regset.h>
  34 #include <sys/privregs.h>
  35 #include <sys/x86_archext.h>
  36 #include <sys/cpuvar.h>
  37 #include <sys/machcpuvar.h>
  38 #include <sys/archsystm.h>
  39 #include <sys/cpc_pcbe.h>
  40 #include <sys/cpc_impl.h>
  41 #include <sys/x_call.h>
  42 #include <sys/cmn_err.h>
  43 #include <sys/cmt.h>
  44 #include <sys/spl.h>
  45 #include <sys/apic.h>
  46 
  47 static const uint64_t allstopped = 0;
  48 static kcpc_ctx_t *(*overflow_intr_handler)(caddr_t);
  49 
  50 /* Do threads share performance monitoring hardware? */
  51 static int strands_perfmon_shared = 0;
  52 
  53 int kcpc_hw_overflow_intr_installed;            /* set by APIC code */
  54 extern kcpc_ctx_t *kcpc_overflow_intr(caddr_t arg, uint64_t bitmap);
  55 
  56 extern int kcpc_counts_include_idle; /* Project Private /etc/system variable */
  57 
  58 void (*kcpc_hw_enable_cpc_intr)(void);          /* set by APIC code */
  59 
  60 int
  61 kcpc_hw_add_ovf_intr(kcpc_ctx_t *(*handler)(caddr_t))
  62 {
  63         if (x86_type != X86_TYPE_P6)
  64                 return (0);
  65         overflow_intr_handler = handler;
  66         return (ipltospl(APIC_PCINT_IPL));
  67 }
  68 
  69 void
  70 kcpc_hw_rem_ovf_intr(void)
  71 {
  72         overflow_intr_handler = NULL;
  73 }
  74 
  75 /*
  76  * Hook used on P4 systems to catch online/offline events.
  77  */
  78 /*ARGSUSED*/
  79 static int
  80 kcpc_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
  81 {
  82         pg_cmt_t        *chip_pg;
  83         int             active_cpus_cnt;
  84 
  85         if (what != CPU_ON)
  86                 return (0);
  87 
  88         /*
  89          * If any CPU-bound contexts exist, we don't need to invalidate
  90          * anything, as no per-LWP contexts can coexist.
  91          */
  92         if (kcpc_cpuctx || dtrace_cpc_in_use)
  93                 return (0);
  94 
  95         /*
  96          * If this chip now has more than 1 active cpu, we must invalidate all
  97          * contexts in the system.
  98          */
  99         chip_pg = (pg_cmt_t *)pghw_find_pg(cpu[cpuid], PGHW_CHIP);
 100         if (chip_pg != NULL) {
 101                 active_cpus_cnt = GROUP_SIZE(&chip_pg->cmt_cpus_actv);
 102                 if (active_cpus_cnt > 1)
 103                         kcpc_invalidate_all();
 104         }
 105 
 106         return (0);
 107 }
 108 
 109 static kmutex_t cpu_setup_lock; /* protects setup_registered */
 110 static int setup_registered;
 111 
 112 
 113 void
 114 kcpc_hw_init(cpu_t *cp)
 115 {
 116         kthread_t *t = cp->cpu_idle_thread;
 117         uint32_t versionid;
 118         struct cpuid_regs cpuid;
 119 
 120         strands_perfmon_shared = 0;
 121         if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
 122                 if (cpuid_getvendor(cpu[0]) == X86_VENDOR_Intel) {
 123                         /*
 124                          * Intel processors that support Architectural
 125                          * Performance Monitoring Version 3 have per strand
 126                          * performance monitoring hardware.
 127                          * Hence we can allow use of performance counters on
 128                          * multiple strands on the same core simultaneously.
 129                          */
 130                         cpuid.cp_eax = 0x0;
 131                         (void) __cpuid_insn(&cpuid);
 132                         if (cpuid.cp_eax < 0xa) {
 133                                 strands_perfmon_shared = 1;
 134                         } else {
 135                                 cpuid.cp_eax = 0xa;
 136                                 (void) __cpuid_insn(&cpuid);
 137 
 138                                 versionid = cpuid.cp_eax & 0xFF;
 139                                 if (versionid < 3) {
 140                                         strands_perfmon_shared = 1;
 141                                 }
 142                         }
 143                 } else if (cpuid_getvendor(cpu[0]) == X86_VENDOR_AMD ||
 144                     cpuid_getvendor(cpu[0]) == X86_VENDOR_HYGON) {
 145                         /*
 146                          * On AMD systems with HT, all of the performance
 147                          * monitors exist on a per-logical CPU basis.
 148                          */
 149                         strands_perfmon_shared = 0;
 150                 } else {
 151                         strands_perfmon_shared = 1;
 152                 }
 153         }
 154 
 155         if (strands_perfmon_shared) {
 156                 mutex_enter(&cpu_setup_lock);
 157                 if (setup_registered == 0) {
 158                         mutex_enter(&cpu_lock);
 159                         register_cpu_setup_func(kcpc_cpu_setup, NULL);
 160                         mutex_exit(&cpu_lock);
 161                         setup_registered = 1;
 162                 }
 163                 mutex_exit(&cpu_setup_lock);
 164         }
 165 
 166         mutex_init(&cp->cpu_cpc_ctxlock, "cpu_cpc_ctxlock", MUTEX_DEFAULT, 0);
 167 
 168         if (kcpc_counts_include_idle)
 169                 return;
 170 
 171         installctx(t, cp, kcpc_idle_save, kcpc_idle_restore,
 172             NULL, NULL, NULL, NULL);
 173 }
 174 
 175 void
 176 kcpc_hw_fini(cpu_t *cp)
 177 {
 178         ASSERT(cp->cpu_idle_thread == NULL);
 179 
 180         mutex_destroy(&cp->cpu_cpc_ctxlock);
 181 }
 182 
 183 #define BITS(v, u, l)   \
 184         (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
 185 
 186 #define PCBE_NAMELEN 30 /* Enough Room for pcbe.manuf.model.family.stepping */
 187 
 188 /*
 189  * Examine the processor and load an appropriate PCBE.
 190  */
 191 int
 192 kcpc_hw_load_pcbe(void)
 193 {
 194         return (kcpc_pcbe_tryload(cpuid_getvendorstr(CPU), cpuid_getfamily(CPU),
 195             cpuid_getmodel(CPU), cpuid_getstep(CPU)));
 196 }
 197 
 198 /*
 199  * Called by the generic framework to check if it's OK to bind a set to a CPU.
 200  */
 201 int
 202 kcpc_hw_cpu_hook(processorid_t cpuid, ulong_t *kcpc_cpumap)
 203 {
 204         cpu_t           *cpu, *p;
 205         pg_t            *chip_pg;
 206         pg_cpu_itr_t    itr;
 207 
 208         if (!strands_perfmon_shared)
 209                 return (0);
 210 
 211         /*
 212          * Only one logical CPU on each Pentium 4 HT CPU may be bound to at
 213          * once.
 214          *
 215          * This loop is protected by holding cpu_lock, in order to properly
 216          * access the cpu_t of the desired cpu.
 217          */
 218         mutex_enter(&cpu_lock);
 219         if ((cpu = cpu_get(cpuid)) == NULL) {
 220                 mutex_exit(&cpu_lock);
 221                 return (-1);
 222         }
 223 
 224         chip_pg = (pg_t *)pghw_find_pg(cpu, PGHW_CHIP);
 225 
 226         PG_CPU_ITR_INIT(chip_pg, itr);
 227         while ((p = pg_cpu_next(&itr)) != NULL) {
 228                 if (p == cpu)
 229                         continue;
 230                 if (BT_TEST(kcpc_cpumap, p->cpu_id)) {
 231                         mutex_exit(&cpu_lock);
 232                         return (-1);
 233                 }
 234         }
 235 
 236         mutex_exit(&cpu_lock);
 237         return (0);
 238 }
 239 
 240 /*
 241  * Called by the generic framework to check if it's OK to bind a set to an LWP.
 242  */
 243 int
 244 kcpc_hw_lwp_hook(void)
 245 {
 246         pg_cmt_t        *chip;
 247         group_t         *chips;
 248         group_iter_t    i;
 249 
 250         if (!strands_perfmon_shared)
 251                 return (0);
 252 
 253         /*
 254          * Only one CPU per chip may be online.
 255          */
 256         mutex_enter(&cpu_lock);
 257 
 258         chips = pghw_set_lookup(PGHW_CHIP);
 259         if (chips == NULL) {
 260                 mutex_exit(&cpu_lock);
 261                 return (0);
 262         }
 263 
 264         group_iter_init(&i);
 265         while ((chip = group_iterate(chips, &i)) != NULL) {
 266                 if (GROUP_SIZE(&chip->cmt_cpus_actv) > 1) {
 267                         mutex_exit(&cpu_lock);
 268                         return (-1);
 269                 }
 270         }
 271 
 272         mutex_exit(&cpu_lock);
 273         return (0);
 274 }