1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2021 Joyent, Inc.
25 */
26
27 #include <sys/param.h>
28 #include <sys/thread.h>
29 #include <sys/cpuvar.h>
30 #include <sys/inttypes.h>
31 #include <sys/cmn_err.h>
32 #include <sys/time.h>
33 #include <sys/ksynch.h>
34 #include <sys/systm.h>
35 #include <sys/kcpc.h>
36 #include <sys/cpc_impl.h>
37 #include <sys/cpc_pcbe.h>
38 #include <sys/atomic.h>
39 #include <sys/sunddi.h>
40 #include <sys/modctl.h>
41 #include <sys/sdt.h>
42 #include <sys/archsystm.h>
43 #include <sys/promif.h>
44 #include <sys/x_call.h>
45 #include <sys/cap_util.h>
46 #if defined(__x86)
47 #include <asm/clock.h>
48 #include <sys/xc_levels.h>
49 #endif
50
51 static kmutex_t kcpc_ctx_llock[CPC_HASH_BUCKETS]; /* protects ctx_list */
52 static kcpc_ctx_t *kcpc_ctx_list[CPC_HASH_BUCKETS]; /* head of list */
53
54
55 krwlock_t kcpc_cpuctx_lock; /* lock for 'kcpc_cpuctx' below */
56 int kcpc_cpuctx; /* number of cpu-specific contexts */
57
58 int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */
59
60 /*
61 * These are set when a PCBE module is loaded.
62 */
63 uint_t cpc_ncounters = 0;
64 pcbe_ops_t *pcbe_ops = NULL;
65
66 /*
67 * Statistics on (mis)behavior
68 */
69 static uint32_t kcpc_intrctx_count; /* # overflows in an interrupt handler */
70 static uint32_t kcpc_nullctx_count; /* # overflows in a thread with no ctx */
71
72 /*
73 * If 'kcpc_nullctx_panic' is set to 1, any overflow interrupt taken in a
74 * thread with no valid context will result in a panic.
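* The tunable is typically set via /etc/system ("set kcpc_nullctx_panic = 1").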
75 */
76 static int kcpc_nullctx_panic = 0;
77
78 static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
79 static void kcpc_restore(kcpc_ctx_t *ctx);
80 static void kcpc_save(kcpc_ctx_t *ctx);
81 static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
82 static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
83 static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);
84 static kcpc_set_t *kcpc_set_create(kcpc_request_t *reqs, int nreqs,
85 int set_flags, int kmem_flags);
86
87 /*
88 * Macros to manipulate context flags. All flag updates should use one of
89 * these two macros.
90 *
91 * Flags should always be updated atomically since some of the updates are
92 * not protected by locks.
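*
* For example: KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);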
93 */
94 #define KCPC_CTX_FLAG_SET(ctx, flag) atomic_or_uint(&(ctx)->kc_flags, (flag))
95 #define KCPC_CTX_FLAG_CLR(ctx, flag) atomic_and_uint(&(ctx)->kc_flags, ~(flag))
96
97 /*
98 * The IS_HIPIL() macro verifies that the code is executed either from a
99 * cross-call or from a high-PIL interrupt.
100 */
101 #ifdef DEBUG
102 #define IS_HIPIL() (getpil() >= XCALL_PIL)
103 #else
104 #define IS_HIPIL()
105 #endif /* DEBUG */
106
107
108 extern int kcpc_hw_load_pcbe(void);
109
110 /*
111 * Return value from kcpc_hw_load_pcbe()
112 */
113 static int kcpc_pcbe_error = 0;
114
115 /*
116 * Perform one-time initialization of the kcpc framework.
117 * This function performs the initialization only the first time it is called.
118 * It is safe to call it multiple times.
119 */
120 int
121 kcpc_init(void)
122 {
123 long hash;
124 static uint32_t kcpc_initialized = 0;
125
126 /*
127 * We already tried loading the platform pcbe module and failed.
128 */
129 if (kcpc_pcbe_error != 0)
130 return (-1);
131
132 /*
133 * The kcpc framework should be initialized at most once
134 */
135 if (atomic_cas_32(&kcpc_initialized, 0, 1) != 0)
136 return (0);
137
138 rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL);
139 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++)
140 mutex_init(&kcpc_ctx_llock[hash],
141 NULL, MUTEX_DRIVER, (void *)(uintptr_t)15);
142
143 /*
144 * Load platform-specific pcbe module
145 */
146 kcpc_pcbe_error = kcpc_hw_load_pcbe();
147
148 return (kcpc_pcbe_error == 0 ? 0 : -1);
149 }
150
151 void
152 kcpc_register_pcbe(pcbe_ops_t *ops)
153 {
154 pcbe_ops = ops;
155 cpc_ncounters = pcbe_ops->pcbe_ncounters();
156 }
157
158 void
159 kcpc_register_dcpc(void (*func)(uint64_t))
160 {
161 dtrace_cpc_fire = func;
162 }
163
164 void
165 kcpc_unregister_dcpc(void)
166 {
167 dtrace_cpc_fire = NULL;
168 }
169
170 int
171 kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
172 {
173 cpu_t *cp;
174 kcpc_ctx_t *ctx;
175 int error;
176 int save_spl;
177
178 ctx = kcpc_ctx_alloc(KM_SLEEP);
179
180 if (kcpc_assign_reqs(set, ctx) != 0) {
181 kcpc_ctx_free(ctx);
182 *subcode = CPC_RESOURCE_UNAVAIL;
183 return (EINVAL);
184 }
185
186 ctx->kc_cpuid = cpuid;
187 ctx->kc_thread = curthread;
188
189 set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);
190
191 if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
192 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
193 kcpc_ctx_free(ctx);
194 return (error);
195 }
196
197 set->ks_ctx = ctx;
198 ctx->kc_set = set;
199
200 /*
201 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
202 * we are manipulating the cpu_t and programming the hardware, else the
203 * cpu_t could go away while we're looking at it.
204 */
205 mutex_enter(&cpu_lock);
206 cp = cpu_get(cpuid);
207
208 if (cp == NULL)
209 /*
210 * The CPU could have been DR'd out while we were getting set up.
211 */
212 goto unbound;
213
214 mutex_enter(&cp->cpu_cpc_ctxlock);
215 kpreempt_disable();
216 save_spl = spl_xcall();
217
218 /*
219 * Check whether this CPU's counters are already in use by someone
220 * other than the kernel's capacity and utilization code (the kernel
221 * will let go of its counters for the user in kcpc_program() below).
222 */
223 if (cp->cpu_cpc_ctx != NULL && !CU_CPC_ON(cp)) {
224 /*
225 * If this CPU already has a bound set, return an error.
226 */
227 splx(save_spl);
228 kpreempt_enable();
229 mutex_exit(&cp->cpu_cpc_ctxlock);
230 goto unbound;
231 }
232
233 if (curthread->t_bind_cpu != cpuid) {
234 splx(save_spl);
235 kpreempt_enable();
236 mutex_exit(&cp->cpu_cpc_ctxlock);
237 goto unbound;
238 }
239
240 kcpc_program(ctx, B_FALSE, B_TRUE);
241
242 splx(save_spl);
243 kpreempt_enable();
244
245 mutex_exit(&cp->cpu_cpc_ctxlock);
246 mutex_exit(&cpu_lock);
247
248 mutex_enter(&set->ks_lock);
249 set->ks_state |= KCPC_SET_BOUND;
250 cv_signal(&set->ks_condv);
251 mutex_exit(&set->ks_lock);
252
253 return (0);
254
255 unbound:
256 mutex_exit(&cpu_lock);
257 set->ks_ctx = NULL;
258 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
259 kcpc_ctx_free(ctx);
260 return (EAGAIN);
261 }
262
263 int
264 kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
265 {
266 kcpc_ctx_t *ctx;
267 int error;
268
269 /*
270 * Only one set is allowed per context, so ensure there is no
271 * existing context.
272 */
273
274 if (t->t_cpc_ctx != NULL)
275 return (EEXIST);
276
277 ctx = kcpc_ctx_alloc(KM_SLEEP);
278
279 /*
280 * The context must begin life frozen until it has been properly
281 * programmed onto the hardware. This prevents the context ops from
282 * worrying about it until we're ready.
283 */
284 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
285 ctx->kc_hrtime = gethrtime();
286
287 if (kcpc_assign_reqs(set, ctx) != 0) {
288 kcpc_ctx_free(ctx);
289 *subcode = CPC_RESOURCE_UNAVAIL;
290 return (EINVAL);
291 }
292
293 ctx->kc_cpuid = -1;
294 if (set->ks_flags & CPC_BIND_LWP_INHERIT)
295 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_LWPINHERIT);
296 ctx->kc_thread = t;
297 t->t_cpc_ctx = ctx;
298 /*
299 * Permit threads to look at their own hardware counters from userland.
300 */
301 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_NONPRIV);
302
303 /*
304 * Create the data store for this set.
305 */
306 set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);
307
308 if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
309 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
310 kcpc_ctx_free(ctx);
311 t->t_cpc_ctx = NULL;
312 return (error);
313 }
314
315 set->ks_ctx = ctx;
316 ctx->kc_set = set;
317
318 /*
319 * Add a device context to the subject thread.
320 */
321 installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
322 kcpc_lwp_create, NULL, kcpc_free, NULL);
323
324 /*
325 * Ask the backend to program the hardware.
326 */
327 if (t == curthread) {
328 int save_spl;
329
330 kpreempt_disable();
331 save_spl = spl_xcall();
332 kcpc_program(ctx, B_TRUE, B_TRUE);
333 splx(save_spl);
334 kpreempt_enable();
335 } else {
336 /*
337 * Since we are the agent LWP, we know the victim LWP is stopped
338 * until we're done here; no need to worry about preemption or
339 * migration. We still use an atomic op to clear the flag
340 * to ensure the flags are always self-consistent; they can
341 * still be accessed from, for instance, another CPU doing a
342 * kcpc_invalidate_all().
343 */
344 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
345 }
346
347 mutex_enter(&set->ks_lock);
348 set->ks_state |= KCPC_SET_BOUND;
349 cv_signal(&set->ks_condv);
350 mutex_exit(&set->ks_lock);
351
352 return (0);
353 }
354
355 /*
356 * Walk through each request in the set and ask the PCBE to configure a
357 * corresponding counter.
358 */
359 int
360 kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
361 {
362 int i;
363 int ret;
364 kcpc_request_t *rp;
365
366 for (i = 0; i < set->ks_nreqs; i++) {
367 int n;
368 rp = &set->ks_req[i];
369
370 n = rp->kr_picnum;
371
372 ASSERT(n >= 0 && n < cpc_ncounters);
373
374 ASSERT(ctx->kc_pics[n].kp_req == NULL);
375
376 if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
377 if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
378 == 0) {
379 *subcode = -1;
380 return (ENOTSUP);
381 }
382 /*
383 * If any of the counters have requested overflow
384 * notification, we flag the context as being one that
385 * cares about overflow.
386 */
387 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_SIGOVF);
388 }
389
390 rp->kr_config = NULL;
391 if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
392 rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
393 &(rp->kr_config), (void *)ctx)) != 0) {
394 kcpc_free_configs(set);
395 *subcode = ret;
396 switch (ret) {
397 case CPC_ATTR_REQUIRES_PRIVILEGE:
398 case CPC_HV_NO_ACCESS:
399 return (EACCES);
400 default:
401 return (EINVAL);
402 }
403 }
404
405 ctx->kc_pics[n].kp_req = rp;
406 rp->kr_picp = &ctx->kc_pics[n];
407 rp->kr_data = set->ks_data + rp->kr_index;
408 *rp->kr_data = rp->kr_preset;
409 }
410
411 return (0);
412 }
413
414 void
415 kcpc_free_configs(kcpc_set_t *set)
416 {
417 int i;
418
419 for (i = 0; i < set->ks_nreqs; i++)
420 if (set->ks_req[i].kr_config != NULL)
421 pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
422 }
423
424 /*
425 * buf points to a user address and the data should be copied out to that
426 * address in the current process.
427 */
428 int
429 kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
430 {
431 kcpc_ctx_t *ctx = set->ks_ctx;
432 int save_spl;
433
434 mutex_enter(&set->ks_lock);
435 if ((set->ks_state & KCPC_SET_BOUND) == 0) {
436 mutex_exit(&set->ks_lock);
437 return (EINVAL);
438 }
439 mutex_exit(&set->ks_lock);
440
441 /*
442 * Kernel preemption must be disabled while reading the hardware regs,
443 * and if this is a CPU-bound context, while checking the CPU binding of
444 * the current thread.
445 */
446 kpreempt_disable();
447 save_spl = spl_xcall();
448
449 if (ctx->kc_flags & KCPC_CTX_INVALID) {
450 splx(save_spl);
451 kpreempt_enable();
452 return (EAGAIN);
453 }
454
455 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
456 if (ctx->kc_cpuid != -1) {
457 if (curthread->t_bind_cpu != ctx->kc_cpuid) {
458 splx(save_spl);
459 kpreempt_enable();
460 return (EAGAIN);
461 }
462 }
463
464 if (ctx->kc_thread == curthread) {
465 uint64_t curtick = KCPC_GET_TICK();
466
467 ctx->kc_hrtime = gethrtime_waitfree();
468 pcbe_ops->pcbe_sample(ctx);
469 ctx->kc_vtick += curtick - ctx->kc_rawtick;
470 ctx->kc_rawtick = curtick;
471 }
472
473 /*
474 * The config may have been invalidated by
475 * the pcbe_sample op.
476 */
477 if (ctx->kc_flags & KCPC_CTX_INVALID) {
478 splx(save_spl);
479 kpreempt_enable();
480 return (EAGAIN);
481 }
482
483 }
484
485 splx(save_spl);
486 kpreempt_enable();
487
488 if (copyout(set->ks_data, buf,
489 set->ks_nreqs * sizeof (uint64_t)) == -1)
490 return (EFAULT);
491 if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
492 return (EFAULT);
493 if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
494 return (EFAULT);
495
496 return (0);
497 }
498
499 /*
500 * Stop the counters on the CPU this context is bound to.
501 */
502 static void
503 kcpc_stop_hw(kcpc_ctx_t *ctx)
504 {
505 cpu_t *cp;
506
507 kpreempt_disable();
508
509 if (ctx->kc_cpuid == CPU->cpu_id) {
510 cp = CPU;
511 } else {
512 cp = cpu_get(ctx->kc_cpuid);
513 }
514
515 ASSERT(cp != NULL && cp->cpu_cpc_ctx == ctx);
516 kcpc_cpu_stop(cp, B_FALSE);
517
518 kpreempt_enable();
519 }
520
521 int
522 kcpc_unbind(kcpc_set_t *set)
523 {
524 kcpc_ctx_t *ctx;
525 kthread_t *t;
526
527 /*
528 * We could be racing with the process's agent thread as it
529 * binds the set; we must wait for the set to finish binding
530 * before attempting to tear it down.
531 */
532 mutex_enter(&set->ks_lock);
533 while ((set->ks_state & KCPC_SET_BOUND) == 0)
534 cv_wait(&set->ks_condv, &set->ks_lock);
535 mutex_exit(&set->ks_lock);
536
537 ctx = set->ks_ctx;
538
539 /*
540 * Use kc_lock to synchronize with kcpc_restore().
541 */
542 mutex_enter(&ctx->kc_lock);
543 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
544 mutex_exit(&ctx->kc_lock);
545
546 if (ctx->kc_cpuid == -1) {
547 t = ctx->kc_thread;
548 /*
549 * The context is thread-bound and therefore has a device
550 * context. It will be freed via removectx() calling
551 * freectx() calling kcpc_free().
552 */
553 if (t == curthread) {
554 int save_spl;
555
556 kpreempt_disable();
557 save_spl = spl_xcall();
558 if (!(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED))
559 kcpc_unprogram(ctx, B_TRUE);
560 splx(save_spl);
561 kpreempt_enable();
562 }
563 #ifdef DEBUG
564 if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
565 kcpc_lwp_create, NULL, kcpc_free) == 0)
566 panic("kcpc_unbind: context %p not present on thread %p",
567 (void *)ctx, (void *)t);
568 #else
569 (void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
570 kcpc_lwp_create, NULL, kcpc_free);
571 #endif /* DEBUG */
572 t->t_cpc_set = NULL;
573 t->t_cpc_ctx = NULL;
574 } else {
575 /*
576 * If we are unbinding a CPU-bound set from a remote CPU, the
577 * native CPU's idle thread could be in the midst of programming
578 * this context onto the CPU. We grab the context's lock here to
579 * ensure that the idle thread is done with it. When we release
580 * the lock, the CPU no longer has a context and the idle thread
581 * will move on.
582 *
583 * cpu_lock must be held to prevent the CPU from being DR'd out
584 * while we disassociate the context from the cpu_t.
585 */
586 cpu_t *cp;
587 mutex_enter(&cpu_lock);
588 cp = cpu_get(ctx->kc_cpuid);
589 if (cp != NULL) {
590 /*
591 * The CPU may have been DR'd out of the system.
592 */
593 mutex_enter(&cp->cpu_cpc_ctxlock);
594 if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
595 kcpc_stop_hw(ctx);
596 ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
597 mutex_exit(&cp->cpu_cpc_ctxlock);
598 }
599 mutex_exit(&cpu_lock);
600 if (ctx->kc_thread == curthread) {
601 kcpc_free(ctx, 0);
602 curthread->t_cpc_set = NULL;
603 }
604 }
605
606 return (0);
607 }
608
609 int
610 kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
611 {
612 int i;
613
614 ASSERT(set != NULL);
615 ASSERT(set->ks_state & KCPC_SET_BOUND);
616 ASSERT(set->ks_ctx->kc_thread == curthread);
617 ASSERT(set->ks_ctx->kc_cpuid == -1);
618
619 if (index < 0 || index >= set->ks_nreqs)
620 return (EINVAL);
621
622 for (i = 0; i < set->ks_nreqs; i++)
623 if (set->ks_req[i].kr_index == index)
624 break;
625 ASSERT(i != set->ks_nreqs);
626
627 set->ks_req[i].kr_preset = preset;
628 return (0);
629 }
630
631 int
632 kcpc_restart(kcpc_set_t *set)
633 {
634 kcpc_ctx_t *ctx = set->ks_ctx;
635 int i;
636 int save_spl;
637
638 ASSERT(set->ks_state & KCPC_SET_BOUND);
639 ASSERT(ctx->kc_thread == curthread);
640 ASSERT(ctx->kc_cpuid == -1);
641
642 for (i = 0; i < set->ks_nreqs; i++) {
643 *(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
644 pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
645 0, 0, NULL, &set->ks_req[i].kr_config, NULL);
646 }
647
648 kpreempt_disable();
649 save_spl = spl_xcall();
650
651 /*
652 * If the user is doing this on a running set, make sure the counters
653 * are stopped first.
654 */
655 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
656 pcbe_ops->pcbe_allstop();
657
658 /*
659 * Ask the backend to program the hardware.
660 */
661 ctx->kc_rawtick = KCPC_GET_TICK();
662 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
663 pcbe_ops->pcbe_program(ctx);
664 splx(save_spl);
665 kpreempt_enable();
666
667 return (0);
668 }
669
670 /*
671 * Caller must hold kcpc_cpuctx_lock.
672 */
673 int
674 kcpc_enable(kthread_t *t, int cmd, int enable)
675 {
676 kcpc_ctx_t *ctx = t->t_cpc_ctx;
677 kcpc_set_t *set = t->t_cpc_set;
678 kcpc_set_t *newset;
679 int i;
680 int flag;
681 int err;
682
683 ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));
684
685 if (ctx == NULL) {
686 /*
687 * This thread has a set but no context; it must be a
688 * CPU-bound set.
689 */
690 ASSERT(t->t_cpc_set != NULL);
691 ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
692 return (EINVAL);
693 } else if (ctx->kc_flags & KCPC_CTX_INVALID)
694 return (EAGAIN);
695
696 if (cmd == CPC_ENABLE) {
697 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
698 return (EINVAL);
699 kpreempt_disable();
700 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
701 kcpc_restore(ctx);
702 kpreempt_enable();
703 } else if (cmd == CPC_DISABLE) {
704 if (ctx->kc_flags & KCPC_CTX_FREEZE)
705 return (EINVAL);
706 kpreempt_disable();
707 kcpc_save(ctx);
708 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
709 kpreempt_enable();
710 } else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
711 /*
712 * Strategy for usr/sys: stop counters and update set's presets
713 * with current counter values, unbind, update requests with
714 * new config, then re-bind.
715 */
716 flag = (cmd == CPC_USR_EVENTS) ?
717 CPC_COUNT_USER : CPC_COUNT_SYSTEM;
718
719 kpreempt_disable();
720 KCPC_CTX_FLAG_SET(ctx,
721 KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
722 pcbe_ops->pcbe_allstop();
723 kpreempt_enable();
724
725 for (i = 0; i < set->ks_nreqs; i++) {
726 set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
727 if (enable)
728 set->ks_req[i].kr_flags |= flag;
729 else
730 set->ks_req[i].kr_flags &= ~flag;
731 }
732 newset = kcpc_dup_set(set);
733 if (kcpc_unbind(set) != 0)
734 return (EINVAL);
735 t->t_cpc_set = newset;
736 if (kcpc_bind_thread(newset, t, &err) != 0) {
737 t->t_cpc_set = NULL;
738 kcpc_free_set(newset);
739 return (EINVAL);
740 }
741 } else
742 return (EINVAL);
743
744 return (0);
745 }
746
747 /*
748 * Provide PCBEs with a way of obtaining the configs of every counter which will
749 * be programmed together.
750 *
751 * If current is NULL, provide the first config.
752 *
753 * If data != NULL, caller wants to know where the data store associated with
754 * the config we return is located.
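*
* A minimal sketch of the iteration pattern a PCBE might use (the
* program_one() helper here is hypothetical):
*
*     uint64_t *data;
*     void *cfg = NULL;
*
*     while ((cfg = kcpc_next_config(token, cfg, &data)) != NULL)
*             program_one(cfg, data);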
755 */
756 void *
757 kcpc_next_config(void *token, void *current, uint64_t **data)
758 {
759 int i;
760 kcpc_pic_t *pic;
761 kcpc_ctx_t *ctx = (kcpc_ctx_t *)token;
762
763 if (current == NULL) {
764 /*
765 * Client would like the first config, which may not be in
766 * counter 0; we need to search through the counters for the
767 * first config.
768 */
769 for (i = 0; i < cpc_ncounters; i++)
770 if (ctx->kc_pics[i].kp_req != NULL)
771 break;
772 /*
773 * There are no counters configured for the given context.
774 */
775 if (i == cpc_ncounters)
776 return (NULL);
777 } else {
778 /*
779 * There surely is a faster way to do this.
780 */
781 for (i = 0; i < cpc_ncounters; i++) {
782 pic = &ctx->kc_pics[i];
783
784 if (pic->kp_req != NULL &&
785 current == pic->kp_req->kr_config)
786 break;
787 }
788
789 /*
790 * We found the current config at picnum i. Now search for the
791 * next configured PIC.
792 */
793 for (i++; i < cpc_ncounters; i++) {
794 pic = &ctx->kc_pics[i];
795 if (pic->kp_req != NULL)
796 break;
797 }
798
799 if (i == cpc_ncounters)
800 return (NULL);
801 }
802
803 if (data != NULL) {
804 *data = ctx->kc_pics[i].kp_req->kr_data;
805 }
806
807 return (ctx->kc_pics[i].kp_req->kr_config);
808 }
809
810
811 kcpc_ctx_t *
812 kcpc_ctx_alloc(int kmem_flags)
813 {
814 kcpc_ctx_t *ctx;
815 long hash;
816
817 ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), kmem_flags);
818 if (ctx == NULL)
819 return (NULL);
820
821 hash = CPC_HASH_CTX(ctx);
822 mutex_enter(&kcpc_ctx_llock[hash]);
823 ctx->kc_next = kcpc_ctx_list[hash];
824 kcpc_ctx_list[hash] = ctx;
825 mutex_exit(&kcpc_ctx_llock[hash]);
826
827 ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
828 cpc_ncounters, KM_SLEEP);
829
830 ctx->kc_cpuid = -1;
831
832 return (ctx);
833 }
834
835 /*
836 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
837 * in the flags.
838 */
839 static void
840 kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
841 {
842 kcpc_set_t *ks = ctx->kc_set, *cks;
843 int i, j;
844 int code;
845
846 ASSERT(ks != NULL);
847
848 if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
849 return;
850
851 cks = kmem_zalloc(sizeof (*cks), KM_SLEEP);
852 cks->ks_state &= ~KCPC_SET_BOUND;
853 cctx->kc_set = cks;
854 cks->ks_flags = ks->ks_flags;
855 cks->ks_nreqs = ks->ks_nreqs;
856 cks->ks_req = kmem_alloc(cks->ks_nreqs *
857 sizeof (kcpc_request_t), KM_SLEEP);
858 cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
859 KM_SLEEP);
860 cks->ks_ctx = cctx;
861
862 for (i = 0; i < cks->ks_nreqs; i++) {
863 cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
864 cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
865 (void) strncpy(cks->ks_req[i].kr_event,
866 ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
867 cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
868 cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
869 cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
870 if (ks->ks_req[i].kr_nattrs > 0) {
871 cks->ks_req[i].kr_attr =
872 kmem_alloc(ks->ks_req[i].kr_nattrs *
873 sizeof (kcpc_attr_t), KM_SLEEP);
874 }
875 for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
876 (void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
877 ks->ks_req[i].kr_attr[j].ka_name,
878 CPC_MAX_ATTR_LEN);
879 cks->ks_req[i].kr_attr[j].ka_val =
880 ks->ks_req[i].kr_attr[j].ka_val;
881 }
882 }
883 if (kcpc_configure_reqs(cctx, cks, &code) != 0)
884 kcpc_invalidate_config(cctx);
885
886 mutex_enter(&cks->ks_lock);
887 cks->ks_state |= KCPC_SET_BOUND;
888 cv_signal(&cks->ks_condv);
889 mutex_exit(&cks->ks_lock);
890 }
891
892
893 void
894 kcpc_ctx_free(kcpc_ctx_t *ctx)
895 {
896 kcpc_ctx_t **loc;
897 long hash = CPC_HASH_CTX(ctx);
898
899 mutex_enter(&kcpc_ctx_llock[hash]);
900 loc = &kcpc_ctx_list[hash];
901 ASSERT(*loc != NULL);
902 while (*loc != ctx)
903 loc = &(*loc)->kc_next;
904 *loc = ctx->kc_next;
905 mutex_exit(&kcpc_ctx_llock[hash]);
906
907 kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
908 cv_destroy(&ctx->kc_condv);
909 mutex_destroy(&ctx->kc_lock);
910 kmem_free(ctx, sizeof (*ctx));
911 }
912
913 /*
914 * Generic interrupt handler used on hardware that generates
915 * overflow interrupts.
916 *
917 * Note: executed at high-level interrupt context!
918 */
919 /*ARGSUSED*/
920 kcpc_ctx_t *
921 kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
922 {
923 kcpc_ctx_t *ctx;
924 kthread_t *t = curthread;
925 int i;
926
927 /*
928 * On both x86 and UltraSPARC, we may deliver the high-level
929 * interrupt in kernel mode, just after we've started to run an
930 * interrupt thread. (That's because the hardware helpfully
931 * delivers the overflow interrupt some random number of cycles
932 * after the instruction that caused the overflow, by which time
933 * we're in some part of the kernel, not necessarily running on
934 * the right thread).
935 *
936 * Check for this case here -- find the pinned thread
937 * that was running when the interrupt went off.
938 */
939 if (t->t_flag & T_INTR_THREAD) {
940 klwp_t *lwp;
941
942 atomic_inc_32(&kcpc_intrctx_count);
943
944 /*
945 * Note that t_lwp is always set to point at the underlying
946 * thread, thus this will work in the presence of nested
947 * interrupts.
948 */
949 ctx = NULL;
950 if ((lwp = t->t_lwp) != NULL) {
951 t = lwptot(lwp);
952 ctx = t->t_cpc_ctx;
953 }
954 } else
955 ctx = t->t_cpc_ctx;
956
957 if (ctx == NULL) {
958 /*
959 * This can easily happen if we're using the counters in
960 * "shared" mode, for example, and an overflow interrupt
961 * occurs while we are running cpustat. In that case, the
962 * bound thread that has the context that belongs to this
963 * CPU is almost certainly sleeping (if it was running on
964 * the CPU we'd have found it above), and the actual
965 * interrupted thread has no knowledge of performance counters!
966 */
967 ctx = curthread->t_cpu->cpu_cpc_ctx;
968 if (ctx != NULL) {
969 /*
970 * Return the bound context for this CPU to
971 * the interrupt handler so that it can synchronously
972 * sample the hardware counters and restart them.
973 */
974 return (ctx);
975 }
976
977 /*
978 * As long as the overflow interrupt really is delivered early
979 * enough after trapping into the kernel to avoid switching
980 * threads, we must always be able to find the cpc context,
981 * or something went terribly wrong, i.e. we ended up
982 * running a passivated interrupt thread, a kernel
983 * thread or we interrupted idle, all of which are Very Bad.
984 *
985 * We also could end up here owing to an incredibly unlikely
986 * race condition that exists on x86-based architectures when
987 * the cpc provider is in use; overflow interrupts are directed
988 * to the cpc provider if the 'dtrace_cpc_in_use' variable is
989 * set when we enter the handler. This variable is unset after
990 * overflow interrupts have been disabled on all CPUs and all
991 * contexts have been torn down. To stop interrupts, the cpc
992 * provider issues an xcall to the remote CPU before it tears
993 * down that CPU's context. Since high-priority xcalls on x86
994 * execute at a higher PIL than this handler, it is possible
995 * (though extremely unlikely) that the xcall could interrupt
996 * the overflow handler before the handler has checked the
997 * 'dtrace_cpc_in_use' variable, stop the counters, and return
998 * to the cpc provider, which could then tear down contexts and
999 * unset 'dtrace_cpc_in_use' *before* the CPU's overflow handler
1000 * has had a chance to check the variable. In that case, the
1001 * handler would direct the overflow into this code and no
1002 * valid context would be found. The default behavior when no
1003 * valid context is found is now to shout a warning to the
1004 * console and bump the 'kcpc_nullctx_count' variable.
1005 */
1006 if (kcpc_nullctx_panic)
1007 panic("null cpc context, thread %p", (void *)t);
1008 #ifdef DEBUG
1009 cmn_err(CE_NOTE,
1010 "null cpc context found in overflow handler!\n");
1011 #endif
1012 atomic_inc_32(&kcpc_nullctx_count);
1013 } else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
1014 /*
1015 * Schedule an ast to sample the counters, which will
1016 * propagate any overflow into the virtualized performance
1017 * counter(s), and may deliver a signal.
1018 */
1019 ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
1020 /*
1021 * If a counter has overflowed which was counting on behalf of
1022 * a request which specified CPC_OVF_NOTIFY_EMT, send the
1023 * process a signal.
1024 */
1025 for (i = 0; i < cpc_ncounters; i++) {
1026 if (ctx->kc_pics[i].kp_req != NULL &&
1027 bitmap & (1 << i) &&
1028 ctx->kc_pics[i].kp_req->kr_flags &
1029 CPC_OVF_NOTIFY_EMT) {
1030 /*
1031 * A signal has been requested for this PIC, so
1032 * freeze the context. The interrupt handler
1033 * has already stopped the counter hardware.
1034 */
1035 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
1036 atomic_or_uint(&ctx->kc_pics[i].kp_flags,
1037 KCPC_PIC_OVERFLOWED);
1038 }
1039 }
1040 aston(t);
1041 } else if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
1042 /*
1043 * Thread context is no longer valid, but there may be a valid
1044 * CPU context.
1045 */
1046 return (curthread->t_cpu->cpu_cpc_ctx);
1047 }
1048
1049 return (NULL);
1050 }
1051
1052 /*
1053 * The current thread context had an overflow interrupt; we're
1054 * executing here in high-level interrupt context.
1055 */
1056 /*ARGSUSED*/
1057 uint_t
1058 kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
1059 {
1060 kcpc_ctx_t *ctx;
1061 uint64_t bitmap;
1062 uint8_t *state;
1063 int save_spl;
1064
1065 if (pcbe_ops == NULL ||
1066 (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
1067 return (DDI_INTR_UNCLAIMED);
1068
1069 /*
1070 * Prevent any further interrupts.
1071 */
1072 pcbe_ops->pcbe_allstop();
1073
1074 if (dtrace_cpc_in_use) {
1075 state = &cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state;
1076
1077 /*
1078 * Set the per-CPU state bit to indicate that we are currently
1079 * processing an interrupt if it is currently free. Drop the
1080 * interrupt if the state isn't free (i.e. a configuration
1081 * event is taking place).
1082 */
1083 if (atomic_cas_8(state, DCPC_INTR_FREE,
1084 DCPC_INTR_PROCESSING) == DCPC_INTR_FREE) {
1085 int i;
1086 kcpc_request_t req;
1087
1088 ASSERT(dtrace_cpc_fire != NULL);
1089
1090 (*dtrace_cpc_fire)(bitmap);
1091
1092 ctx = curthread->t_cpu->cpu_cpc_ctx;
1093 if (ctx == NULL) {
1094 #ifdef DEBUG
1095 cmn_err(CE_NOTE, "null cpc context in "
1096 "hardware overflow handler!\n");
1097 #endif
1098 return (DDI_INTR_CLAIMED);
1099 }
1100
1101 /* Reset any counters that have overflowed */
1102 for (i = 0; i < ctx->kc_set->ks_nreqs; i++) {
1103 req = ctx->kc_set->ks_req[i];
1104
1105 if (bitmap & (1 << req.kr_picnum)) {
1106 pcbe_ops->pcbe_configure(req.kr_picnum,
1107 req.kr_event, req.kr_preset,
1108 req.kr_flags, req.kr_nattrs,
1109 req.kr_attr, &(req.kr_config),
1110 (void *)ctx);
1111 }
1112 }
1113 pcbe_ops->pcbe_program(ctx);
1114
1115 /*
1116 * We've finished processing the interrupt so set
1117 * the state back to free.
1118 */
1119 cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state =
1120 DCPC_INTR_FREE;
1121 membar_producer();
1122 }
1123 return (DDI_INTR_CLAIMED);
1124 }
1125
1126 /*
1127 * DTrace isn't involved so pass on accordingly.
1128 *
1129 * If the interrupt has occurred in the context of an lwp owning
1130 * the counters, then the handler posts an AST to the lwp to
1131 * trigger the actual sampling, and optionally deliver a signal or
1132 * restart the counters, on the way out of the kernel using
1133 * kcpc_hw_overflow_ast() (see below).
1134 *
1135 * On the other hand, if the handler returns the context to us
1136 * directly, then it means that there are no other threads in
1137 * the middle of updating it, no AST has been posted, and so we
1138 * should sample the counters here, and restart them with no
1139 * further fuss.
1140 *
1141 * The CPU's CPC context may disappear as a result of cross-call which
1142 * has higher PIL on x86, so protect the context by raising PIL to the
1143 * cross-call level.
1144 */
1145 save_spl = spl_xcall();
1146 if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
1147 uint64_t curtick = KCPC_GET_TICK();
1148
1149 ctx->kc_hrtime = gethrtime_waitfree();
1150 ctx->kc_vtick += curtick - ctx->kc_rawtick;
1151 ctx->kc_rawtick = curtick;
1152 pcbe_ops->pcbe_sample(ctx);
1153 pcbe_ops->pcbe_program(ctx);
1154 }
1155 splx(save_spl);
1156
1157 return (DDI_INTR_CLAIMED);
1158 }
1159
1160 /*
1161 * Called from trap() when processing the ast posted by the high-level
1162 * interrupt handler.
1163 */
1164 int
1165 kcpc_overflow_ast()
1166 {
1167 kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
1168 int i;
1169 int found = 0;
1170 uint64_t curtick = KCPC_GET_TICK();
1171
1172 ASSERT(ctx != NULL); /* Beware of interrupt skid. */
1173
1174 /*
1175 * An overflow happened: sample the context to ensure that
1176 * the overflow is propagated into the upper bits of the
1177 * virtualized 64-bit counter(s).
1178 */
1179 kpreempt_disable();
1180 ctx->kc_hrtime = gethrtime_waitfree();
1181 pcbe_ops->pcbe_sample(ctx);
1182 kpreempt_enable();
1183
1184 ctx->kc_vtick += curtick - ctx->kc_rawtick;
1185
1186 /*
1187 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
1188 * if that pic generated an overflow and if the request it was counting
1189 * on behalf of had CPC_OVF_NOTIFY_EMT specified. We go through all
1190 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
1191 * found any overflowed pics, keep the context frozen and return true
1192 * (thus causing a signal to be sent).
1193 */
1194 for (i = 0; i < cpc_ncounters; i++) {
1195 if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
1196 atomic_and_uint(&ctx->kc_pics[i].kp_flags,
1197 ~KCPC_PIC_OVERFLOWED);
1198 found = 1;
1199 }
1200 }
1201 if (found)
1202 return (1);
1203
1204 /*
1205 * Otherwise, re-enable the counters and continue life as before.
1206 */
1207 kpreempt_disable();
1208 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
1209 pcbe_ops->pcbe_program(ctx);
1210 kpreempt_enable();
1211 return (0);
1212 }
1213
1214 /*
1215 * Called when switching away from current thread.
1216 */
1217 static void
1218 kcpc_save(kcpc_ctx_t *ctx)
1219 {
1220 int err;
1221 int save_spl;
1222
1223 kpreempt_disable();
1224 save_spl = spl_xcall();
1225
1226 if (ctx->kc_flags & KCPC_CTX_INVALID) {
1227 if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
1228 splx(save_spl);
1229 kpreempt_enable();
1230 return;
1231 }
1232 /*
1233 * This context has been invalidated but the counters have not
1234 * been stopped. Stop them here and mark the context stopped.
1235 */
1236 kcpc_unprogram(ctx, B_TRUE);
1237 splx(save_spl);
1238 kpreempt_enable();
1239 return;
1240 }
1241
1242 pcbe_ops->pcbe_allstop();
1243 if (ctx->kc_flags & KCPC_CTX_FREEZE) {
1244 splx(save_spl);
1245 kpreempt_enable();
1246 return;
1247 }
1248
1249 /*
1250 * Need to sample for all reqs into each req's current mpic.
1251 */
1252 ctx->kc_hrtime = gethrtime_waitfree();
1253 ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
1254 pcbe_ops->pcbe_sample(ctx);
1255
1256 /*
1257 * Program the counters for measuring capacity and utilization, since
1258 * the user thread isn't using them anymore.
1259 */
1260 ASSERT(ctx->kc_cpuid == -1);
1261 cu_cpc_program(CPU, &err);
1262 splx(save_spl);
1263 kpreempt_enable();
1264 }
1265
1266 static void
1267 kcpc_restore(kcpc_ctx_t *ctx)
1268 {
1269 int save_spl;
1270
1271 mutex_enter(&ctx->kc_lock);
1272
1273 if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
1274 KCPC_CTX_INVALID) {
1275 /*
1276 * The context is invalidated but has not been marked stopped.
1277 * We mark it as such here because we will not start the
1278 * counters during this context switch.
1279 */
1280 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);
1281 }
1282
1283 if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) {
1284 mutex_exit(&ctx->kc_lock);
1285 return;
1286 }
1287
1288 /*
1289 * Set kc_flags to show that a kcpc_restore() is in progress to avoid
1290 * ctx & set related memory objects being freed without us knowing.
1291 * This can happen if an agent thread is executing a kcpc_unbind(),
1292 * with this thread as the target, whilst we're concurrently doing a
1293 * restorectx() during, for example, a proc_exit(). Effectively, by
1294 * doing this, we're asking kcpc_free() to cv_wait() until
1295 * kcpc_restore() has completed.
1296 */
1297 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_RESTORE);
1298 mutex_exit(&ctx->kc_lock);
1299
1300 /*
1301 * While programming the hardware, the counters should be stopped. We
1302 * don't do an explicit pcbe_allstop() here because they should have
1303 * been stopped already by the last consumer.
1304 */
1305 kpreempt_disable();
1306 save_spl = spl_xcall();
1307 kcpc_program(ctx, B_TRUE, B_TRUE);
1308 splx(save_spl);
1309 kpreempt_enable();
1310
1311 /*
1312 * Wake the agent thread if it's waiting in kcpc_free().
1313 */
1314 mutex_enter(&ctx->kc_lock);
1315 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_RESTORE);
1316 cv_signal(&ctx->kc_condv);
1317 mutex_exit(&ctx->kc_lock);
1318 }
1319
1320 /*
1321 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
1322 * following context operators to the idle thread on each CPU. They stop the
1323 * counters when the idle thread is switched on, and they start them again when
1324 * it is switched off.
1325 */
1326 /*ARGSUSED*/
1327 void
1328 kcpc_idle_save(struct cpu *cp)
1329 {
1330 /*
1331 * The idle thread shouldn't be run anywhere else.
1332 */
1333 ASSERT(CPU == cp);
1334
1335 /*
1336 * We must hold the CPU's context lock to ensure the context isn't freed
1337 * while we're looking at it.
1338 */
1339 mutex_enter(&cp->cpu_cpc_ctxlock);
1340
1341 if ((cp->cpu_cpc_ctx == NULL) ||
1342 (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
1343 mutex_exit(&cp->cpu_cpc_ctxlock);
1344 return;
1345 }
1346
1347 pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
1348 mutex_exit(&cp->cpu_cpc_ctxlock);
1349 }
1350
1351 void
1352 kcpc_idle_restore(struct cpu *cp)
1353 {
1354 /*
1355 * The idle thread shouldn't be run anywhere else.
1356 */
1357 ASSERT(CPU == cp);
1358
1359 /*
1360 * We must hold the CPU's context lock to ensure the context isn't freed
1361 * while we're looking at it.
1362 */
1363 mutex_enter(&cp->cpu_cpc_ctxlock);
1364
1365 if ((cp->cpu_cpc_ctx == NULL) ||
1366 (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
1367 mutex_exit(&cp->cpu_cpc_ctxlock);
1368 return;
1369 }
1370
1371 pcbe_ops->pcbe_allstop();
1372 mutex_exit(&cp->cpu_cpc_ctxlock);
1373 }
1374
1375 /*ARGSUSED*/
1376 static void
1377 kcpc_lwp_create(kthread_t *t, kthread_t *ct)
1378 {
1379 kcpc_ctx_t *ctx = t->t_cpc_ctx, *cctx;
1380 int i;
1381
1382 if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
1383 return;
1384
1385 rw_enter(&kcpc_cpuctx_lock, RW_READER);
1386 if (ctx->kc_flags & KCPC_CTX_INVALID) {
1387 rw_exit(&kcpc_cpuctx_lock);
1388 return;
1389 }
1390 cctx = kcpc_ctx_alloc(KM_SLEEP);
1391 kcpc_ctx_clone(ctx, cctx);
1392 rw_exit(&kcpc_cpuctx_lock);
1393
1394 /*
1395 * Copy the parent context's kc_flags field, but don't overwrite
1396 * the child's in case it was modified during kcpc_ctx_clone.
1397 */
1398 KCPC_CTX_FLAG_SET(cctx, ctx->kc_flags);
1399 cctx->kc_thread = ct;
1400 cctx->kc_cpuid = -1;
1401 ct->t_cpc_set = cctx->kc_set;
1402 ct->t_cpc_ctx = cctx;
1403
1404 if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
1405 kcpc_set_t *ks = cctx->kc_set;
1406 /*
1407 * Our contract with the user requires us to immediately send an
1408 * overflow signal to all children if we have the LWPINHERIT
1409 * and SIGOVF flags set. In addition, all counters should be
1410 * set to UINT64_MAX, and their pic's overflow flag turned on
1411 * so that our trap() processing knows to send a signal.
1412 */
1413 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
1414 for (i = 0; i < ks->ks_nreqs; i++) {
1415 kcpc_request_t *kr = &ks->ks_req[i];
1416
1417 if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
1418 *(kr->kr_data) = UINT64_MAX;
1419 atomic_or_uint(&kr->kr_picp->kp_flags,
1420 KCPC_PIC_OVERFLOWED);
1421 }
1422 }
1423 ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
1424 aston(ct);
1425 }
1426
1427 installctx(ct, cctx, kcpc_save, kcpc_restore,
1428 NULL, kcpc_lwp_create, NULL, kcpc_free, NULL);
1429 }
1430
1431 /*
1432 * Counter Stoppage Theory
1433 *
1434 * The counters may need to be stopped properly at the following occasions:
1435 *
1436 * 1) An LWP exits.
1437 * 2) A thread exits.
1438 * 3) An LWP performs an exec().
1439 * 4) A bound set is unbound.
1440 *
1441 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
1442 * to be freed as well.
1443 *
1444 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
1445 * when the thread is freed, kcpc_free(), called by freectx(), frees the
1446 * context.
1447 *
1448 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
1449 *
1450 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
1451 * been called from exec. It stops the counters _and_ frees the context.
1452 *
1453 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
1454 *
1455 * CPU-bound counters are always stopped via kcpc_unbind().
1456 */
1457
1458 /*
1459 * We're being called to delete the context; we ensure that all associated data
1460 * structures are freed, and that the hardware is passivated if this is an exec.
1461 */
1462
1463 /*ARGSUSED*/
1464 void
1465 kcpc_free(kcpc_ctx_t *ctx, int isexec)
1466 {
1467 int i;
1468 kcpc_set_t *set = ctx->kc_set;
1469
1470 ASSERT(set != NULL);
1471
1472 /*
1473 * Wait for kcpc_restore() to finish before we tear things down.
1474 */
1475 mutex_enter(&ctx->kc_lock);
1476 while (ctx->kc_flags & KCPC_CTX_RESTORE)
1477 cv_wait(&ctx->kc_condv, &ctx->kc_lock);
1478 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
1479 mutex_exit(&ctx->kc_lock);
1480
1481 if (isexec) {
1482 /*
1483 * This thread is execing, and after the exec it should not have
1484 * any performance counter context. Stop the counters properly
1485 * here so the system isn't surprised by an overflow interrupt
1486 * later.
1487 */
1488 if (ctx->kc_cpuid != -1) {
1489 cpu_t *cp;
1490 /*
1491 * CPU-bound context; stop the appropriate CPU's ctrs.
1492 * Hold cpu_lock while examining the CPU to ensure it
1493 * doesn't go away.
1494 */
1495 mutex_enter(&cpu_lock);
1496 cp = cpu_get(ctx->kc_cpuid);
1497 /*
1498 * The CPU could have been DR'd out, so only stop the
1499 * CPU and clear its context pointer if the CPU still
1500 * exists.
1501 */
1502 if (cp != NULL) {
1503 mutex_enter(&cp->cpu_cpc_ctxlock);
1504 kcpc_stop_hw(ctx);
1505 mutex_exit(&cp->cpu_cpc_ctxlock);
1506 }
1507 mutex_exit(&cpu_lock);
1508 ASSERT(curthread->t_cpc_ctx == NULL);
1509 } else {
1510 int save_spl;
1511
1512 /*
1513 * Thread-bound context; stop _this_ CPU's counters.
1514 */
1515 kpreempt_disable();
1516 save_spl = spl_xcall();
1517 kcpc_unprogram(ctx, B_TRUE);
1518 curthread->t_cpc_ctx = NULL;
1519 splx(save_spl);
1520 kpreempt_enable();
1521 }
1522
1523 /*
1524 * Since we are being called from an exec and we know that
1525 * exec is not permitted via the agent thread, we should clean
1526 * up this thread's CPC state completely, and not leave dangling
1527 * CPC pointers behind.
1528 */
1529 ASSERT(ctx->kc_thread == curthread);
1530 curthread->t_cpc_set = NULL;
1531 }
1532
1533 /*
1534 * Walk through each request in this context's set and free the PCBE's
1535 * configuration if it exists.
1536 */
1537 for (i = 0; i < set->ks_nreqs; i++) {
1538 if (set->ks_req[i].kr_config != NULL)
1539 pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
1540 }
1541
1542 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
1543 kcpc_ctx_free(ctx);
1544 kcpc_free_set(set);
1545 }
1546
1547 /*
1548 * Free the memory associated with a request set.
1549 */
1550 void
1551 kcpc_free_set(kcpc_set_t *set)
1552 {
1553 int i;
1554 kcpc_request_t *req;
1555
1556 ASSERT(set->ks_req != NULL);
1557
1558 for (i = 0; i < set->ks_nreqs; i++) {
1559 req = &set->ks_req[i];
1560
1561 if (req->kr_nattrs != 0) {
1562 kmem_free(req->kr_attr,
1563 req->kr_nattrs * sizeof (kcpc_attr_t));
1564 }
1565 }
1566
1567 kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
1568 cv_destroy(&set->ks_condv);
1569 mutex_destroy(&set->ks_lock);
1570 kmem_free(set, sizeof (kcpc_set_t));
1571 }
1572
1573 /*
1574 * Grab every existing context and mark it as invalid.
1575 */
1576 void
1577 kcpc_invalidate_all(void)
1578 {
1579 kcpc_ctx_t *ctx;
1580 long hash;
1581
1582 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
1583 mutex_enter(&kcpc_ctx_llock[hash]);
1584 for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
1585 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
1586 mutex_exit(&kcpc_ctx_llock[hash]);
1587 }
1588 }
1589
1590 /*
1591 * Interface for PCBEs to signal that an existing configuration has suddenly
1592 * become invalid.
1593 */
1594 void
1595 kcpc_invalidate_config(void *token)
1596 {
1597 kcpc_ctx_t *ctx = token;
1598
1599 ASSERT(ctx != NULL);
1600
1601 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
1602 }
1603
1604 /*
1605 * Called from lwp_exit() and thread_exit()
1606 */
1607 void
1608 kcpc_passivate(void)
1609 {
1610 kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
1611 kcpc_set_t *set = curthread->t_cpc_set;
1612 int save_spl;
1613
1614 if (set == NULL)
1615 return;
1616
1617 if (ctx == NULL) {
1618 /*
1619 * This thread has a set but no context; it must be a CPU-bound
1620 * set. The hardware will be stopped via kcpc_unbind() when the
1621 * process exits and closes its file descriptors with
1622 * kcpc_close(). Our only job here is to clean up this thread's
1623 * state; the set will be freed with the unbind().
1624 */
1625 (void) kcpc_unbind(set);
1626 /*
1627 * Unbinding a set belonging to the current thread should clear
1628 * its set pointer.
1629 */
1630 ASSERT(curthread->t_cpc_set == NULL);
1631 return;
1632 }
1633
1634 kpreempt_disable();
1635 save_spl = spl_xcall();
1636 curthread->t_cpc_set = NULL;
1637
1638 /*
1639 * This thread/LWP is exiting but context switches will continue to
1640 * happen for a bit as the exit proceeds. Kernel preemption must be
1641 * disabled here to prevent a race between checking or setting the
1642 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
1643 * a context switch.
1644 */
1645 if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
1646 kcpc_unprogram(ctx, B_TRUE);
1647 KCPC_CTX_FLAG_SET(ctx,
1648 KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
1649 }
1650
1651 /*
1652 * We're cleaning up after this thread; ensure there are no dangling
1653 * CPC pointers left behind. The context and set will be freed by
1654 * freectx().
1655 */
1656 curthread->t_cpc_ctx = NULL;
1657
1658 splx(save_spl);
1659 kpreempt_enable();
1660 }
1661
1662 /*
1663 * Assign the requests in the given set to the PICs in the context.
1664 * Returns 0 if successful, -1 on failure.
1665 */
1666 /*ARGSUSED*/
1667 int
1668 kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
1669 {
1670 int i;
1671 int *picnum_save;
1672
1673 ASSERT(set->ks_nreqs <= cpc_ncounters);
1674
1675 /*
1676 * Provide kcpc_tryassign() with scratch space to avoid doing an
1677 * alloc/free with every invocation.
1678 */
1679 picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
1680 /*
1681 * kcpc_tryassign() blindly walks through each request in the set,
1682 * seeing if a counter can count its event. If yes, it assigns that
1683 * counter. However, that counter may have been the only capable counter
1684 * for _another_ request's event. The solution is to try starting the
1685 * assignment from every possible request. Note that this does not cover
1686 * all solutions, as that would require trying all unique orderings of
1687 * requests, an n! operation unacceptable with many counters.
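*
* For example, if request A's event is countable only on pic0 while
* request B's event is countable on pic0 or pic1, starting with B could
* claim pic0 for B and strand A; starting with A instead succeeds.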
1688 */
1689 for (i = 0; i < set->ks_nreqs; i++)
1690 if (kcpc_tryassign(set, i, picnum_save) == 0)
1691 break;
1692
1693 kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
1694 if (i == set->ks_nreqs)
1695 return (-1);
1696 return (0);
1697 }
1698
1699 static int
1700 kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
1701 {
1702 int i;
1703 int j;
1704 uint64_t bitmap = 0, resmap = 0;
1705 uint64_t ctrmap;
1706
1707 /*
1708 * We are attempting to assign the reqs to pics, but we may fail. If we
1709 * fail, we need to restore the state of the requests to what it was
1710 * when we found it, as some reqs may have been explicitly assigned to
1711 * a specific PIC beforehand. We do this by snapshotting the assignments
1712 * now and restoring from it later if we fail.
1713 *
1714 * Also we note here which counters have already been claimed by
1715 * requests with explicit counter assignments.
1716 */
1717 for (i = 0; i < set->ks_nreqs; i++) {
1718 scratch[i] = set->ks_req[i].kr_picnum;
1719 if (set->ks_req[i].kr_picnum != -1)
1720 resmap |= (1 << set->ks_req[i].kr_picnum);
1721 }
1722
1723 /*
1724 * Walk through requests assigning them to the first PIC that is
1725 * capable.
1726 */
1727 i = starting_req;
1728 do {
1729 if (set->ks_req[i].kr_picnum != -1) {
1730 ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
1731 bitmap |= (1 << set->ks_req[i].kr_picnum);
1732 if (++i == set->ks_nreqs)
1733 i = 0;
1734 continue;
1735 }
1736
1737 ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
1738 for (j = 0; j < cpc_ncounters; j++) {
1739 if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
1740 (resmap & (1 << j)) == 0) {
1741 /*
1742 * We can assign this counter because:
1743 *
1744 * 1. It can count the event (ctrmap)
1745 * 2. It hasn't been assigned yet (bitmap)
1746 * 3. It wasn't reserved by a request (resmap)
1747 */
1748 bitmap |= (1 << j);
1749 break;
1750 }
1751 }
1752 if (j == cpc_ncounters) {
1753 for (i = 0; i < set->ks_nreqs; i++)
1754 set->ks_req[i].kr_picnum = scratch[i];
1755 return (-1);
1756 }
1757 set->ks_req[i].kr_picnum = j;
1758
1759 if (++i == set->ks_nreqs)
1760 i = 0;
1761 } while (i != starting_req);
1762
1763 return (0);
1764 }
1765
1766 kcpc_set_t *
1767 kcpc_dup_set(kcpc_set_t *set)
1768 {
1769 kcpc_set_t *new;
1770 int i;
1771 int j;
1772
1773 new = kmem_zalloc(sizeof (*new), KM_SLEEP);
1774 new->ks_state &= ~KCPC_SET_BOUND;
1775 new->ks_flags = set->ks_flags;
1776 new->ks_nreqs = set->ks_nreqs;
1777 new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
1778 KM_SLEEP);
1779 new->ks_data = NULL;
1780 new->ks_ctx = NULL;
1781
1782 for (i = 0; i < new->ks_nreqs; i++) {
1783 new->ks_req[i].kr_config = NULL;
1784 new->ks_req[i].kr_index = set->ks_req[i].kr_index;
1785 new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
1786 new->ks_req[i].kr_picp = NULL;
1787 new->ks_req[i].kr_data = NULL;
1788 (void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
1789 CPC_MAX_EVENT_LEN);
1790 new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
1791 new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
1792 new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
1793 new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
1794 sizeof (kcpc_attr_t), KM_SLEEP);
1795 for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
1796 new->ks_req[i].kr_attr[j].ka_val =
1797 set->ks_req[i].kr_attr[j].ka_val;
1798 (void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
1799 set->ks_req[i].kr_attr[j].ka_name,
1800 CPC_MAX_ATTR_LEN);
1801 }
1802 }
1803
1804 return (new);
1805 }
1806
1807 int
1808 kcpc_allow_nonpriv(void *token)
1809 {
1810 return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
1811 }
1812
1813 void
1814 kcpc_invalidate(kthread_t *t)
1815 {
1816 kcpc_ctx_t *ctx = t->t_cpc_ctx;
1817
1818 if (ctx != NULL)
1819 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
1820 }
1821
1822 /*
1823 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
1824 * are used to construct PCBE names, starting with the most specific,
1825 * "pcbe.first.second.third.fourth" and ending with the least specific,
1826 * "pcbe.first".
1827 *
1828 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
1829 */
1830 int
1831 kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
1832 {
1833 uint_t s[3];
1834
1835 s[0] = first;
1836 s[1] = second;
1837 s[2] = third;
1838
1839 return (modload_qualified("pcbe",
1840 "pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0);
1841 }
1842
1843 /*
1844 * Create one or more CPC contexts for a given CPU with the specified counter
1845 * event requests.
1846 *
1847 * If the number of requested counter events is less than or equal to the
1848 * number of hardware counters on a CPU and they can all be assigned to the
1849 * counters on a CPU at the same time, then make one CPC context.
1850 *
1851 * Otherwise, multiple CPC contexts are created to allow multiplexing more
1852 * counter events than there are counters, by iterating through all of the
1853 * CPC contexts, programming the counters with each CPC context one at a
1854 * time and measuring the resulting counter values. Each of the resulting
1855 * CPC contexts contains some number of requested counter events, less than
1856 * or equal to the number of counters on a CPU, depending on whether all the
1857 * counter events can be programmed on all the counters at the same time.
1858 *
1859 * Flags to kmem_{,z}alloc() are passed in as an argument to allow specifying
1860 * whether memory allocation should be non-blocking or not. The code will try
1861 * to allocate *whole* CPC contexts if possible. If there is any memory
1862 * allocation failure during the allocations needed for a given CPC context, it
1863 * will skip allocating that CPC context because it cannot allocate the whole
1864 * thing. Thus, the only time that it will end up allocating none (i.e. no CPC
1865 * contexts whatsoever) is when it cannot even allocate *one* whole CPC context
1866 * without a memory allocation failure occurring.
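*
* A consumer multiplexes the returned contexts by cycling through them,
* roughly as follows (sketch only; scheduling and sampling are elided):
*
*     for (i = 0; i < nctx; i++) {
*             kcpc_program(ctx_array[i], B_FALSE, B_FALSE);
*             ... let the counters run, then sample them ...
*             kcpc_unprogram(ctx_array[i], B_FALSE);
*     }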
1867 */
1868 int
1869 kcpc_cpu_ctx_create(cpu_t *cp, kcpc_request_list_t *req_list, int kmem_flags,
1870 kcpc_ctx_t ***ctx_ptr_array, size_t *ctx_ptr_array_sz)
1871 {
1872 kcpc_ctx_t **ctx_ptrs;
1873 int nctx;
1874 int nctx_ptrs;
1875 int nreqs;
1876 kcpc_request_t *reqs;
1877
1878 if (cp == NULL || ctx_ptr_array == NULL || ctx_ptr_array_sz == NULL ||
1879 req_list == NULL || req_list->krl_cnt < 1)
1880 return (-1);
1881
1882 /*
1883 * Allocate the array of context pointers, assuming that each set contains
1884 * exactly one counter event request for each counter on a CPU.
1885 */
1886 nreqs = req_list->krl_cnt;
1887 nctx_ptrs = (nreqs + cpc_ncounters - 1) / cpc_ncounters;
1888 ctx_ptrs = kmem_zalloc(nctx_ptrs * sizeof (kcpc_ctx_t *), kmem_flags);
1889 if (ctx_ptrs == NULL)
1890 return (-2);
1891
1892 /*
1893 * Fill in sets of requests
1894 */
1895 nctx = 0;
1896 reqs = req_list->krl_list;
1897 while (nreqs > 0) {
1898 kcpc_ctx_t *ctx;
1899 kcpc_set_t *set;
1900 int subcode;
1901
1902 /*
1903 * Allocate CPC context and set for requested counter events
1904 */
1905 ctx = kcpc_ctx_alloc(kmem_flags);
1906 set = kcpc_set_create(reqs, nreqs, 0, kmem_flags);
1907 if (set == NULL) {
1908 kcpc_ctx_free(ctx);
1909 break;
1910 }
1911
1912 /*
1913 * Determine assignment of requested counter events to specific
1914 * counters
1915 */
1916 if (kcpc_assign_reqs(set, ctx) != 0) {
1917 /*
1918 * We may not be able to assign the requested counter
1919 * events to all counters, since not every counter can
1920 * count every event. When this happens, put only one
1921 * counter event in the set of counter requests, since
1922 * at least one of the counters must be able to count
1923 * the event.
1924 */
1925 kcpc_free_set(set);
1926 set = kcpc_set_create(reqs, 1, 0, kmem_flags);
1927 if (set == NULL) {
1928 kcpc_ctx_free(ctx);
1929 break;
1930 }
1931 if (kcpc_assign_reqs(set, ctx) != 0) {
1932 #ifdef DEBUG
1933 cmn_err(CE_NOTE, "!kcpc_cpu_ctx_create: can't "
1934 "assign counter event %s!\n",
1935 set->ks_req->kr_event);
1936 #endif
1937 kcpc_free_set(set);
1938 kcpc_ctx_free(ctx);
1939 reqs++;
1940 nreqs--;
1941 continue;
1942 }
1943 }
1944
1945 /*
1946 * Allocate memory needed to hold requested counter event data
1947 */
1948 set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t),
1949 kmem_flags);
1950 if (set->ks_data == NULL) {
1951 kcpc_free_set(set);
1952 kcpc_ctx_free(ctx);
1953 break;
1954 }
1955
1956 /*
1957 * Configure requested counter events
1958 */
1959 if (kcpc_configure_reqs(ctx, set, &subcode) != 0) {
1960 #ifdef DEBUG
1961 cmn_err(CE_NOTE,
1962 "!kcpc_cpu_ctx_create: can't configure "
1963 "set of counter event requests!\n");
1964 #endif
1965 reqs += set->ks_nreqs;
1966 nreqs -= set->ks_nreqs;
1967 kmem_free(set->ks_data,
1968 set->ks_nreqs * sizeof (uint64_t));
1969 kcpc_free_set(set);
1970 kcpc_ctx_free(ctx);
1971 continue;
1972 }

		/*
		 * Point set of counter event requests at this context and fill
		 * in CPC context
		 */
		set->ks_ctx = ctx;
		ctx->kc_set = set;
		ctx->kc_cpuid = cp->cpu_id;
		ctx->kc_thread = curthread;

		ctx_ptrs[nctx] = ctx;

		/*
		 * Update requests and how many are left to be assigned to sets
		 */
		reqs += set->ks_nreqs;
		nreqs -= set->ks_nreqs;

		/*
		 * Increment number of CPC contexts and allocate bigger array
		 * for context pointers as needed
		 */
		nctx++;
		if (nctx >= nctx_ptrs) {
			kcpc_ctx_t **new;
			int new_cnt;

			/*
			 * Grow array of context pointers based on how many
			 * contexts allocated so far and how many counter
			 * requests left to assign
			 */
			new_cnt = nctx_ptrs +
			    ((nreqs + cpc_ncounters - 1) / cpc_ncounters);
			new = kmem_zalloc(new_cnt * sizeof (kcpc_ctx_t *),
			    kmem_flags);
			if (new == NULL)
				break;

			/*
			 * Copy contents of old array of context pointers into
			 * new one
			 */
			bcopy(ctx_ptrs, new,
			    nctx_ptrs * sizeof (kcpc_ctx_t *));

			/*
			 * Free old array of context pointers and use newly
			 * allocated one instead
			 */
			kmem_free(ctx_ptrs, nctx_ptrs * sizeof (kcpc_ctx_t *));
			ctx_ptrs = new;
			nctx_ptrs = new_cnt;
		}
	}

	/*
	 * Return error and NULL context pointer array if no CPC contexts
	 * were filled in
	 */
	if (nctx == 0) {
		kmem_free(ctx_ptrs, nctx_ptrs * sizeof (kcpc_ctx_t *));
		*ctx_ptr_array = NULL;
		*ctx_ptr_array_sz = 0;
		return (-2);
	}

	*ctx_ptr_array = ctx_ptrs;
	*ctx_ptr_array_sz = nctx_ptrs * sizeof (kcpc_ctx_t *);
	return (nctx);
}
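
/*
 * Illustrative sketch (not part of this file): how a caller might use this
 * routine with a previously built request list (see kcpc_reqs_init() and
 * kcpc_reqs_add() below). "list" and "my_ctxs" are hypothetical names and
 * error handling is abbreviated.
 *
 *	kcpc_ctx_t **my_ctxs;
 *	size_t my_ctxs_sz;
 *	int nctx;
 *
 *	nctx = kcpc_cpu_ctx_create(cp, list, KM_NOSLEEP, &my_ctxs,
 *	    &my_ctxs_sz);
 *	if (nctx > 0) {
 *		... program each my_ctxs[i] on cp, e.g. via
 *		kcpc_cpu_program(), and eventually free the array with
 *		kmem_free(my_ctxs, my_ctxs_sz) ...
 *	}
 */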

/*
 * Return whether PCBE supports given counter event
 */
boolean_t
kcpc_event_supported(char *event)
{
	if (pcbe_ops == NULL || pcbe_ops->pcbe_event_coverage(event) == 0)
		return (B_FALSE);

	return (B_TRUE);
}
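
/*
 * Illustrative sketch (not part of this file): a caller would typically gate
 * request construction on event support; the event name below is only an
 * example.
 *
 *	if (kcpc_event_supported("PAPI_tot_ins") == B_TRUE) {
 *		... add "PAPI_tot_ins" to a request list with
 *		kcpc_reqs_add() ...
 *	}
 */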

/*
 * Program counters on current CPU with given CPC context
 *
 * If kernel is interposing on counters to measure hardware capacity and
 * utilization, then unprogram counters for kernel *before* programming them
 * with specified CPC context.
 *
 * kcpc_{program,unprogram}() may be called either directly by a thread running
 * on the target CPU or from a cross-call from another CPU. To protect
 * programming and unprogramming from being interrupted by cross-calls, callers
 * who execute kcpc_{program,unprogram} should raise PIL to the level used by
 * cross-calls.
 */
void
kcpc_program(kcpc_ctx_t *ctx, boolean_t for_thread, boolean_t cu_interpose)
{
	int error;

	ASSERT(IS_HIPIL());

	/*
	 * CPC context shouldn't be NULL, its CPU field should specify current
	 * CPU or be -1 to specify any CPU when the context is bound to a
	 * thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);
	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1)
		return;

	/*
	 * Unprogram counters for kernel measuring hardware capacity and
	 * utilization
	 */
	if (cu_interpose == B_TRUE) {
		cu_cpc_unprogram(CPU, &error);
	} else {
		kcpc_set_t *set = ctx->kc_set;
		int i;

		ASSERT(set != NULL);

		/*
		 * Since cu_interpose is false, we are programming CU context.
		 * In general, PCBE can continue from the state saved in the
		 * set, but it is not very reliable, so we start again from the
		 * preset value.
		 */
		for (i = 0; i < set->ks_nreqs; i++) {
			/*
			 * Reset the virtual counter value to the preset value.
			 */
			*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;

			/*
			 * Reset PCBE to the preset value.
			 */
			pcbe_ops->pcbe_configure(0, NULL,
			    set->ks_req[i].kr_preset,
			    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
		}
	}

	/*
	 * Program counters with specified CPC context
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);

	/*
	 * Denote that counters are programmed; this is recorded differently
	 * for thread and CPU CPC contexts
	 */
	if (for_thread == B_TRUE)
		KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
	else
		CPU->cpu_cpc_ctx = ctx;
}
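
/*
 * Illustrative sketch (not part of this file): a caller already running on
 * the target CPU can satisfy the PIL requirement described above by raising
 * PIL to cross-call level around the call. "my_ctx" is a hypothetical, fully
 * constructed context bound to this CPU or thread; error handling is omitted.
 *
 *	int save_spl;
 *
 *	kpreempt_disable();
 *	save_spl = spl_xcall();
 *	kcpc_program(my_ctx, B_TRUE, B_TRUE);
 *	splx(save_spl);
 *	kpreempt_enable();
 *
 * Callers on a different CPU should instead use the cpu_call()-based wrappers
 * kcpc_cpu_program() and kcpc_cpu_stop() defined later in this file.
 */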

/*
 * Unprogram counters with given CPC context on current CPU
 *
 * If kernel is interposing on counters to measure hardware capacity and
 * utilization, then program counters for the kernel capacity and utilization
 * *after* unprogramming them for given CPC context.
 *
 * See the comment for kcpc_program regarding the synchronization with
 * cross-calls.
 */
void
kcpc_unprogram(kcpc_ctx_t *ctx, boolean_t cu_interpose)
{
	int error;

	ASSERT(IS_HIPIL());

	/*
	 * CPC context shouldn't be NULL, its CPU field should specify current
	 * CPU or be -1 to specify any CPU when the context is bound to a
	 * thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);

	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1 ||
	    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) != 0) {
		return;
	}

	/*
	 * Specified CPC context to be unprogrammed should be bound to current
	 * CPU or thread
	 */
	ASSERT(CPU->cpu_cpc_ctx == ctx || curthread->t_cpc_ctx == ctx);

	/*
	 * Stop counters
	 */
	pcbe_ops->pcbe_allstop();
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);

	/*
	 * Allow kernel to interpose on counters and program them for its own
	 * use to measure hardware capacity and utilization if cu_interpose
	 * argument is true
	 */
	if (cu_interpose == B_TRUE)
		cu_cpc_program(CPU, &error);
}

/*
 * Read CPU Performance Counter (CPC) on current CPU and call specified update
 * routine with data for each counter event currently programmed on CPU
 */
int
kcpc_read(kcpc_update_func_t update_func)
{
	kcpc_ctx_t *ctx;
	int i;
	kcpc_request_t *req;
	int retval;
	kcpc_set_t *set;

	ASSERT(IS_HIPIL());

	/*
	 * Can't grab locks or block because may be called inside dispatcher
	 */
	kpreempt_disable();

	ctx = CPU->cpu_cpc_ctx;
	if (ctx == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Read counter data from current CPU
	 */
	pcbe_ops->pcbe_sample(ctx);

	set = ctx->kc_set;
	if (set == NULL || set->ks_req == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Call update function with caller-supplied pointer and data for each
	 * CPC event request currently programmed on current CPU
	 */
	req = set->ks_req;
	retval = 0;
	for (i = 0; i < set->ks_nreqs; i++) {
		int ret;

		if (req[i].kr_data == NULL)
			break;

		ret = update_func(req[i].kr_ptr, *req[i].kr_data);
		if (ret < 0)
			retval = ret;
	}

	kpreempt_enable();

	return (retval);
}
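
/*
 * Illustrative sketch (not part of this file): a minimal update routine of
 * the kcpc_update_func_t shape that kcpc_read() invokes once per programmed
 * counter event. The "my_counter_t" type and "my_update" name are
 * hypothetical; the first argument is whatever pointer the caller passed to
 * kcpc_reqs_add() for that event.
 *
 *	typedef struct my_counter {
 *		uint64_t mc_value;
 *	} my_counter_t;
 *
 *	static int
 *	my_update(void *ptr, uint64_t new_value)
 *	{
 *		my_counter_t *mc = ptr;
 *
 *		mc->mc_value = new_value;
 *		return (0);
 *	}
 *
 * kcpc_read(my_update) must then be invoked at cross-call PIL on the CPU
 * whose counters are to be read, e.g. from a cross-call handler.
 */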

/*
 * Initialize list of counter event requests
 */
kcpc_request_list_t *
kcpc_reqs_init(int nreqs, int kmem_flags)
{
	kcpc_request_list_t *req_list;
	kcpc_request_t *reqs;

	if (nreqs < 1)
		return (NULL);

	req_list = kmem_zalloc(sizeof (kcpc_request_list_t), kmem_flags);
	if (req_list == NULL)
		return (NULL);

	reqs = kmem_zalloc(nreqs * sizeof (kcpc_request_t), kmem_flags);
	if (reqs == NULL) {
		kmem_free(req_list, sizeof (kcpc_request_list_t));
		return (NULL);
	}

	req_list->krl_list = reqs;
	req_list->krl_cnt = 0;
	req_list->krl_max = nreqs;
	return (req_list);
}

/*
 * Add counter event request to given list of counter event requests
 */
int
kcpc_reqs_add(kcpc_request_list_t *req_list, char *event, uint64_t preset,
    uint_t flags, uint_t nattrs, kcpc_attr_t *attr, void *ptr, int kmem_flags)
{
	kcpc_request_t *req;

	if (req_list == NULL || req_list->krl_list == NULL)
		return (-1);

	ASSERT(req_list->krl_max != 0);

	/*
	 * Allocate more space (if needed)
	 */
	if (req_list->krl_cnt >= req_list->krl_max) {
		kcpc_request_t *new;
		kcpc_request_t *old;

		old = req_list->krl_list;
		new = kmem_zalloc((req_list->krl_max +
		    cpc_ncounters) * sizeof (kcpc_request_t), kmem_flags);
		if (new == NULL)
			return (-2);

		req_list->krl_list = new;
		bcopy(old, req_list->krl_list,
		    req_list->krl_cnt * sizeof (kcpc_request_t));
		kmem_free(old, req_list->krl_max * sizeof (kcpc_request_t));
		req_list->krl_max += cpc_ncounters;
	}

	/*
	 * Fill in request as much as possible now, but some fields will need
	 * to be set when request is assigned to a set.
	 */
	req = &req_list->krl_list[req_list->krl_cnt];
	req->kr_config = NULL;
	req->kr_picnum = -1;	/* have CPC pick this */
	req->kr_index = -1;	/* set when assigning request to set */
	req->kr_data = NULL;	/* set when configuring request */
	(void) strlcpy(req->kr_event, event, CPC_MAX_EVENT_LEN);
	req->kr_preset = preset;
	req->kr_flags = flags;
	req->kr_nattrs = nattrs;
	req->kr_attr = attr;
	/*
	 * Keep pointer given by caller to give to update function when this
	 * counter event is sampled/read
	 */
	req->kr_ptr = ptr;

	req_list->krl_cnt++;

	return (0);
}

/*
 * Reset list of CPC event requests so its space can be used for another set
 * of requests
 */
int
kcpc_reqs_reset(kcpc_request_list_t *req_list)
{
	/*
	 * Return when pointer to request list structure or request is NULL or
	 * when max requests is less than or equal to 0
	 */
	if (req_list == NULL || req_list->krl_list == NULL ||
	    req_list->krl_max <= 0)
		return (-1);

	/*
	 * Zero out requests and number of requests used
	 */
	bzero(req_list->krl_list, req_list->krl_max * sizeof (kcpc_request_t));
	req_list->krl_cnt = 0;
	return (0);
}

/*
 * Free given list of counter event requests
 */
int
kcpc_reqs_fini(kcpc_request_list_t *req_list)
{
	kmem_free(req_list->krl_list,
	    req_list->krl_max * sizeof (kcpc_request_t));
	kmem_free(req_list, sizeof (kcpc_request_list_t));
	return (0);
}
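
/*
 * Illustrative sketch (not part of this file): typical lifecycle of a counter
 * event request list using the routines above. The event name "PAPI_tot_cyc"
 * and the "my_data" pointer are placeholders only; callers should check
 * kcpc_event_supported() before adding an event.
 *
 *	kcpc_request_list_t *list;
 *
 *	list = kcpc_reqs_init(cpc_ncounters, KM_NOSLEEP);
 *	if (list != NULL) {
 *		(void) kcpc_reqs_add(list, "PAPI_tot_cyc", 0,
 *		    CPC_COUNT_USER | CPC_COUNT_SYSTEM, 0, NULL, my_data,
 *		    KM_NOSLEEP);
 *		... hand list to kcpc_cpu_ctx_create() ...
 *		(void) kcpc_reqs_reset(list);
 *		... reuse the space for another set of requests ...
 *		(void) kcpc_reqs_fini(list);
 *	}
 */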

/*
 * Create set of given counter event requests
 */
static kcpc_set_t *
kcpc_set_create(kcpc_request_t *reqs, int nreqs, int set_flags, int kmem_flags)
{
	int i;
	kcpc_set_t *set;

	/*
	 * Allocate set and assign number of requests in set and flags
	 */
	set = kmem_zalloc(sizeof (kcpc_set_t), kmem_flags);
	if (set == NULL)
		return (NULL);

	if (nreqs < cpc_ncounters)
		set->ks_nreqs = nreqs;
	else
		set->ks_nreqs = cpc_ncounters;

	set->ks_flags = set_flags;

	/*
	 * Allocate requests needed, copy requests into set, and set index into
	 * data for each request (which may change when we assign requested
	 * counter events to counters)
	 */
	set->ks_req = kmem_zalloc(sizeof (kcpc_request_t) * set->ks_nreqs,
	    kmem_flags);
	if (set->ks_req == NULL) {
		kmem_free(set, sizeof (kcpc_set_t));
		return (NULL);
	}

	bcopy(reqs, set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);

	for (i = 0; i < set->ks_nreqs; i++)
		set->ks_req[i].kr_index = i;

	return (set);
}

/*
 * Stop counters on current CPU.
 *
 * If preserve_context is true, the caller is interested in the CPU's CPC
 * context and wants it to be preserved.
 *
 * If preserve_context is false, the caller does not need the CPU's CPC context
 * to be preserved, so it is set to NULL.
 */
static void
kcpc_cpustop_func(uintptr_t arg1, uintptr_t arg2 __unused)
{
	boolean_t preserve_context;

	kpreempt_disable();

	preserve_context = (boolean_t)arg1;

	/*
	 * Someone already stopped this context before us, so there is nothing
	 * to do.
	 */
	if (CPU->cpu_cpc_ctx == NULL) {
		kpreempt_enable();
		return;
	}

	kcpc_unprogram(CPU->cpu_cpc_ctx, B_TRUE);

	/*
	 * If CU does not use counters, then clear the CPU's CPC context.
	 * If the caller requested to preserve context, it should disable CU
	 * first, so there should be no CU context now.
	 */
	ASSERT(!preserve_context || !CU_CPC_ON(CPU));
	if (!preserve_context && CPU->cpu_cpc_ctx != NULL && !CU_CPC_ON(CPU))
		CPU->cpu_cpc_ctx = NULL;

	kpreempt_enable();
}

/*
 * Stop counters on given CPU and set its CPC context to NULL unless
 * preserve_context is true.
 */
void
kcpc_cpu_stop(cpu_t *cp, boolean_t preserve_context)
{
	cpu_call(cp, kcpc_cpustop_func, preserve_context, 0);
}

/*
 * Program the context on the current CPU
 */
static void
kcpc_remoteprogram_func(uintptr_t arg1, uintptr_t arg2)
{
	kcpc_ctx_t *ctx = (kcpc_ctx_t *)arg1;
	boolean_t for_thread = (boolean_t)arg2;

	ASSERT(ctx != NULL);

	kpreempt_disable();
	kcpc_program(ctx, for_thread, B_TRUE);
	kpreempt_enable();
}

/*
 * Program counters on given CPU
 */
void
kcpc_cpu_program(cpu_t *cp, kcpc_ctx_t *ctx)
{
	cpu_call(cp, kcpc_remoteprogram_func, (uintptr_t)ctx,
	    (uintptr_t)B_FALSE);
}
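
/*
 * Illustrative sketch (not part of this file): programming and later stopping
 * counters on another CPU with the wrappers above, where "ctx" is a
 * hypothetical CPU-bound context created by kcpc_cpu_ctx_create() for cp:
 *
 *	kcpc_cpu_program(cp, ctx);
 *	... counters on cp count and can be sampled via kcpc_read() ...
 *	kcpc_cpu_stop(cp, B_FALSE);
 */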

char *
kcpc_list_attrs(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_attrs());
}

char *
kcpc_list_events(uint_t pic)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_events(pic));
}

uint_t
kcpc_pcbe_capabilities(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_caps);
}

int
kcpc_pcbe_loaded(void)
{
	return (pcbe_ops == NULL ? -1 : 0);
}