1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
29 */
30
31 #include <sys/fasttrap_isa.h>
32 #include <sys/fasttrap_impl.h>
33 #include <sys/dtrace.h>
34 #include <sys/dtrace_impl.h>
35 #include <sys/cmn_err.h>
36 #include <sys/regset.h>
37 #include <sys/privregs.h>
38 #include <sys/segments.h>
39 #include <sys/x86_archext.h>
40 #include <sys/sysmacros.h>
41 #include <sys/trap.h>
42 #include <sys/archsystm.h>
43 #include <sys/proc.h>
44 #include <sys/brand.h>
45 #include <sys/machbrand.h>
46
47 /*
48 * Lossless User-Land Tracing on x86
49 * ---------------------------------
50 *
51 * The execution of most instructions is not dependent on the address; for
52 * these instructions it is sufficient to copy them into the user process's
53 * address space and execute them. To effectively single-step an instruction
54 * in user-land, we copy out the following sequence of instructions to scratch
55 * space in the user thread's ulwp_t structure.
56 *
57 * We then set the program counter (%eip or %rip) to point to this scratch
58 * space. Once execution resumes, the original instruction is executed and
59 * then control flow is redirected to what was originally the subsequent
 * instruction. If the kernel attempts to deliver a signal while single-
61 * stepping, the signal is deferred and the program counter is moved into the
62 * second sequence of instructions. The second sequence ends in a trap into
63 * the kernel where the deferred signal is then properly handled and delivered.
64 *
 * For instructions whose execution is position dependent, we perform simple
 * emulation. These instructions are limited to control transfer
 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
 * of %rip-relative addressing that means that almost any instruction can be
 * position dependent. For all the details on how we emulate generic
 * instructions including %rip-relative instructions, see the code in
 * fasttrap_pid_probe() below where we handle instructions of type
 * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
73 */
74
/*
 * ModRM byte accessors: mod occupies bits 7:6, reg bits 5:3 and r/m bits 2:0.
 * FASTTRAP_MODRM() assembles a byte from those three fields.
 */
#define	FASTTRAP_MODRM_MOD(modrm)	(((modrm) & 0xc0) >> 6)
#define	FASTTRAP_MODRM_REG(modrm)	(((modrm) & 0x38) >> 3)
#define	FASTTRAP_MODRM_RM(modrm)	((modrm) & 0x07)
#define	FASTTRAP_MODRM(mod, reg, rm)	((rm) | ((reg) << 3) | ((mod) << 6))

/*
 * SIB byte accessors: scale occupies bits 7:6, index bits 5:3, base bits 2:0.
 */
#define	FASTTRAP_SIB_SCALE(sib)		(((sib) & 0xc0) >> 6)
#define	FASTTRAP_SIB_INDEX(sib)		(((sib) & 0x38) >> 3)
#define	FASTTRAP_SIB_BASE(sib)		((sib) & 0x07)

/*
 * REX prefix accessors: W is bit 3, R bit 2, X bit 1 and B bit 0.
 * FASTTRAP_REX() assembles a prefix byte (0x40 plus the four flag bits).
 */
#define	FASTTRAP_REX_W(rex)		(((rex) & 0x08) >> 3)
#define	FASTTRAP_REX_R(rex)		(((rex) & 0x04) >> 2)
#define	FASTTRAP_REX_X(rex)		(((rex) & 0x02) >> 1)
#define	FASTTRAP_REX_B(rex)		((rex) & 0x01)
#define	FASTTRAP_REX(w, r, x, b)	\
	((b) | ((x) << 1) | ((r) << 2) | ((w) << 3) | 0x40)
90
/*
 * Single-byte op-codes.
 *
 * fasttrap_tracepoint_init() compares these against the first opcode byte of
 * an instruction (the byte that follows any legacy and REX prefixes) in order
 * to classify it.
 */
#define	FASTTRAP_PUSHL_EBP	0x55

/*
 * Conditional jumps. The instruction is decoded with a signed 8-bit
 * displacement in the byte that follows the opcode.
 */
#define	FASTTRAP_JO		0x70
#define	FASTTRAP_JNO		0x71
#define	FASTTRAP_JB		0x72
#define	FASTTRAP_JAE		0x73
#define	FASTTRAP_JE		0x74
#define	FASTTRAP_JNE		0x75
#define	FASTTRAP_JBE		0x76
#define	FASTTRAP_JA		0x77
#define	FASTTRAP_JS		0x78
#define	FASTTRAP_JNS		0x79
#define	FASTTRAP_JP		0x7a
#define	FASTTRAP_JNP		0x7b
#define	FASTTRAP_JL		0x7c
#define	FASTTRAP_JGE		0x7d
#define	FASTTRAP_JLE		0x7e
#define	FASTTRAP_JG		0x7f

#define	FASTTRAP_NOP		0x90

#define	FASTTRAP_MOV_EAX	0xb8
#define	FASTTRAP_MOV_ECX	0xb9

/* RET16 is followed by a 16-bit immediate that is saved in ftt_dest. */
#define	FASTTRAP_RET16		0xc2
#define	FASTTRAP_RET		0xc3

/* Loop/jcxz family; decoded with a signed 8-bit displacement. */
#define	FASTTRAP_LOOPNZ		0xe0
#define	FASTTRAP_LOOPZ		0xe1
#define	FASTTRAP_LOOP		0xe2
#define	FASTTRAP_JCXZ		0xe3

/* CALL and JMP32 carry a signed 32-bit displacement, JMP8 a signed 8-bit. */
#define	FASTTRAP_CALL		0xe8
#define	FASTTRAP_JMP32		0xe9
#define	FASTTRAP_JMP8		0xeb

/* Instructions beginning with either of these are never instrumented. */
#define	FASTTRAP_INT3		0xcc
#define	FASTTRAP_INT		0xcd

/* First byte of a two-byte opcode, and of the group 5 (call/jmp r/m) forms. */
#define	FASTTRAP_2_BYTE_OP	0x0f
#define	FASTTRAP_GROUP5_OP	0xff
135
/*
 * Two-byte op-codes (second byte only).
 *
 * These are the conditional jumps that carry a signed 32-bit displacement.
 * The low nibble selects the same condition as the single-byte form, which
 * is why fasttrap_tracepoint_init() stores (byte & 0x0f) | FASTTRAP_JO in
 * ftt_code.
 */
#define	FASTTRAP_0F_JO		0x80
#define	FASTTRAP_0F_JNO		0x81
#define	FASTTRAP_0F_JB		0x82
#define	FASTTRAP_0F_JAE		0x83
#define	FASTTRAP_0F_JE		0x84
#define	FASTTRAP_0F_JNE		0x85
#define	FASTTRAP_0F_JBE		0x86
#define	FASTTRAP_0F_JA		0x87
#define	FASTTRAP_0F_JS		0x88
#define	FASTTRAP_0F_JNS		0x89
#define	FASTTRAP_0F_JP		0x8a
#define	FASTTRAP_0F_JNP		0x8b
#define	FASTTRAP_0F_JL		0x8c
#define	FASTTRAP_0F_JGE		0x8d
#define	FASTTRAP_0F_JLE		0x8e
#define	FASTTRAP_0F_JG		0x8f

/*
 * Flag bit masks. fasttrap_pid_probe() tests these against rp->r_ps when it
 * emulates conditional branches and loop instructions.
 */
#define	FASTTRAP_EFLAGS_OF	0x800
#define	FASTTRAP_EFLAGS_DF	0x400
#define	FASTTRAP_EFLAGS_SF	0x080
#define	FASTTRAP_EFLAGS_ZF	0x040
#define	FASTTRAP_EFLAGS_AF	0x010
#define	FASTTRAP_EFLAGS_PF	0x004
#define	FASTTRAP_EFLAGS_CF	0x001
163
/*
 * Instruction prefixes.
 *
 * fasttrap_tracepoint_init() skips over any run of these bytes to find the
 * opcode. The six segment overrides (CS, DS, ES, FS, GS, SS) are additionally
 * recorded in the tracepoint's ftt_segment field.
 */
#define	FASTTRAP_PREFIX_OPERAND	0x66
#define	FASTTRAP_PREFIX_ADDRESS	0x67
#define	FASTTRAP_PREFIX_CS	0x2E
#define	FASTTRAP_PREFIX_DS	0x3E
#define	FASTTRAP_PREFIX_ES	0x26
#define	FASTTRAP_PREFIX_FS	0x64
#define	FASTTRAP_PREFIX_GS	0x65
#define	FASTTRAP_PREFIX_SS	0x36
#define	FASTTRAP_PREFIX_LOCK	0xF0
#define	FASTTRAP_PREFIX_REP	0xF3
#define	FASTTRAP_PREFIX_REPNE	0xF2

/*
 * Stored in ftt_base or ftt_index when the decoded addressing form has no
 * base or index register.
 */
#define	FASTTRAP_NOREG	0xff
180
/*
 * Map between instruction register encodings and the kernel constants which
 * correspond to indices into struct regs.
 *
 * The table is indexed by the register number taken from a ModRM or SIB
 * field. On amd64 the index is four bits wide: the low three bits come from
 * the instruction field and bit 3 from the matching REX.X or REX.B bit. The
 * 32-bit table only has the eight classic registers.
 */
#ifdef __amd64
static const uint8_t regmap[16] = {
	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
};
#else
static const uint8_t regmap[8] = {
	EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI
};
#endif

/* Forward declaration. */
static ulong_t fasttrap_getreg(struct regs *, uint_t);
197
198 static uint64_t
199 fasttrap_anarg(struct regs *rp, int function_entry, int argno)
200 {
201 uint64_t value;
202 int shift = function_entry ? 1 : 0;
203
204 #ifdef __amd64
205 if (curproc->p_model == DATAMODEL_LP64) {
206 uintptr_t *stack;
207
208 /*
209 * In 64-bit mode, the first six arguments are stored in
210 * registers.
211 */
212 if (argno < 6)
213 return ((&rp->r_rdi)[argno]);
214
215 stack = (uintptr_t *)rp->r_sp;
216 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
217 value = dtrace_fulword(&stack[argno - 6 + shift]);
218 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
219 } else {
220 #endif
221 uint32_t *stack = (uint32_t *)rp->r_sp;
222 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
223 value = dtrace_fuword32(&stack[argno + shift]);
224 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
225 #ifdef __amd64
226 }
227 #endif
228
229 return (value);
230 }
231
232 /*ARGSUSED*/
233 int
234 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
235 fasttrap_probe_type_t type)
236 {
237 uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];
238 size_t len = FASTTRAP_MAX_INSTR_SIZE;
239 size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET));
240 uint_t start = 0;
241 int rmindex, size;
242 uint8_t seg, rex = 0;
243
244 /*
245 * Read the instruction at the given address out of the process's
246 * address space. We don't have to worry about a debugger
247 * changing this instruction before we overwrite it with our trap
248 * instruction since P_PR_LOCK is set. Since instructions can span
249 * pages, we potentially read the instruction in two parts. If the
250 * second part fails, we just zero out that part of the instruction.
251 */
252 if (uread(p, &instr[0], first, pc) != 0)
253 return (-1);
254 if (len > first &&
255 uread(p, &instr[first], len - first, pc + first) != 0) {
256 bzero(&instr[first], len - first);
257 len = first;
258 }
259
260 /*
261 * If the disassembly fails, then we have a malformed instruction.
262 */
263 if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0)
264 return (-1);
265
266 /*
267 * Make sure the disassembler isn't completely broken.
268 */
269 ASSERT(-1 <= rmindex && rmindex < size);
270
271 /*
272 * If the computed size is greater than the number of bytes read,
273 * then it was a malformed instruction possibly because it fell on a
274 * page boundary and the subsequent page was missing or because of
275 * some malicious user.
276 */
277 if (size > len)
278 return (-1);
279
280 tp->ftt_size = (uint8_t)size;
281 tp->ftt_segment = FASTTRAP_SEG_NONE;
282
283 /*
284 * Find the start of the instruction's opcode by processing any
285 * legacy prefixes.
286 */
287 for (;;) {
288 seg = 0;
289 switch (instr[start]) {
290 case FASTTRAP_PREFIX_SS:
291 seg++;
292 /*FALLTHRU*/
293 case FASTTRAP_PREFIX_GS:
294 seg++;
295 /*FALLTHRU*/
296 case FASTTRAP_PREFIX_FS:
297 seg++;
298 /*FALLTHRU*/
299 case FASTTRAP_PREFIX_ES:
300 seg++;
301 /*FALLTHRU*/
302 case FASTTRAP_PREFIX_DS:
303 seg++;
304 /*FALLTHRU*/
305 case FASTTRAP_PREFIX_CS:
306 seg++;
307 /*FALLTHRU*/
308 case FASTTRAP_PREFIX_OPERAND:
309 case FASTTRAP_PREFIX_ADDRESS:
310 case FASTTRAP_PREFIX_LOCK:
311 case FASTTRAP_PREFIX_REP:
312 case FASTTRAP_PREFIX_REPNE:
313 if (seg != 0) {
314 /*
315 * It's illegal for an instruction to specify
316 * two segment prefixes -- give up on this
317 * illegal instruction.
318 */
319 if (tp->ftt_segment != FASTTRAP_SEG_NONE)
320 return (-1);
321
322 tp->ftt_segment = seg;
323 }
324 start++;
325 continue;
326 }
327 break;
328 }
329
330 #ifdef __amd64
331 /*
332 * Identify the REX prefix on 64-bit processes.
333 */
334 if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)
335 rex = instr[start++];
336 #endif
337
338 /*
339 * Now that we're pretty sure that the instruction is okay, copy the
340 * valid part to the tracepoint.
341 */
342 bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);
343
344 tp->ftt_type = FASTTRAP_T_COMMON;
345 if (instr[start] == FASTTRAP_2_BYTE_OP) {
346 switch (instr[start + 1]) {
347 case FASTTRAP_0F_JO:
348 case FASTTRAP_0F_JNO:
349 case FASTTRAP_0F_JB:
350 case FASTTRAP_0F_JAE:
351 case FASTTRAP_0F_JE:
352 case FASTTRAP_0F_JNE:
353 case FASTTRAP_0F_JBE:
354 case FASTTRAP_0F_JA:
355 case FASTTRAP_0F_JS:
356 case FASTTRAP_0F_JNS:
357 case FASTTRAP_0F_JP:
358 case FASTTRAP_0F_JNP:
359 case FASTTRAP_0F_JL:
360 case FASTTRAP_0F_JGE:
361 case FASTTRAP_0F_JLE:
362 case FASTTRAP_0F_JG:
363 tp->ftt_type = FASTTRAP_T_JCC;
364 tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;
365 tp->ftt_dest = pc + tp->ftt_size +
366 /* LINTED - alignment */
367 *(int32_t *)&instr[start + 2];
368 break;
369 }
370 } else if (instr[start] == FASTTRAP_GROUP5_OP) {
371 uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);
372 uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);
373 uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);
374
375 if (reg == 2 || reg == 4) {
376 uint_t i, sz;
377
378 if (reg == 2)
379 tp->ftt_type = FASTTRAP_T_CALL;
380 else
381 tp->ftt_type = FASTTRAP_T_JMP;
382
383 if (mod == 3)
384 tp->ftt_code = 2;
385 else
386 tp->ftt_code = 1;
387
388 ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);
389
390 /*
391 * See AMD x86-64 Architecture Programmer's Manual
392 * Volume 3, Section 1.2.7, Table 1-12, and
393 * Appendix A.3.1, Table A-15.
394 */
395 if (mod != 3 && rm == 4) {
396 uint8_t sib = instr[start + 2];
397 uint_t index = FASTTRAP_SIB_INDEX(sib);
398 uint_t base = FASTTRAP_SIB_BASE(sib);
399
400 tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);
401
402 tp->ftt_index = (index == 4) ?
403 FASTTRAP_NOREG :
404 regmap[index | (FASTTRAP_REX_X(rex) << 3)];
405 tp->ftt_base = (mod == 0 && base == 5) ?
406 FASTTRAP_NOREG :
407 regmap[base | (FASTTRAP_REX_B(rex) << 3)];
408
409 i = 3;
410 sz = mod == 1 ? 1 : 4;
411 } else {
412 /*
413 * In 64-bit mode, mod == 0 and r/m == 5
414 * denotes %rip-relative addressing; in 32-bit
415 * mode, the base register isn't used. In both
416 * modes, there is a 32-bit operand.
417 */
418 if (mod == 0 && rm == 5) {
419 #ifdef __amd64
420 if (p->p_model == DATAMODEL_LP64)
421 tp->ftt_base = REG_RIP;
422 else
423 #endif
424 tp->ftt_base = FASTTRAP_NOREG;
425 sz = 4;
426 } else {
427 uint8_t base = rm |
428 (FASTTRAP_REX_B(rex) << 3);
429
430 tp->ftt_base = regmap[base];
431 sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;
432 }
433 tp->ftt_index = FASTTRAP_NOREG;
434 i = 2;
435 }
436
437 if (sz == 1) {
438 tp->ftt_dest = *(int8_t *)&instr[start + i];
439 } else if (sz == 4) {
440 /* LINTED - alignment */
441 tp->ftt_dest = *(int32_t *)&instr[start + i];
442 } else {
443 tp->ftt_dest = 0;
444 }
445 }
446 } else {
447 switch (instr[start]) {
448 case FASTTRAP_RET:
449 tp->ftt_type = FASTTRAP_T_RET;
450 break;
451
452 case FASTTRAP_RET16:
453 tp->ftt_type = FASTTRAP_T_RET16;
454 /* LINTED - alignment */
455 tp->ftt_dest = *(uint16_t *)&instr[start + 1];
456 break;
457
458 case FASTTRAP_JO:
459 case FASTTRAP_JNO:
460 case FASTTRAP_JB:
461 case FASTTRAP_JAE:
462 case FASTTRAP_JE:
463 case FASTTRAP_JNE:
464 case FASTTRAP_JBE:
465 case FASTTRAP_JA:
466 case FASTTRAP_JS:
467 case FASTTRAP_JNS:
468 case FASTTRAP_JP:
469 case FASTTRAP_JNP:
470 case FASTTRAP_JL:
471 case FASTTRAP_JGE:
472 case FASTTRAP_JLE:
473 case FASTTRAP_JG:
474 tp->ftt_type = FASTTRAP_T_JCC;
475 tp->ftt_code = instr[start];
476 tp->ftt_dest = pc + tp->ftt_size +
477 (int8_t)instr[start + 1];
478 break;
479
480 case FASTTRAP_LOOPNZ:
481 case FASTTRAP_LOOPZ:
482 case FASTTRAP_LOOP:
483 tp->ftt_type = FASTTRAP_T_LOOP;
484 tp->ftt_code = instr[start];
485 tp->ftt_dest = pc + tp->ftt_size +
486 (int8_t)instr[start + 1];
487 break;
488
489 case FASTTRAP_JCXZ:
490 tp->ftt_type = FASTTRAP_T_JCXZ;
491 tp->ftt_dest = pc + tp->ftt_size +
492 (int8_t)instr[start + 1];
493 break;
494
495 case FASTTRAP_CALL:
496 tp->ftt_type = FASTTRAP_T_CALL;
497 tp->ftt_dest = pc + tp->ftt_size +
498 /* LINTED - alignment */
499 *(int32_t *)&instr[start + 1];
500 tp->ftt_code = 0;
501 break;
502
503 case FASTTRAP_JMP32:
504 tp->ftt_type = FASTTRAP_T_JMP;
505 tp->ftt_dest = pc + tp->ftt_size +
506 /* LINTED - alignment */
507 *(int32_t *)&instr[start + 1];
508 break;
509 case FASTTRAP_JMP8:
510 tp->ftt_type = FASTTRAP_T_JMP;
511 tp->ftt_dest = pc + tp->ftt_size +
512 (int8_t)instr[start + 1];
513 break;
514
515 case FASTTRAP_PUSHL_EBP:
516 if (start == 0)
517 tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
518 break;
519
520 case FASTTRAP_NOP:
521 #ifdef __amd64
522 ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);
523
524 /*
525 * On amd64 we have to be careful not to confuse a nop
526 * (actually xchgl %eax, %eax) with an instruction using
527 * the same opcode, but that does something different
528 * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax).
529 */
530 if (FASTTRAP_REX_B(rex) == 0)
531 #endif
532 tp->ftt_type = FASTTRAP_T_NOP;
533 break;
534
535 case FASTTRAP_INT3:
536 /*
537 * The pid provider shares the int3 trap with debugger
538 * breakpoints so we can't instrument them.
539 */
540 ASSERT(instr[start] == FASTTRAP_INSTR);
541 return (-1);
542
543 case FASTTRAP_INT:
544 /*
545 * Interrupts seem like they could be traced with
546 * no negative implications, but it's possible that
547 * a thread could be redirected by the trap handling
548 * code which would eventually return to the
549 * instruction after the interrupt. If the interrupt
550 * were in our scratch space, the subsequent
551 * instruction might be overwritten before we return.
552 * Accordingly we refuse to instrument any interrupt.
553 */
554 return (-1);
555 }
556 }
557
558 #ifdef __amd64
559 if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
560 /*
561 * If the process is 64-bit and the instruction type is still
562 * FASTTRAP_T_COMMON -- meaning we're going to copy it out an
563 * execute it -- we need to watch for %rip-relative
564 * addressing mode. See the portion of fasttrap_pid_probe()
565 * below where we handle tracepoints with type
566 * FASTTRAP_T_COMMON for how we emulate instructions that
567 * employ %rip-relative addressing.
568 */
569 if (rmindex != -1) {
570 uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);
571 uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);
572 uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);
573
574 ASSERT(rmindex > start);
575
576 if (mod == 0 && rm == 5) {
577 /*
578 * We need to be sure to avoid other
579 * registers used by this instruction. While
580 * the reg field may determine the op code
581 * rather than denoting a register, assuming
582 * that it denotes a register is always safe.
583 * We leave the REX field intact and use
584 * whatever value's there for simplicity.
585 */
586 if (reg != 0) {
587 tp->ftt_ripmode = FASTTRAP_RIP_1 |
588 (FASTTRAP_RIP_X *
589 FASTTRAP_REX_B(rex));
590 rm = 0;
591 } else {
592 tp->ftt_ripmode = FASTTRAP_RIP_2 |
593 (FASTTRAP_RIP_X *
594 FASTTRAP_REX_B(rex));
595 rm = 1;
596 }
597
598 tp->ftt_modrm = tp->ftt_instr[rmindex];
599 tp->ftt_instr[rmindex] =
600 FASTTRAP_MODRM(2, reg, rm);
601 }
602 }
603 }
604 #endif
605
606 return (0);
607 }
608
609 int
610 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
611 {
612 fasttrap_instr_t instr = FASTTRAP_INSTR;
613
614 if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)
615 return (-1);
616
617 return (0);
618 }
619
620 int
621 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
622 {
623 uint8_t instr;
624
625 /*
626 * Distinguish between read or write failures and a changed
627 * instruction.
628 */
629 if (uread(p, &instr, 1, tp->ftt_pc) != 0)
630 return (0);
631 if (instr != FASTTRAP_INSTR)
632 return (0);
633 if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
634 return (-1);
635
636 return (0);
637 }
638
#ifdef __amd64
/*
 * Fetch a native word from user address 'uaddr' via fasttrap_fulword().
 * A failed fetch is not reported; 0 is returned in its place.
 */
static uintptr_t
fasttrap_fulword_noerr(const void *uaddr)
{
	uintptr_t word;

	return (fasttrap_fulword(uaddr, &word) == 0 ? word : 0);
}
#endif
651
/*
 * Fetch a 32-bit word from user address 'uaddr' via fasttrap_fuword32().
 * A failed fetch is not reported; 0 is returned in its place.
 */
static uint32_t
fasttrap_fuword32_noerr(const void *uaddr)
{
	uint32_t word;

	if (fasttrap_fuword32(uaddr, &word) != 0)
		return (0);

	return (word);
}
662
663 static void
664 fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
665 uintptr_t new_pc)
666 {
667 fasttrap_tracepoint_t *tp;
668 fasttrap_bucket_t *bucket;
669 fasttrap_id_t *id;
670 kmutex_t *pid_mtx;
671
672 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
673 mutex_enter(pid_mtx);
674 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
675
676 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
677 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
678 tp->ftt_proc->ftpc_acount != 0)
679 break;
680 }
681
682 /*
683 * Don't sweat it if we can't find the tracepoint again; unlike
684 * when we're in fasttrap_pid_probe(), finding the tracepoint here
685 * is not essential to the correct execution of the process.
686 */
687 if (tp == NULL) {
688 mutex_exit(pid_mtx);
689 return;
690 }
691
692 for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
693 /*
694 * If there's a branch that could act as a return site, we
695 * need to trace it, and check here if the program counter is
696 * external to the function.
697 */
698 if (tp->ftt_type != FASTTRAP_T_RET &&
699 tp->ftt_type != FASTTRAP_T_RET16 &&
700 new_pc - id->fti_probe->ftp_faddr <
701 id->fti_probe->ftp_fsize)
702 continue;
703
704 dtrace_probe(id->fti_probe->ftp_id,
705 pc - id->fti_probe->ftp_faddr,
706 rp->r_r0, rp->r_r1, 0, 0);
707 }
708
709 mutex_exit(pid_mtx);
710 }
711
712 static void
713 fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
714 {
715 sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
716
717 sqp->sq_info.si_signo = SIGSEGV;
718 sqp->sq_info.si_code = SEGV_MAPERR;
719 sqp->sq_info.si_addr = (caddr_t)addr;
720
721 mutex_enter(&p->p_lock);
722 sigaddqa(p, t, sqp);
723 mutex_exit(&p->p_lock);
724
725 if (t != NULL)
726 aston(t);
727 }
728
729 #ifdef __amd64
730 static void
731 fasttrap_usdt_args64(fasttrap_probe_t *probe, struct regs *rp, int argc,
732 uintptr_t *argv)
733 {
734 int i, x, cap = MIN(argc, probe->ftp_nargs);
735 uintptr_t *stack = (uintptr_t *)rp->r_sp;
736
737 for (i = 0; i < cap; i++) {
738 x = probe->ftp_argmap[i];
739
740 if (x < 6)
741 argv[i] = (&rp->r_rdi)[x];
742 else
743 argv[i] = fasttrap_fulword_noerr(&stack[x]);
744 }
745
746 for (; i < argc; i++) {
747 argv[i] = 0;
748 }
749 }
750 #endif
751
752 static void
753 fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc,
754 uint32_t *argv)
755 {
756 int i, x, cap = MIN(argc, probe->ftp_nargs);
757 uint32_t *stack = (uint32_t *)rp->r_sp;
758
759 for (i = 0; i < cap; i++) {
760 x = probe->ftp_argmap[i];
761
762 argv[i] = fasttrap_fuword32_noerr(&stack[x]);
763 }
764
765 for (; i < argc; i++) {
766 argv[i] = 0;
767 }
768 }
769
770 static int
771 fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr)
772 {
773 proc_t *p = curproc;
774 user_desc_t *desc;
775 uint16_t sel, ndx, type;
776 uintptr_t limit;
777
778 switch (tp->ftt_segment) {
779 case FASTTRAP_SEG_CS:
780 sel = rp->r_cs;
781 break;
782 case FASTTRAP_SEG_DS:
783 sel = rp->r_ds;
784 break;
785 case FASTTRAP_SEG_ES:
786 sel = rp->r_es;
787 break;
788 case FASTTRAP_SEG_FS:
789 sel = rp->r_fs;
790 break;
791 case FASTTRAP_SEG_GS:
792 sel = rp->r_gs;
793 break;
794 case FASTTRAP_SEG_SS:
795 sel = rp->r_ss;
796 break;
797 }
798
799 /*
800 * Make sure the given segment register specifies a user priority
801 * selector rather than a kernel selector.
802 */
803 if (!SELISUPL(sel))
804 return (-1);
805
806 ndx = SELTOIDX(sel);
807
808 /*
809 * Check the bounds and grab the descriptor out of the specified
810 * descriptor table.
811 */
812 if (SELISLDT(sel)) {
813 if (ndx > p->p_ldtlimit)
814 return (-1);
815
816 desc = p->p_ldt + ndx;
817
818 } else {
819 if (ndx >= NGDT)
820 return (-1);
821
822 desc = cpu_get_gdt() + ndx;
823 }
824
825 /*
826 * The descriptor must have user privilege level and it must be
827 * present in memory.
828 */
829 if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1)
830 return (-1);
831
832 type = desc->usd_type;
833
834 /*
835 * If the S bit in the type field is not set, this descriptor can
836 * only be used in system context.
837 */
838 if ((type & 0x10) != 0x10)
839 return (-1);
840
841 limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1);
842
843 if (tp->ftt_segment == FASTTRAP_SEG_CS) {
844 /*
845 * The code/data bit and readable bit must both be set.
846 */
847 if ((type & 0xa) != 0xa)
848 return (-1);
849
850 if (*addr > limit)
851 return (-1);
852 } else {
853 /*
854 * The code/data bit must be clear.
855 */
856 if ((type & 0x8) != 0)
857 return (-1);
858
859 /*
860 * If the expand-down bit is clear, we just check the limit as
861 * it would naturally be applied. Otherwise, we need to check
862 * that the address is the range [limit + 1 .. 0xffff] or
863 * [limit + 1 ... 0xffffffff] depending on if the default
864 * operand size bit is set.
865 */
866 if ((type & 0x4) == 0) {
867 if (*addr > limit)
868 return (-1);
869 } else if (desc->usd_def32) {
870 if (*addr < limit + 1 || 0xffff < *addr)
871 return (-1);
872 } else {
873 if (*addr < limit + 1 || 0xffffffff < *addr)
874 return (-1);
875 }
876 }
877
878 *addr += USEGD_GETBASE(desc);
879
880 return (0);
881 }
882
883 int
884 fasttrap_pid_probe(struct regs *rp)
885 {
886 proc_t *p = curproc;
887 uintptr_t pc = rp->r_pc - 1, new_pc = 0;
888 fasttrap_bucket_t *bucket;
889 kmutex_t *pid_mtx;
890 fasttrap_tracepoint_t *tp, tp_local;
891 pid_t pid;
892 dtrace_icookie_t cookie;
893 uint_t is_enabled = 0;
894
895 /*
896 * It's possible that a user (in a veritable orgy of bad planning)
897 * could redirect this thread's flow of control before it reached the
898 * return probe fasttrap. In this case we need to kill the process
899 * since it's in a unrecoverable state.
900 */
901 if (curthread->t_dtrace_step) {
902 ASSERT(curthread->t_dtrace_on);
903 fasttrap_sigtrap(p, curthread, pc);
904 return (0);
905 }
906
907 /*
908 * Clear all user tracing flags.
909 */
910 curthread->t_dtrace_ft = 0;
911 curthread->t_dtrace_pc = 0;
912 curthread->t_dtrace_npc = 0;
913 curthread->t_dtrace_scrpc = 0;
914 curthread->t_dtrace_astpc = 0;
915 #ifdef __amd64
916 curthread->t_dtrace_regv = 0;
917 #endif
918
919 /*
920 * Treat a child created by a call to vfork(2) as if it were its
921 * parent. We know that there's only one thread of control in such a
922 * process: this one.
923 */
924 while (p->p_flag & SVFORK) {
925 p = p->p_parent;
926 }
927
928 pid = p->p_pid;
929 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
930 mutex_enter(pid_mtx);
931 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
932
933 /*
934 * Lookup the tracepoint that the process just hit.
935 */
936 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
937 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
938 tp->ftt_proc->ftpc_acount != 0)
939 break;
940 }
941
942 /*
943 * If we couldn't find a matching tracepoint, either a tracepoint has
944 * been inserted without using the pid<pid> ioctl interface (see
945 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
946 */
947 if (tp == NULL) {
948 mutex_exit(pid_mtx);
949 return (-1);
950 }
951
952 /*
953 * Set the program counter to the address of the traced instruction
954 * so that it looks right in ustack() output.
955 */
956 rp->r_pc = pc;
957
958 if (tp->ftt_ids != NULL) {
959 fasttrap_id_t *id;
960
961 #ifdef __amd64
962 if (p->p_model == DATAMODEL_LP64) {
963 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
964 fasttrap_probe_t *probe = id->fti_probe;
965
966 if (id->fti_ptype == DTFTP_ENTRY) {
967 /*
968 * We note that this was an entry
969 * probe to help ustack() find the
970 * first caller.
971 */
972 cookie = dtrace_interrupt_disable();
973 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
974 dtrace_probe(probe->ftp_id, rp->r_rdi,
975 rp->r_rsi, rp->r_rdx, rp->r_rcx,
976 rp->r_r8);
977 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
978 dtrace_interrupt_enable(cookie);
979 } else if (id->fti_ptype == DTFTP_IS_ENABLED) {
980 /*
981 * Note that in this case, we don't
982 * call dtrace_probe() since it's only
983 * an artificial probe meant to change
984 * the flow of control so that it
985 * encounters the true probe.
986 */
987 is_enabled = 1;
988 } else if (probe->ftp_argmap == NULL) {
989 dtrace_probe(probe->ftp_id, rp->r_rdi,
990 rp->r_rsi, rp->r_rdx, rp->r_rcx,
991 rp->r_r8);
992 } else {
993 uintptr_t t[5];
994
995 fasttrap_usdt_args64(probe, rp,
996 sizeof (t) / sizeof (t[0]), t);
997
998 dtrace_probe(probe->ftp_id, t[0], t[1],
999 t[2], t[3], t[4]);
1000 }
1001 }
1002 } else {
1003 #endif
1004 uintptr_t s0, s1, s2, s3, s4, s5;
1005 uint32_t *stack = (uint32_t *)rp->r_sp;
1006
1007 /*
1008 * In 32-bit mode, all arguments are passed on the
1009 * stack. If this is a function entry probe, we need
1010 * to skip the first entry on the stack as it
1011 * represents the return address rather than a
1012 * parameter to the function.
1013 */
1014 s0 = fasttrap_fuword32_noerr(&stack[0]);
1015 s1 = fasttrap_fuword32_noerr(&stack[1]);
1016 s2 = fasttrap_fuword32_noerr(&stack[2]);
1017 s3 = fasttrap_fuword32_noerr(&stack[3]);
1018 s4 = fasttrap_fuword32_noerr(&stack[4]);
1019 s5 = fasttrap_fuword32_noerr(&stack[5]);
1020
1021 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
1022 fasttrap_probe_t *probe = id->fti_probe;
1023
1024 if (id->fti_ptype == DTFTP_ENTRY) {
1025 /*
1026 * We note that this was an entry
1027 * probe to help ustack() find the
1028 * first caller.
1029 */
1030 cookie = dtrace_interrupt_disable();
1031 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
1032 dtrace_probe(probe->ftp_id, s1, s2,
1033 s3, s4, s5);
1034 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1035 dtrace_interrupt_enable(cookie);
1036 } else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1037 /*
1038 * Note that in this case, we don't
1039 * call dtrace_probe() since it's only
1040 * an artificial probe meant to change
1041 * the flow of control so that it
1042 * encounters the true probe.
1043 */
1044 is_enabled = 1;
1045 } else if (probe->ftp_argmap == NULL) {
1046 dtrace_probe(probe->ftp_id, s0, s1,
1047 s2, s3, s4);
1048 } else {
1049 uint32_t t[5];
1050
1051 fasttrap_usdt_args32(probe, rp,
1052 sizeof (t) / sizeof (t[0]), t);
1053
1054 dtrace_probe(probe->ftp_id, t[0], t[1],
1055 t[2], t[3], t[4]);
1056 }
1057 }
1058 #ifdef __amd64
1059 }
1060 #endif
1061 }
1062
1063 /*
1064 * We're about to do a bunch of work so we cache a local copy of
1065 * the tracepoint to emulate the instruction, and then find the
1066 * tracepoint again later if we need to light up any return probes.
1067 */
1068 tp_local = *tp;
1069 mutex_exit(pid_mtx);
1070 tp = &tp_local;
1071
1072 /*
1073 * Set the program counter to appear as though the traced instruction
1074 * had completely executed. This ensures that fasttrap_getreg() will
1075 * report the expected value for REG_RIP.
1076 */
1077 rp->r_pc = pc + tp->ftt_size;
1078
1079 /*
1080 * If there's an is-enabled probe connected to this tracepoint it
1081 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1082 * instruction that was placed there by DTrace when the binary was
1083 * linked. As this probe is, in fact, enabled, we need to stuff 1
1084 * into %eax or %rax. Accordingly, we can bypass all the instruction
1085 * emulation logic since we know the inevitable result. It's possible
1086 * that a user could construct a scenario where the 'is-enabled'
1087 * probe was on some other instruction, but that would be a rather
1088 * exotic way to shoot oneself in the foot.
1089 */
1090 if (is_enabled) {
1091 rp->r_r0 = 1;
1092 new_pc = rp->r_pc;
1093 goto done;
1094 }
1095
1096 /*
1097 * We emulate certain types of instructions to ensure correctness
1098 * (in the case of position dependent instructions) or optimize
1099 * common cases. The rest we have the thread execute back in user-
1100 * land.
1101 */
1102 switch (tp->ftt_type) {
1103 case FASTTRAP_T_RET:
1104 case FASTTRAP_T_RET16:
1105 {
1106 uintptr_t dst;
1107 uintptr_t addr;
1108 int ret;
1109
1110 /*
1111 * We have to emulate _every_ facet of the behavior of a ret
1112 * instruction including what happens if the load from %esp
1113 * fails; in that case, we send a SIGSEGV.
1114 */
1115 #ifdef __amd64
1116 if (p->p_model == DATAMODEL_NATIVE) {
1117 #endif
1118 ret = fasttrap_fulword((void *)rp->r_sp, &dst);
1119 addr = rp->r_sp + sizeof (uintptr_t);
1120 #ifdef __amd64
1121 } else {
1122 uint32_t dst32;
1123 ret = fasttrap_fuword32((void *)rp->r_sp, &dst32);
1124 dst = dst32;
1125 addr = rp->r_sp + sizeof (uint32_t);
1126 }
1127 #endif
1128
1129 if (ret == -1) {
1130 fasttrap_sigsegv(p, curthread, rp->r_sp);
1131 new_pc = pc;
1132 break;
1133 }
1134
1135 if (tp->ftt_type == FASTTRAP_T_RET16)
1136 addr += tp->ftt_dest;
1137
1138 rp->r_sp = addr;
1139 new_pc = dst;
1140 break;
1141 }
1142
1143 case FASTTRAP_T_JCC:
1144 {
1145 uint_t taken;
1146
1147 switch (tp->ftt_code) {
1148 case FASTTRAP_JO:
1149 taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) != 0;
1150 break;
1151 case FASTTRAP_JNO:
1152 taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) == 0;
1153 break;
1154 case FASTTRAP_JB:
1155 taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0;
1156 break;
1157 case FASTTRAP_JAE:
1158 taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0;
1159 break;
1160 case FASTTRAP_JE:
1161 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1162 break;
1163 case FASTTRAP_JNE:
1164 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1165 break;
1166 case FASTTRAP_JBE:
1167 taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0 ||
1168 (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1169 break;
1170 case FASTTRAP_JA:
1171 taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0 &&
1172 (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1173 break;
1174 case FASTTRAP_JS:
1175 taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) != 0;
1176 break;
1177 case FASTTRAP_JNS:
1178 taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) == 0;
1179 break;
1180 case FASTTRAP_JP:
1181 taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) != 0;
1182 break;
1183 case FASTTRAP_JNP:
1184 taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) == 0;
1185 break;
1186 case FASTTRAP_JL:
1187 taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1188 ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1189 break;
1190 case FASTTRAP_JGE:
1191 taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1192 ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1193 break;
1194 case FASTTRAP_JLE:
1195 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 ||
1196 ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1197 ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1198 break;
1199 case FASTTRAP_JG:
1200 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1201 ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1202 ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1203 break;
1204
1205 }
1206
1207 if (taken)
1208 new_pc = tp->ftt_dest;
1209 else
1210 new_pc = pc + tp->ftt_size;
1211 break;
1212 }
1213
1214 case FASTTRAP_T_LOOP:
1215 {
1216 uint_t taken;
1217 #ifdef __amd64
1218 greg_t cx = rp->r_rcx--;
1219 #else
1220 greg_t cx = rp->r_ecx--;
1221 #endif
1222
1223 switch (tp->ftt_code) {
1224 case FASTTRAP_LOOPNZ:
1225 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1226 cx != 0;
1227 break;
1228 case FASTTRAP_LOOPZ:
1229 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 &&
1230 cx != 0;
1231 break;
1232 case FASTTRAP_LOOP:
1233 taken = (cx != 0);
1234 break;
1235 }
1236
1237 if (taken)
1238 new_pc = tp->ftt_dest;
1239 else
1240 new_pc = pc + tp->ftt_size;
1241 break;
1242 }
1243
1244 case FASTTRAP_T_JCXZ:
1245 {
1246 #ifdef __amd64
1247 greg_t cx = rp->r_rcx;
1248 #else
1249 greg_t cx = rp->r_ecx;
1250 #endif
1251
1252 if (cx == 0)
1253 new_pc = tp->ftt_dest;
1254 else
1255 new_pc = pc + tp->ftt_size;
1256 break;
1257 }
1258
1259 case FASTTRAP_T_PUSHL_EBP:
1260 {
1261 int ret;
1262 uintptr_t addr;
1263 #ifdef __amd64
1264 if (p->p_model == DATAMODEL_NATIVE) {
1265 #endif
1266 addr = rp->r_sp - sizeof (uintptr_t);
1267 ret = fasttrap_sulword((void *)addr, rp->r_fp);
1268 #ifdef __amd64
1269 } else {
1270 addr = rp->r_sp - sizeof (uint32_t);
1271 ret = fasttrap_suword32((void *)addr,
1272 (uint32_t)rp->r_fp);
1273 }
1274 #endif
1275
1276 if (ret == -1) {
1277 fasttrap_sigsegv(p, curthread, addr);
1278 new_pc = pc;
1279 break;
1280 }
1281
1282 rp->r_sp = addr;
1283 new_pc = pc + tp->ftt_size;
1284 break;
1285 }
1286
1287 case FASTTRAP_T_NOP:
1288 new_pc = pc + tp->ftt_size;
1289 break;
1290
1291 case FASTTRAP_T_JMP:
1292 case FASTTRAP_T_CALL:
1293 if (tp->ftt_code == 0) {
1294 new_pc = tp->ftt_dest;
1295 } else {
1296 uintptr_t value, addr = tp->ftt_dest;
1297
1298 if (tp->ftt_base != FASTTRAP_NOREG)
1299 addr += fasttrap_getreg(rp, tp->ftt_base);
1300 if (tp->ftt_index != FASTTRAP_NOREG)
1301 addr += fasttrap_getreg(rp, tp->ftt_index) <<
1302 tp->ftt_scale;
1303
1304 if (tp->ftt_code == 1) {
1305 /*
1306 * If there's a segment prefix for this
1307 * instruction, we'll need to check permissions
1308 * and bounds on the given selector, and adjust
1309 * the address accordingly.
1310 */
1311 if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
1312 fasttrap_do_seg(tp, rp, &addr) != 0) {
1313 fasttrap_sigsegv(p, curthread, addr);
1314 new_pc = pc;
1315 break;
1316 }
1317
1318 #ifdef __amd64
1319 if (p->p_model == DATAMODEL_NATIVE) {
1320 #endif
1321 if (fasttrap_fulword((void *)addr,
1322 &value) == -1) {
1323 fasttrap_sigsegv(p, curthread,
1324 addr);
1325 new_pc = pc;
1326 break;
1327 }
1328 new_pc = value;
1329 #ifdef __amd64
1330 } else {
1331 uint32_t value32;
1332 addr = (uintptr_t)(uint32_t)addr;
1333 if (fasttrap_fuword32((void *)addr,
1334 &value32) == -1) {
1335 fasttrap_sigsegv(p, curthread,
1336 addr);
1337 new_pc = pc;
1338 break;
1339 }
1340 new_pc = value32;
1341 }
1342 #endif
1343 } else {
1344 new_pc = addr;
1345 }
1346 }
1347
1348 /*
1349 * If this is a call instruction, we need to push the return
1350 * address onto the stack. If this fails, we send the process
1351 * a SIGSEGV and reset the pc to emulate what would happen if
1352 * this instruction weren't traced.
1353 */
1354 if (tp->ftt_type == FASTTRAP_T_CALL) {
1355 int ret;
1356 uintptr_t addr;
1357 #ifdef __amd64
1358 if (p->p_model == DATAMODEL_NATIVE) {
1359 addr = rp->r_sp - sizeof (uintptr_t);
1360 ret = fasttrap_sulword((void *)addr,
1361 pc + tp->ftt_size);
1362 } else {
1363 #endif
1364 addr = rp->r_sp - sizeof (uint32_t);
1365 ret = fasttrap_suword32((void *)addr,
1366 (uint32_t)(pc + tp->ftt_size));
1367 #ifdef __amd64
1368 }
1369 #endif
1370
1371 if (ret == -1) {
1372 fasttrap_sigsegv(p, curthread, addr);
1373 new_pc = pc;
1374 break;
1375 }
1376
1377 rp->r_sp = addr;
1378 }
1379
1380 break;
1381
1382 case FASTTRAP_T_COMMON:
1383 {
1384 uintptr_t addr;
1385 #if defined(__amd64)
1386 uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];
1387 #else
1388 uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7];
1389 #endif
1390 uint_t i = 0;
1391 klwp_t *lwp = ttolwp(curthread);
1392
1393 /*
1394 * Compute the address of the ulwp_t and step over the
1395 * ul_self pointer. The method used to store the user-land
1396 * thread pointer is very different on 32- and 64-bit
1397 * kernels.
1398 */
1399 #if defined(__amd64)
1400 if (p->p_model == DATAMODEL_LP64) {
1401 addr = lwp->lwp_pcb.pcb_fsbase;
1402
1403 /*
1404 * If we're branded, convert the fsbase from the
1405 * brand's fsbase to the native fsbase.
1406 */
1407 if (PROC_IS_BRANDED(p) && BRMOP(p)->b_fsbase != NULL)
1408 addr = BRMOP(p)->b_fsbase(lwp, addr);
1409
1410 addr += sizeof (void *);
1411 } else {
1412 addr = lwp->lwp_pcb.pcb_gsbase;
1413 addr += sizeof (caddr32_t);
1414 }
1415 #else
1416 addr = USEGD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
1417 addr += sizeof (void *);
1418 #endif
1419
1420 /*
1421 * Generic Instruction Tracing
1422 * ---------------------------
1423 *
1424 * This is the layout of the scratch space in the user-land
1425 * thread structure for our generated instructions.
1426 *
1427 * 32-bit mode bytes
1428 * ------------------------ -----
1429 * a: <original instruction> <= 15
1430 * jmp <pc + tp->ftt_size> 5
1431 * b: <original instruction> <= 15
1432 * int T_DTRACE_RET 2
1433 * -----
1434 * <= 37
1435 *
1436 * 64-bit mode bytes
1437 * ------------------------ -----
1438 * a: <original instruction> <= 15
1439 * jmp 0(%rip) 6
1440 * <pc + tp->ftt_size> 8
1441 * b: <original instruction> <= 15
1442 * int T_DTRACE_RET 2
1443 * -----
1444 * <= 46
1445 *
1446 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1447 * to b. If we encounter a signal on the way out of the
1448 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1449 * so that we execute the original instruction and re-enter
1450 * the kernel rather than redirecting to the next instruction.
1451 *
1452 * If there are return probes (so we know that we're going to
1453 * need to reenter the kernel after executing the original
1454 * instruction), the scratch space will just contain the
1455 * original instruction followed by an interrupt -- the same
1456 * data as at b.
1457 *
1458 * %rip-relative Addressing
1459 * ------------------------
1460 *
1461 * There's a further complication in 64-bit mode due to %rip-
1462 * relative addressing. While this is clearly a beneficial
1463 * architectural decision for position independent code, it's
1464 * hard not to see it as a personal attack against the pid
1465 * provider since before there was a relatively small set of
1466 * instructions to emulate; with %rip-relative addressing,
1467 * almost every instruction can potentially depend on the
1468 * address at which it's executed. Rather than emulating
1469 * the broad spectrum of instructions that can now be
1470 * position dependent, we emulate jumps and others as in
1471 * 32-bit mode, and take a different tack for instructions
1472 * using %rip-relative addressing.
1473 *
1474 * For every instruction that uses the ModRM byte, the
1475 * in-kernel disassembler reports its location. We use the
1476 * ModRM byte to identify that an instruction uses
1477 * %rip-relative addressing and to see what other registers
1478 * the instruction uses. To emulate those instructions,
1479 * we modify the instruction to be %rax-relative rather than
1480 * %rip-relative (or %rcx-relative if the instruction uses
1481 * %rax; or %r8- or %r9-relative if the REX.B is present so
1482 * we don't have to rewrite the REX prefix). We then load
1483 * the value that %rip would have been into the scratch
1484 * register and generate an instruction to reset the scratch
1485 * register back to its original value. The instruction
1486 * sequence looks like this:
1487 *
1488 * 64-mode %rip-relative bytes
1489 * ------------------------ -----
1490 * a: <modified instruction> <= 15
1491 * movq $<value>, %<scratch> 6
1492 * jmp 0(%rip) 6
1493 * <pc + tp->ftt_size> 8
1494 * b: <modified instruction> <= 15
1495 * int T_DTRACE_RET 2
1496 * -----
1497 * 52
1498 *
1499 * We set curthread->t_dtrace_regv so that upon receiving
1500 * a signal we can reset the value of the scratch register.
1501 */
1502
1503 ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);
1504
1505 curthread->t_dtrace_scrpc = addr;
1506 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1507 i += tp->ftt_size;
1508
1509 #ifdef __amd64
1510 if (tp->ftt_ripmode != 0) {
1511 greg_t *reg;
1512
1513 ASSERT(p->p_model == DATAMODEL_LP64);
1514 ASSERT(tp->ftt_ripmode &
1515 (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));
1516
1517 /*
1518 * If this was a %rip-relative instruction, we change
1519 * it to be either a %rax- or %rcx-relative
1520 * instruction (depending on whether those registers
1521 * are used as another operand; or %r8- or %r9-
1522 * relative depending on the value of REX.B). We then
1523 * set that register and generate a movq instruction
1524 * to reset the value.
1525 */
1526 if (tp->ftt_ripmode & FASTTRAP_RIP_X)
1527 scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
1528 else
1529 scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);
1530
1531 if (tp->ftt_ripmode & FASTTRAP_RIP_1)
1532 scratch[i++] = FASTTRAP_MOV_EAX;
1533 else
1534 scratch[i++] = FASTTRAP_MOV_ECX;
1535
1536 switch (tp->ftt_ripmode) {
1537 case FASTTRAP_RIP_1:
1538 reg = &rp->r_rax;
1539 curthread->t_dtrace_reg = REG_RAX;
1540 break;
1541 case FASTTRAP_RIP_2:
1542 reg = &rp->r_rcx;
1543 curthread->t_dtrace_reg = REG_RCX;
1544 break;
1545 case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
1546 reg = &rp->r_r8;
1547 curthread->t_dtrace_reg = REG_R8;
1548 break;
1549 case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
1550 reg = &rp->r_r9;
1551 curthread->t_dtrace_reg = REG_R9;
1552 break;
1553 }
1554
1555 /* LINTED - alignment */
1556 *(uint64_t *)&scratch[i] = *reg;
1557 curthread->t_dtrace_regv = *reg;
1558 *reg = pc + tp->ftt_size;
1559 i += sizeof (uint64_t);
1560 }
1561 #endif
1562
1563 /*
1564 * Generate the branch instruction to what would have
1565 * normally been the subsequent instruction. In 32-bit mode,
1566 * this is just a relative branch; in 64-bit mode this is a
1567 * %rip-relative branch that loads the 64-bit pc value
1568 * immediately after the jmp instruction.
1569 */
1570 #ifdef __amd64
1571 if (p->p_model == DATAMODEL_LP64) {
1572 scratch[i++] = FASTTRAP_GROUP5_OP;
1573 scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
1574 /* LINTED - alignment */
1575 *(uint32_t *)&scratch[i] = 0;
1576 i += sizeof (uint32_t);
1577 /* LINTED - alignment */
1578 *(uint64_t *)&scratch[i] = pc + tp->ftt_size;
1579 i += sizeof (uint64_t);
1580 } else {
1581 #endif
1582 /*
1583 * Set up the jmp to the next instruction; note that
1584 * the size of the traced instruction cancels out.
1585 */
1586 scratch[i++] = FASTTRAP_JMP32;
1587 /* LINTED - alignment */
1588 *(uint32_t *)&scratch[i] = pc - addr - 5;
1589 i += sizeof (uint32_t);
1590 #ifdef __amd64
1591 }
1592 #endif
1593
1594 curthread->t_dtrace_astpc = addr + i;
1595 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1596 i += tp->ftt_size;
1597 scratch[i++] = FASTTRAP_INT;
1598 scratch[i++] = T_DTRACE_RET;
1599
1600 ASSERT(i <= sizeof (scratch));
1601
1602 if (fasttrap_copyout(scratch, (char *)addr, i)) {
1603 fasttrap_sigtrap(p, curthread, pc);
1604 new_pc = pc;
1605 break;
1606 }
1607
1608 if (tp->ftt_retids != NULL) {
1609 curthread->t_dtrace_step = 1;
1610 curthread->t_dtrace_ret = 1;
1611 new_pc = curthread->t_dtrace_astpc;
1612 } else {
1613 new_pc = curthread->t_dtrace_scrpc;
1614 }
1615
1616 curthread->t_dtrace_pc = pc;
1617 curthread->t_dtrace_npc = pc + tp->ftt_size;
1618 curthread->t_dtrace_on = 1;
1619 break;
1620 }
1621
1622 default:
1623 panic("fasttrap: mishandled an instruction");
1624 }
1625
1626 done:
1627 /*
1628 * If there were no return probes when we first found the tracepoint,
1629 * we should feel no obligation to honor any return probes that were
1630 * subsequently enabled -- they'll just have to wait until the next
1631 * time around.
1632 */
1633 if (tp->ftt_retids != NULL) {
1634 /*
1635 * We need to wait until the results of the instruction are
1636 * apparent before invoking any return probes. If this
1637 * instruction was emulated we can just call
1638 * fasttrap_return_common(); if it needs to be executed, we
1639 * need to wait until the user thread returns to the kernel.
1640 */
1641 if (tp->ftt_type != FASTTRAP_T_COMMON) {
1642 /*
1643 * Set the program counter to the address of the traced
1644 * instruction so that it looks right in ustack()
1645 * output. We had previously set it to the end of the
1646 * instruction to simplify %rip-relative addressing.
1647 */
1648 rp->r_pc = pc;
1649
1650 fasttrap_return_common(rp, pc, pid, new_pc);
1651 } else {
1652 ASSERT(curthread->t_dtrace_ret != 0);
1653 ASSERT(curthread->t_dtrace_pc == pc);
1654 ASSERT(curthread->t_dtrace_scrpc != 0);
1655 ASSERT(new_pc == curthread->t_dtrace_astpc);
1656 }
1657 }
1658
1659 rp->r_pc = new_pc;
1660
1661 return (0);
1662 }
1663
1664 int
1665 fasttrap_return_probe(struct regs *rp)
1666 {
1667 proc_t *p = curproc;
1668 uintptr_t pc = curthread->t_dtrace_pc;
1669 uintptr_t npc = curthread->t_dtrace_npc;
1670
1671 curthread->t_dtrace_pc = 0;
1672 curthread->t_dtrace_npc = 0;
1673 curthread->t_dtrace_scrpc = 0;
1674 curthread->t_dtrace_astpc = 0;
1675
1676 /*
1677 * Treat a child created by a call to vfork(2) as if it were its
1678 * parent. We know that there's only one thread of control in such a
1679 * process: this one.
1680 */
1681 while (p->p_flag & SVFORK) {
1682 p = p->p_parent;
1683 }
1684
1685 /*
1686 * We set rp->r_pc to the address of the traced instruction so
1687 * that it appears to dtrace_probe() that we're on the original
1688 * instruction, and so that the user can't easily detect our
1689 * complex web of lies. dtrace_return_probe() (our caller)
1690 * will correctly set %pc after we return.
1691 */
1692 rp->r_pc = pc;
1693
1694 fasttrap_return_common(rp, pc, p->p_pid, npc);
1695
1696 return (0);
1697 }
1698
1699 /*ARGSUSED*/
1700 uint64_t
1701 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1702 int aframes)
1703 {
1704 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 1, argno));
1705 }
1706
1707 /*ARGSUSED*/
1708 uint64_t
1709 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1710 int aframes)
1711 {
1712 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno));
1713 }
1714
1715 static ulong_t
1716 fasttrap_getreg(struct regs *rp, uint_t reg)
1717 {
1718 #ifdef __amd64
1719 switch (reg) {
1720 case REG_R15: return (rp->r_r15);
1721 case REG_R14: return (rp->r_r14);
1722 case REG_R13: return (rp->r_r13);
1723 case REG_R12: return (rp->r_r12);
1724 case REG_R11: return (rp->r_r11);
1725 case REG_R10: return (rp->r_r10);
1726 case REG_R9: return (rp->r_r9);
1727 case REG_R8: return (rp->r_r8);
1728 case REG_RDI: return (rp->r_rdi);
1729 case REG_RSI: return (rp->r_rsi);
1730 case REG_RBP: return (rp->r_rbp);
1731 case REG_RBX: return (rp->r_rbx);
1732 case REG_RDX: return (rp->r_rdx);
1733 case REG_RCX: return (rp->r_rcx);
1734 case REG_RAX: return (rp->r_rax);
1735 case REG_TRAPNO: return (rp->r_trapno);
1736 case REG_ERR: return (rp->r_err);
1737 case REG_RIP: return (rp->r_rip);
1738 case REG_CS: return (rp->r_cs);
1739 case REG_RFL: return (rp->r_rfl);
1740 case REG_RSP: return (rp->r_rsp);
1741 case REG_SS: return (rp->r_ss);
1742 case REG_FS: return (rp->r_fs);
1743 case REG_GS: return (rp->r_gs);
1744 case REG_DS: return (rp->r_ds);
1745 case REG_ES: return (rp->r_es);
1746 case REG_FSBASE: return (rdmsr(MSR_AMD_FSBASE));
1747 case REG_GSBASE: return (rdmsr(MSR_AMD_GSBASE));
1748 }
1749
1750 panic("dtrace: illegal register constant");
1751 /*NOTREACHED*/
1752 #else
1753 if (reg >= _NGREG)
1754 panic("dtrace: illegal register constant");
1755
1756 return (((greg_t *)&rp->r_gs)[reg]);
1757 #endif
1758 }