1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright (c) 2015, Joyent, Inc. All rights reserved.
  29  */
  30 
  31 #include <sys/fasttrap_isa.h>
  32 #include <sys/fasttrap_impl.h>
  33 #include <sys/dtrace.h>
  34 #include <sys/dtrace_impl.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/regset.h>
  37 #include <sys/privregs.h>
  38 #include <sys/segments.h>
  39 #include <sys/x86_archext.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/trap.h>
  42 #include <sys/archsystm.h>
  43 #include <sys/proc.h>
  44 #include <sys/brand.h>
  45 #include <sys/machbrand.h>
  46 
  47 /*
  48  * Lossless User-Land Tracing on x86
  49  * ---------------------------------
  50  *
  51  * The execution of most instructions is not dependent on the address; for
  52  * these instructions it is sufficient to copy them into the user process's
  53  * address space and execute them. To effectively single-step an instruction
  54  * in user-land, we copy out the following sequence of instructions to scratch
  55  * space in the user thread's ulwp_t structure.
  56  *
  57  * We then set the program counter (%eip or %rip) to point to this scratch
  58  * space. Once execution resumes, the original instruction is executed and
  59  * then control flow is redirected to what was originally the subsequent
  60  * instruction. If the kernel attempts to deliver a signal while single-
  61  * stepping, the signal is deferred and the program counter is moved into the
  62  * second sequence of instructions. The second sequence ends in a trap into
  63  * the kernel where the deferred signal is then properly handled and delivered.
  64  *
  65  * For instructions whose execution is position dependent, we perform simple
  66  * emulation. These instructions are limited to control transfer
  67  * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
  68  * of %rip-relative addressing that means that almost any instruction can be
  69  * position dependent. For all the details on how we emulate generic
  70  * instructions, including %rip-relative instructions, see the code in
  71  * fasttrap_pid_probe() below where we handle instructions of type
  72  * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
  73  */
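     /*
      * In rough outline -- the precise byte-level layout, including the
      * 64-bit variant, is spelled out under "Generic Instruction Tracing"
      * in fasttrap_pid_probe() below -- the scratch space holds two copies
      * of the traced instruction:
      *
      *     <original instruction>
      *     jump back to the instruction following the tracepoint
      *
      *     <original instruction>
      *     trap into the kernel (used only for deferred signal delivery)
      *
      * Ordinary single-stepping runs through the first sequence; the second
      * is entered only when a signal must be deferred, as described above.
      */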
  74 
  75 #define FASTTRAP_MODRM_MOD(modrm)       (((modrm) >> 6) & 0x3)
  76 #define FASTTRAP_MODRM_REG(modrm)       (((modrm) >> 3) & 0x7)
  77 #define FASTTRAP_MODRM_RM(modrm)        ((modrm) & 0x7)
  78 #define FASTTRAP_MODRM(mod, reg, rm)    (((mod) << 6) | ((reg) << 3) | (rm))
  79 
  80 #define FASTTRAP_SIB_SCALE(sib)         (((sib) >> 6) & 0x3)
  81 #define FASTTRAP_SIB_INDEX(sib)         (((sib) >> 3) & 0x7)
  82 #define FASTTRAP_SIB_BASE(sib)          ((sib) & 0x7)
  83 
  84 #define FASTTRAP_REX_W(rex)             (((rex) >> 3) & 1)
  85 #define FASTTRAP_REX_R(rex)             (((rex) >> 2) & 1)
  86 #define FASTTRAP_REX_X(rex)             (((rex) >> 1) & 1)
  87 #define FASTTRAP_REX_B(rex)             ((rex) & 1)
  88 #define FASTTRAP_REX(w, r, x, b)        \
  89         (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))
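     /*
      * As a worked example of the decode macros above: the ModR/M byte
      * 0xd0 following the group 5 opcode 0xff (i.e. "call *%rax") breaks
      * down as
      *
      *     FASTTRAP_MODRM_MOD(0xd0) == 3   (register-direct operand)
      *     FASTTRAP_MODRM_REG(0xd0) == 2   (opcode extension: call)
      *     FASTTRAP_MODRM_RM(0xd0)  == 0   (%rax/%eax, absent REX.B)
      */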
  90 
  91 /*
  92  * Single-byte op-codes.
  93  */
  94 #define FASTTRAP_PUSHL_EBP      0x55
  95 
  96 #define FASTTRAP_JO             0x70
  97 #define FASTTRAP_JNO            0x71
  98 #define FASTTRAP_JB             0x72
  99 #define FASTTRAP_JAE            0x73
 100 #define FASTTRAP_JE             0x74
 101 #define FASTTRAP_JNE            0x75
 102 #define FASTTRAP_JBE            0x76
 103 #define FASTTRAP_JA             0x77
 104 #define FASTTRAP_JS             0x78
 105 #define FASTTRAP_JNS            0x79
 106 #define FASTTRAP_JP             0x7a
 107 #define FASTTRAP_JNP            0x7b
 108 #define FASTTRAP_JL             0x7c
 109 #define FASTTRAP_JGE            0x7d
 110 #define FASTTRAP_JLE            0x7e
 111 #define FASTTRAP_JG             0x7f
 112 
 113 #define FASTTRAP_NOP            0x90
 114 
 115 #define FASTTRAP_MOV_EAX        0xb8
 116 #define FASTTRAP_MOV_ECX        0xb9
 117 
 118 #define FASTTRAP_RET16          0xc2
 119 #define FASTTRAP_RET            0xc3
 120 
 121 #define FASTTRAP_LOOPNZ         0xe0
 122 #define FASTTRAP_LOOPZ          0xe1
 123 #define FASTTRAP_LOOP           0xe2
 124 #define FASTTRAP_JCXZ           0xe3
 125 
 126 #define FASTTRAP_CALL           0xe8
 127 #define FASTTRAP_JMP32          0xe9
 128 #define FASTTRAP_JMP8           0xeb
 129 
 130 #define FASTTRAP_INT3           0xcc
 131 #define FASTTRAP_INT            0xcd
 132 
 133 #define FASTTRAP_2_BYTE_OP      0x0f
 134 #define FASTTRAP_GROUP5_OP      0xff
 135 
 136 /*
 137  * Two-byte op-codes (second byte only).
 138  */
 139 #define FASTTRAP_0F_JO          0x80
 140 #define FASTTRAP_0F_JNO         0x81
 141 #define FASTTRAP_0F_JB          0x82
 142 #define FASTTRAP_0F_JAE         0x83
 143 #define FASTTRAP_0F_JE          0x84
 144 #define FASTTRAP_0F_JNE         0x85
 145 #define FASTTRAP_0F_JBE         0x86
 146 #define FASTTRAP_0F_JA          0x87
 147 #define FASTTRAP_0F_JS          0x88
 148 #define FASTTRAP_0F_JNS         0x89
 149 #define FASTTRAP_0F_JP          0x8a
 150 #define FASTTRAP_0F_JNP         0x8b
 151 #define FASTTRAP_0F_JL          0x8c
 152 #define FASTTRAP_0F_JGE         0x8d
 153 #define FASTTRAP_0F_JLE         0x8e
 154 #define FASTTRAP_0F_JG          0x8f
 155 
 156 #define FASTTRAP_EFLAGS_OF      0x800
 157 #define FASTTRAP_EFLAGS_DF      0x400
 158 #define FASTTRAP_EFLAGS_SF      0x080
 159 #define FASTTRAP_EFLAGS_ZF      0x040
 160 #define FASTTRAP_EFLAGS_AF      0x010
 161 #define FASTTRAP_EFLAGS_PF      0x004
 162 #define FASTTRAP_EFLAGS_CF      0x001
 163 
 164 /*
 165  * Instruction prefixes.
 166  */
 167 #define FASTTRAP_PREFIX_OPERAND 0x66
 168 #define FASTTRAP_PREFIX_ADDRESS 0x67
 169 #define FASTTRAP_PREFIX_CS      0x2E
 170 #define FASTTRAP_PREFIX_DS      0x3E
 171 #define FASTTRAP_PREFIX_ES      0x26
 172 #define FASTTRAP_PREFIX_FS      0x64
 173 #define FASTTRAP_PREFIX_GS      0x65
 174 #define FASTTRAP_PREFIX_SS      0x36
 175 #define FASTTRAP_PREFIX_LOCK    0xF0
 176 #define FASTTRAP_PREFIX_REP     0xF3
 177 #define FASTTRAP_PREFIX_REPNE   0xF2
 178 
 179 #define FASTTRAP_NOREG  0xff
 180 
 181 /*
 182  * Map between instruction register encodings and the kernel constants which
 183  * correspond to indices into struct regs.
 184  */
 185 #ifdef __amd64
 186 static const uint8_t regmap[16] = {
 187         REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
 188         REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
 189 };
 190 #else
 191 static const uint8_t regmap[8] = {
 192         EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI
 193 };
 194 #endif
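     /*
      * On amd64, the REX.B and REX.X bits extend the 3-bit r/m and index
      * fields into the full 16-entry table above; for example, r/m == 0
      * with REX.B set selects regmap[8] (REG_R8) rather than REG_RAX. The
      * composition itself happens at the regmap[] lookups in
      * fasttrap_tracepoint_init() below.
      */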
 195 
 196 static ulong_t fasttrap_getreg(struct regs *, uint_t);
 197 
 198 static uint64_t
 199 fasttrap_anarg(struct regs *rp, int function_entry, int argno)
 200 {
 201         uint64_t value;
 202         int shift = function_entry ? 1 : 0;
 203 
 204 #ifdef __amd64
 205         if (curproc->p_model == DATAMODEL_LP64) {
 206                 uintptr_t *stack;
 207 
 208                 /*
 209                  * In 64-bit mode, the first six arguments are stored in
 210                  * registers.
 211                  */
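                     /*
                      * For example, argument 6 at a function entry probe
                      * (function_entry != 0) is not in a register; we fall
                      * through to the stack load below with shift == 1 and
                      * read stack[argno - 6 + shift], i.e. stack[1], which
                      * skips the return address at stack[0].
                      */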
 212                 if (argno < 6)
 213                         return ((&rp->r_rdi)[argno]);
 214 
 215                 stack = (uintptr_t *)rp->r_sp;
 216                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
 217                 value = dtrace_fulword(&stack[argno - 6 + shift]);
 218                 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
 219         } else {
 220 #endif
 221                 uint32_t *stack = (uint32_t *)rp->r_sp;
 222                 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
 223                 value = dtrace_fuword32(&stack[argno + shift]);
 224                 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
 225 #ifdef __amd64
 226         }
 227 #endif
 228 
 229         return (value);
 230 }
 231 
 232 /*ARGSUSED*/
 233 int
 234 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
 235     fasttrap_probe_type_t type)
 236 {
 237         uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];
 238         size_t len = FASTTRAP_MAX_INSTR_SIZE;
 239         size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET));
 240         uint_t start = 0;
 241         int rmindex, size;
 242         uint8_t seg, rex = 0;
 243 
 244         /*
 245          * Read the instruction at the given address out of the process's
 246          * address space. We don't have to worry about a debugger
 247          * changing this instruction before we overwrite it with our trap
 248          * instruction since P_PR_LOCK is set. Since instructions can span
 249          * pages, we potentially read the instruction in two parts. If the
 250          * second part fails, we just zero out that part of the instruction.
 251          */
 252         if (uread(p, &instr[0], first, pc) != 0)
 253                 return (-1);
 254         if (len > first &&
 255             uread(p, &instr[first], len - first, pc + first) != 0) {
 256                 bzero(&instr[first], len - first);
 257                 len = first;
 258         }
 259 
 260         /*
 261          * If the disassembly fails, then we have a malformed instruction.
 262          */
 263         if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0)
 264                 return (-1);
 265 
 266         /*
 267          * Make sure the disassembler isn't completely broken.
 268          */
 269         ASSERT(-1 <= rmindex && rmindex < size);
 270 
 271         /*
 272          * If the computed size is greater than the number of bytes read,
 273          * then it was a malformed instruction, possibly because it fell on a
 274          * page boundary and the subsequent page was missing, or because of
 275          * some malicious user.
 276          */
 277         if (size > len)
 278                 return (-1);
 279 
 280         tp->ftt_size = (uint8_t)size;
 281         tp->ftt_segment = FASTTRAP_SEG_NONE;
 282 
 283         /*
 284          * Find the start of the instruction's opcode by processing any
 285          * legacy prefixes.
 286          */
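             /*
              * Note that the deliberate fall-throughs below increment seg
              * once for the matched segment-override case and once for each
              * segment case that follows it, leaving seg equal to the
              * corresponding FASTTRAP_SEG_* value by the time it is
              * recorded in tp->ftt_segment.
              */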
 287         for (;;) {
 288                 seg = 0;
 289                 switch (instr[start]) {
 290                 case FASTTRAP_PREFIX_SS:
 291                         seg++;
 292                         /*FALLTHRU*/
 293                 case FASTTRAP_PREFIX_GS:
 294                         seg++;
 295                         /*FALLTHRU*/
 296                 case FASTTRAP_PREFIX_FS:
 297                         seg++;
 298                         /*FALLTHRU*/
 299                 case FASTTRAP_PREFIX_ES:
 300                         seg++;
 301                         /*FALLTHRU*/
 302                 case FASTTRAP_PREFIX_DS:
 303                         seg++;
 304                         /*FALLTHRU*/
 305                 case FASTTRAP_PREFIX_CS:
 306                         seg++;
 307                         /*FALLTHRU*/
 308                 case FASTTRAP_PREFIX_OPERAND:
 309                 case FASTTRAP_PREFIX_ADDRESS:
 310                 case FASTTRAP_PREFIX_LOCK:
 311                 case FASTTRAP_PREFIX_REP:
 312                 case FASTTRAP_PREFIX_REPNE:
 313                         if (seg != 0) {
 314                                 /*
 315                                  * It's illegal for an instruction to specify
 316                                  * two segment prefixes -- give up on this
 317                                  * illegal instruction.
 318                                  */
 319                                 if (tp->ftt_segment != FASTTRAP_SEG_NONE)
 320                                         return (-1);
 321 
 322                                 tp->ftt_segment = seg;
 323                         }
 324                         start++;
 325                         continue;
 326                 }
 327                 break;
 328         }
 329 
 330 #ifdef __amd64
 331         /*
 332          * Identify the REX prefix on 64-bit processes.
 333          */
 334         if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)
 335                 rex = instr[start++];
 336 #endif
 337 
 338         /*
 339          * Now that we're pretty sure that the instruction is okay, copy the
 340          * valid part to the tracepoint.
 341          */
 342         bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);
 343 
 344         tp->ftt_type = FASTTRAP_T_COMMON;
 345         if (instr[start] == FASTTRAP_2_BYTE_OP) {
 346                 switch (instr[start + 1]) {
 347                 case FASTTRAP_0F_JO:
 348                 case FASTTRAP_0F_JNO:
 349                 case FASTTRAP_0F_JB:
 350                 case FASTTRAP_0F_JAE:
 351                 case FASTTRAP_0F_JE:
 352                 case FASTTRAP_0F_JNE:
 353                 case FASTTRAP_0F_JBE:
 354                 case FASTTRAP_0F_JA:
 355                 case FASTTRAP_0F_JS:
 356                 case FASTTRAP_0F_JNS:
 357                 case FASTTRAP_0F_JP:
 358                 case FASTTRAP_0F_JNP:
 359                 case FASTTRAP_0F_JL:
 360                 case FASTTRAP_0F_JGE:
 361                 case FASTTRAP_0F_JLE:
 362                 case FASTTRAP_0F_JG:
 363                         tp->ftt_type = FASTTRAP_T_JCC;
 364                         tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;
 365                         tp->ftt_dest = pc + tp->ftt_size +
 366                             /* LINTED - alignment */
 367                             *(int32_t *)&instr[start + 2];
 368                         break;
 369                 }
 370         } else if (instr[start] == FASTTRAP_GROUP5_OP) {
 371                 uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);
 372                 uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);
 373                 uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);
 374 
 375                 if (reg == 2 || reg == 4) {
 376                         uint_t i, sz;
 377 
 378                         if (reg == 2)
 379                                 tp->ftt_type = FASTTRAP_T_CALL;
 380                         else
 381                                 tp->ftt_type = FASTTRAP_T_JMP;
 382 
 383                         if (mod == 3)
 384                                 tp->ftt_code = 2;
 385                         else
 386                                 tp->ftt_code = 1;
 387 
 388                         ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);
 389 
 390                         /*
 391                          * See AMD x86-64 Architecture Programmer's Manual
 392                          * Volume 3, Section 1.2.7, Table 1-12, and
 393                          * Appendix A.3.1, Table A-15.
 394                          */
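                             /*
                              * Worked example: the encoding ff 54 88 10
                              * ("call *0x10(%rax,%rcx,4)", no REX prefix)
                              * has mod == 1, reg == 2 (call) and rm == 4,
                              * so a SIB byte (0x88) follows: scale == 2,
                              * index == 1 (%rcx), base == 0 (%rax), trailed
                              * by an 8-bit displacement of 0x10. The code
                              * below records these in ftt_scale, ftt_index,
                              * ftt_base and ftt_dest respectively.
                              */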
 395                         if (mod != 3 && rm == 4) {
 396                                 uint8_t sib = instr[start + 2];
 397                                 uint_t index = FASTTRAP_SIB_INDEX(sib);
 398                                 uint_t base = FASTTRAP_SIB_BASE(sib);
 399 
 400                                 tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);
 401 
 402                                 tp->ftt_index = (index == 4) ?
 403                                     FASTTRAP_NOREG :
 404                                     regmap[index | (FASTTRAP_REX_X(rex) << 3)];
 405                                 tp->ftt_base = (mod == 0 && base == 5) ?
 406                                     FASTTRAP_NOREG :
 407                                     regmap[base | (FASTTRAP_REX_B(rex) << 3)];
 408 
 409                                 i = 3;
 410                                 sz = mod == 1 ? 1 : 4;
 411                         } else {
 412                                 /*
 413                                  * In 64-bit mode, mod == 0 and r/m == 5
 414                                  * denotes %rip-relative addressing; in 32-bit
 415                                  * mode, the base register isn't used. In both
 416                                  * modes, there is a 32-bit operand.
 417                                  */
 418                                 if (mod == 0 && rm == 5) {
 419 #ifdef __amd64
 420                                         if (p->p_model == DATAMODEL_LP64)
 421                                                 tp->ftt_base = REG_RIP;
 422                                         else
 423 #endif
 424                                                 tp->ftt_base = FASTTRAP_NOREG;
 425                                         sz = 4;
 426                                 } else  {
 427                                         uint8_t base = rm |
 428                                             (FASTTRAP_REX_B(rex) << 3);
 429 
 430                                         tp->ftt_base = regmap[base];
 431                                         sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;
 432                                 }
 433                                 tp->ftt_index = FASTTRAP_NOREG;
 434                                 i = 2;
 435                         }
 436 
 437                         if (sz == 1) {
 438                                 tp->ftt_dest = *(int8_t *)&instr[start + i];
 439                         } else if (sz == 4) {
 440                                 /* LINTED - alignment */
 441                                 tp->ftt_dest = *(int32_t *)&instr[start + i];
 442                         } else {
 443                                 tp->ftt_dest = 0;
 444                         }
 445                 }
 446         } else {
 447                 switch (instr[start]) {
 448                 case FASTTRAP_RET:
 449                         tp->ftt_type = FASTTRAP_T_RET;
 450                         break;
 451 
 452                 case FASTTRAP_RET16:
 453                         tp->ftt_type = FASTTRAP_T_RET16;
 454                         /* LINTED - alignment */
 455                         tp->ftt_dest = *(uint16_t *)&instr[start + 1];
 456                         break;
 457 
 458                 case FASTTRAP_JO:
 459                 case FASTTRAP_JNO:
 460                 case FASTTRAP_JB:
 461                 case FASTTRAP_JAE:
 462                 case FASTTRAP_JE:
 463                 case FASTTRAP_JNE:
 464                 case FASTTRAP_JBE:
 465                 case FASTTRAP_JA:
 466                 case FASTTRAP_JS:
 467                 case FASTTRAP_JNS:
 468                 case FASTTRAP_JP:
 469                 case FASTTRAP_JNP:
 470                 case FASTTRAP_JL:
 471                 case FASTTRAP_JGE:
 472                 case FASTTRAP_JLE:
 473                 case FASTTRAP_JG:
 474                         tp->ftt_type = FASTTRAP_T_JCC;
 475                         tp->ftt_code = instr[start];
 476                         tp->ftt_dest = pc + tp->ftt_size +
 477                             (int8_t)instr[start + 1];
 478                         break;
 479 
 480                 case FASTTRAP_LOOPNZ:
 481                 case FASTTRAP_LOOPZ:
 482                 case FASTTRAP_LOOP:
 483                         tp->ftt_type = FASTTRAP_T_LOOP;
 484                         tp->ftt_code = instr[start];
 485                         tp->ftt_dest = pc + tp->ftt_size +
 486                             (int8_t)instr[start + 1];
 487                         break;
 488 
 489                 case FASTTRAP_JCXZ:
 490                         tp->ftt_type = FASTTRAP_T_JCXZ;
 491                         tp->ftt_dest = pc + tp->ftt_size +
 492                             (int8_t)instr[start + 1];
 493                         break;
 494 
 495                 case FASTTRAP_CALL:
 496                         tp->ftt_type = FASTTRAP_T_CALL;
 497                         tp->ftt_dest = pc + tp->ftt_size +
 498                             /* LINTED - alignment */
 499                             *(int32_t *)&instr[start + 1];
 500                         tp->ftt_code = 0;
 501                         break;
 502 
 503                 case FASTTRAP_JMP32:
 504                         tp->ftt_type = FASTTRAP_T_JMP;
 505                         tp->ftt_dest = pc + tp->ftt_size +
 506                             /* LINTED - alignment */
 507                             *(int32_t *)&instr[start + 1];
 508                         break;
 509                 case FASTTRAP_JMP8:
 510                         tp->ftt_type = FASTTRAP_T_JMP;
 511                         tp->ftt_dest = pc + tp->ftt_size +
 512                             (int8_t)instr[start + 1];
 513                         break;
 514 
 515                 case FASTTRAP_PUSHL_EBP:
 516                         if (start == 0)
 517                                 tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
 518                         break;
 519 
 520                 case FASTTRAP_NOP:
 521 #ifdef __amd64
 522                         ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);
 523 
 524                         /*
 525                          * On amd64 we have to be careful not to confuse a nop
 526                          * (actually xchgl %eax, %eax) with an instruction using
 527                          * the same opcode, but that does something different
 528                          * (e.g. xchgl %r8d, %eax or xchgq %r8, %rax).
 529                          */
 530                         if (FASTTRAP_REX_B(rex) == 0)
 531 #endif
 532                                 tp->ftt_type = FASTTRAP_T_NOP;
 533                         break;
 534 
 535                 case FASTTRAP_INT3:
 536                         /*
 537                          * The pid provider shares the int3 trap with debugger
 538                          * breakpoints so we can't instrument them.
 539                          */
 540                         ASSERT(instr[start] == FASTTRAP_INSTR);
 541                         return (-1);
 542 
 543                 case FASTTRAP_INT:
 544                         /*
 545                          * Interrupts seem like they could be traced with
 546                          * no negative implications, but it's possible that
 547                          * a thread could be redirected by the trap handling
 548                          * code which would eventually return to the
 549                          * instruction after the interrupt. If the interrupt
 550                          * were in our scratch space, the subsequent
 551                          * instruction might be overwritten before we return.
 552                          * Accordingly we refuse to instrument any interrupt.
 553                          */
 554                         return (-1);
 555                 }
 556         }
 557 
 558 #ifdef __amd64
 559         if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
 560                 /*
 561                  * If the process is 64-bit and the instruction type is still
 562                  * FASTTRAP_T_COMMON -- meaning we're going to copy it out and
 563                  * execute it -- we need to watch for %rip-relative
 564                  * addressing mode. See the portion of fasttrap_pid_probe()
 565                  * below where we handle tracepoints with type
 566                  * FASTTRAP_T_COMMON for how we emulate instructions that
 567                  * employ %rip-relative addressing.
 568                  */
 569                 if (rmindex != -1) {
 570                         uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);
 571                         uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);
 572                         uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);
 573 
 574                         ASSERT(rmindex > start);
 575 
 576                         if (mod == 0 && rm == 5) {
 577                                 /*
 578                                  * We need to be sure to avoid other
 579                                  * registers used by this instruction. While
 580                                  * the reg field may determine the op code
 581                                  * rather than denoting a register, assuming
 582                                  * that it denotes a register is always safe.
 583                                  * We leave the REX field intact and use
 584                                  * whatever value's there for simplicity.
 585                                  */
 586                                 if (reg != 0) {
 587                                         tp->ftt_ripmode = FASTTRAP_RIP_1 |
 588                                             (FASTTRAP_RIP_X *
 589                                             FASTTRAP_REX_B(rex));
 590                                         rm = 0;
 591                                 } else {
 592                                         tp->ftt_ripmode = FASTTRAP_RIP_2 |
 593                                             (FASTTRAP_RIP_X *
 594                                             FASTTRAP_REX_B(rex));
 595                                         rm = 1;
 596                                 }
 597 
 598                                 tp->ftt_modrm = tp->ftt_instr[rmindex];
 599                                 tp->ftt_instr[rmindex] =
 600                                     FASTTRAP_MODRM(2, reg, rm);
 601                         }
 602                 }
 603         }
 604 #endif
 605 
 606         return (0);
 607 }
 608 
 609 int
 610 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
 611 {
 612         fasttrap_instr_t instr = FASTTRAP_INSTR;
 613 
 614         if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)
 615                 return (-1);
 616 
 617         return (0);
 618 }
 619 
 620 int
 621 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
 622 {
 623         uint8_t instr;
 624 
 625         /*
 626          * Distinguish between read or write failures and a changed
 627          * instruction.
 628          */
 629         if (uread(p, &instr, 1, tp->ftt_pc) != 0)
 630                 return (0);
 631         if (instr != FASTTRAP_INSTR)
 632                 return (0);
 633         if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
 634                 return (-1);
 635 
 636         return (0);
 637 }
 638 
 639 #ifdef __amd64
 640 static uintptr_t
 641 fasttrap_fulword_noerr(const void *uaddr)
 642 {
 643         uintptr_t ret;
 644 
 645         if (fasttrap_fulword(uaddr, &ret) == 0)
 646                 return (ret);
 647 
 648         return (0);
 649 }
 650 #endif
 651 
 652 static uint32_t
 653 fasttrap_fuword32_noerr(const void *uaddr)
 654 {
 655         uint32_t ret;
 656 
 657         if (fasttrap_fuword32(uaddr, &ret) == 0)
 658                 return (ret);
 659 
 660         return (0);
 661 }
 662 
 663 static void
 664 fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
 665     uintptr_t new_pc)
 666 {
 667         fasttrap_tracepoint_t *tp;
 668         fasttrap_bucket_t *bucket;
 669         fasttrap_id_t *id;
 670         kmutex_t *pid_mtx;
 671 
 672         pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
 673         mutex_enter(pid_mtx);
 674         bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
 675 
 676         for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
 677                 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
 678                     tp->ftt_proc->ftpc_acount != 0)
 679                         break;
 680         }
 681 
 682         /*
 683          * Don't sweat it if we can't find the tracepoint again; unlike
 684          * when we're in fasttrap_pid_probe(), finding the tracepoint here
 685          * is not essential to the correct execution of the process.
 686          */
 687         if (tp == NULL) {
 688                 mutex_exit(pid_mtx);
 689                 return;
 690         }
 691 
 692         for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
 693                 /*
 694                  * If there's a branch that could act as a return site, we
 695                  * need to trace it, and check here if the program counter is
 696                  * external to the function.
 697                  */
 698                 if (tp->ftt_type != FASTTRAP_T_RET &&
 699                     tp->ftt_type != FASTTRAP_T_RET16 &&
 700                     new_pc - id->fti_probe->ftp_faddr <
 701                     id->fti_probe->ftp_fsize)
 702                         continue;
 703 
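                     /*
                      * r_r0 and r_r1 alias the return-value registers
                      * (%eax/%edx or %rax/%rdx), so the return probe's arg1
                      * and arg2 carry the traced function's return value.
                      */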
 704                 dtrace_probe(id->fti_probe->ftp_id,
 705                     pc - id->fti_probe->ftp_faddr,
 706                     rp->r_r0, rp->r_r1, 0, 0);
 707         }
 708 
 709         mutex_exit(pid_mtx);
 710 }
 711 
 712 static void
 713 fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
 714 {
 715         sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
 716 
 717         sqp->sq_info.si_signo = SIGSEGV;
 718         sqp->sq_info.si_code = SEGV_MAPERR;
 719         sqp->sq_info.si_addr = (caddr_t)addr;
 720 
 721         mutex_enter(&p->p_lock);
 722         sigaddqa(p, t, sqp);
 723         mutex_exit(&p->p_lock);
 724 
 725         if (t != NULL)
 726                 aston(t);
 727 }
 728 
 729 #ifdef __amd64
 730 static void
 731 fasttrap_usdt_args64(fasttrap_probe_t *probe, struct regs *rp, int argc,
 732     uintptr_t *argv)
 733 {
 734         int i, x, cap = MIN(argc, probe->ftp_nargs);
 735         uintptr_t *stack = (uintptr_t *)rp->r_sp;
 736 
 737         for (i = 0; i < cap; i++) {
 738                 x = probe->ftp_argmap[i];
 739 
 740                 if (x < 6)
 741                         argv[i] = (&rp->r_rdi)[x];
 742                 else
 743                         argv[i] = fasttrap_fulword_noerr(&stack[x]);
 744         }
 745 
 746         for (; i < argc; i++) {
 747                 argv[i] = 0;
 748         }
 749 }
 750 #endif
 751 
 752 static void
 753 fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc,
 754     uint32_t *argv)
 755 {
 756         int i, x, cap = MIN(argc, probe->ftp_nargs);
 757         uint32_t *stack = (uint32_t *)rp->r_sp;
 758 
 759         for (i = 0; i < cap; i++) {
 760                 x = probe->ftp_argmap[i];
 761 
 762                 argv[i] = fasttrap_fuword32_noerr(&stack[x]);
 763         }
 764 
 765         for (; i < argc; i++) {
 766                 argv[i] = 0;
 767         }
 768 }
 769 
 770 static int
 771 fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr)
 772 {
 773         proc_t *p = curproc;
 774         user_desc_t *desc;
 775         uint16_t sel, ndx, type;
 776         uintptr_t limit;
 777 
 778         switch (tp->ftt_segment) {
 779         case FASTTRAP_SEG_CS:
 780                 sel = rp->r_cs;
 781                 break;
 782         case FASTTRAP_SEG_DS:
 783                 sel = rp->r_ds;
 784                 break;
 785         case FASTTRAP_SEG_ES:
 786                 sel = rp->r_es;
 787                 break;
 788         case FASTTRAP_SEG_FS:
 789                 sel = rp->r_fs;
 790                 break;
 791         case FASTTRAP_SEG_GS:
 792                 sel = rp->r_gs;
 793                 break;
 794         case FASTTRAP_SEG_SS:
 795                 sel = rp->r_ss;
 796                 break;
 797         }
 798 
 799         /*
 800          * Make sure the given segment register specifies a user privilege
 801          * selector rather than a kernel selector.
 802          */
 803         if (!SELISUPL(sel))
 804                 return (-1);
 805 
 806         ndx = SELTOIDX(sel);
 807 
 808         /*
 809          * Check the bounds and grab the descriptor out of the specified
 810          * descriptor table.
 811          */
 812         if (SELISLDT(sel)) {
 813                 if (ndx > p->p_ldtlimit)
 814                         return (-1);
 815 
 816                 desc = p->p_ldt + ndx;
 817 
 818         } else {
 819                 if (ndx >= NGDT)
 820                         return (-1);
 821 
 822                 desc = cpu_get_gdt() + ndx;
 823         }
 824 
 825         /*
 826          * The descriptor must have user privilege level and it must be
 827          * present in memory.
 828          */
 829         if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1)
 830                 return (-1);
 831 
 832         type = desc->usd_type;
 833 
 834         /*
 835          * If the S bit in the type field is not set, this descriptor can
 836          * only be used in system context.
 837          */
 838         if ((type & 0x10) != 0x10)
 839                 return (-1);
 840 
 841         limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1);
 842 
 843         if (tp->ftt_segment == FASTTRAP_SEG_CS) {
 844                 /*
 845                  * The code/data bit and readable bit must both be set.
 846                  */
 847                 if ((type & 0xa) != 0xa)
 848                         return (-1);
 849 
 850                 if (*addr > limit)
 851                         return (-1);
 852         } else {
 853                 /*
 854                  * The code/data bit must be clear.
 855                  */
 856                 if ((type & 0x8) != 0)
 857                         return (-1);
 858 
 859                 /*
 860                  * If the expand-down bit is clear, we just check the limit as
 861                  * it would naturally be applied. Otherwise, we need to check
 862                  * that the address is in the range [limit + 1 .. 0xffff] or
 863                  * [limit + 1 .. 0xffffffff], depending on whether the default
 864                  * operand size bit is set.
 865                  */
 866                 if ((type & 0x4) == 0) {
 867                         if (*addr > limit)
 868                                 return (-1);
 869                 } else if (desc->usd_def32) {
 870                         if (*addr < limit + 1 || 0xffff < *addr)
 871                                 return (-1);
 872                 } else {
 873                         if (*addr < limit + 1 || 0xffffffff < *addr)
 874                                 return (-1);
 875                 }
 876         }
 877 
 878         *addr += USEGD_GETBASE(desc);
 879 
 880         return (0);
 881 }
 882 
 883 int
 884 fasttrap_pid_probe(struct regs *rp)
 885 {
 886         proc_t *p = curproc;
 887         uintptr_t pc = rp->r_pc - 1, new_pc = 0;
 888         fasttrap_bucket_t *bucket;
 889         kmutex_t *pid_mtx;
 890         fasttrap_tracepoint_t *tp, tp_local;
 891         pid_t pid;
 892         dtrace_icookie_t cookie;
 893         uint_t is_enabled = 0;
 894 
 895         /*
 896          * It's possible that a user (in a veritable orgy of bad planning)
 897          * could redirect this thread's flow of control before it reached the
 898          * return probe fasttrap. In this case we need to kill the process
 899          * since it's in an unrecoverable state.
 900          */
 901         if (curthread->t_dtrace_step) {
 902                 ASSERT(curthread->t_dtrace_on);
 903                 fasttrap_sigtrap(p, curthread, pc);
 904                 return (0);
 905         }
 906 
 907         /*
 908          * Clear all user tracing flags.
 909          */
 910         curthread->t_dtrace_ft = 0;
 911         curthread->t_dtrace_pc = 0;
 912         curthread->t_dtrace_npc = 0;
 913         curthread->t_dtrace_scrpc = 0;
 914         curthread->t_dtrace_astpc = 0;
 915 #ifdef __amd64
 916         curthread->t_dtrace_regv = 0;
 917 #endif
 918 
 919         /*
 920          * Treat a child created by a call to vfork(2) as if it were its
 921          * parent. We know that there's only one thread of control in such a
 922          * process: this one.
 923          */
 924         while (p->p_flag & SVFORK) {
 925                 p = p->p_parent;
 926         }
 927 
 928         pid = p->p_pid;
 929         pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
 930         mutex_enter(pid_mtx);
 931         bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
 932 
 933         /*
 934          * Lookup the tracepoint that the process just hit.
 935          */
 936         for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
 937                 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
 938                     tp->ftt_proc->ftpc_acount != 0)
 939                         break;
 940         }
 941 
 942         /*
 943          * If we couldn't find a matching tracepoint, either a tracepoint has
 944          * been inserted without using the pid<pid> ioctl interface (see
 945          * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
 946          */
 947         if (tp == NULL) {
 948                 mutex_exit(pid_mtx);
 949                 return (-1);
 950         }
 951 
 952         /*
 953          * Set the program counter to the address of the traced instruction
 954          * so that it looks right in ustack() output.
 955          */
 956         rp->r_pc = pc;
 957 
 958         if (tp->ftt_ids != NULL) {
 959                 fasttrap_id_t *id;
 960 
 961 #ifdef __amd64
 962                 if (p->p_model == DATAMODEL_LP64) {
 963                         for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
 964                                 fasttrap_probe_t *probe = id->fti_probe;
 965 
 966                                 if (id->fti_ptype == DTFTP_ENTRY) {
 967                                         /*
 968                                          * We note that this was an entry
 969                                          * probe to help ustack() find the
 970                                          * first caller.
 971                                          */
 972                                         cookie = dtrace_interrupt_disable();
 973                                         DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
 974                                         dtrace_probe(probe->ftp_id, rp->r_rdi,
 975                                             rp->r_rsi, rp->r_rdx, rp->r_rcx,
 976                                             rp->r_r8);
 977                                         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
 978                                         dtrace_interrupt_enable(cookie);
 979                                 } else if (id->fti_ptype == DTFTP_IS_ENABLED) {
 980                                         /*
 981                                          * Note that in this case, we don't
 982                                          * call dtrace_probe() since it's only
 983                                          * an artificial probe meant to change
 984                                          * the flow of control so that it
 985                                          * encounters the true probe.
 986                                          */
 987                                         is_enabled = 1;
 988                                 } else if (probe->ftp_argmap == NULL) {
 989                                         dtrace_probe(probe->ftp_id, rp->r_rdi,
 990                                             rp->r_rsi, rp->r_rdx, rp->r_rcx,
 991                                             rp->r_r8);
 992                                 } else {
 993                                         uintptr_t t[5];
 994 
 995                                         fasttrap_usdt_args64(probe, rp,
 996                                             sizeof (t) / sizeof (t[0]), t);
 997 
 998                                         dtrace_probe(probe->ftp_id, t[0], t[1],
 999                                             t[2], t[3], t[4]);
1000                                 }
1001                         }
1002                 } else {
1003 #endif
1004                         uintptr_t s0, s1, s2, s3, s4, s5;
1005                         uint32_t *stack = (uint32_t *)rp->r_sp;
1006 
1007                         /*
1008                          * In 32-bit mode, all arguments are passed on the
1009                          * stack. If this is a function entry probe, we need
1010                          * to skip the first entry on the stack as it
1011                          * represents the return address rather than a
1012                          * parameter to the function.
1013                          */
1014                         s0 = fasttrap_fuword32_noerr(&stack[0]);
1015                         s1 = fasttrap_fuword32_noerr(&stack[1]);
1016                         s2 = fasttrap_fuword32_noerr(&stack[2]);
1017                         s3 = fasttrap_fuword32_noerr(&stack[3]);
1018                         s4 = fasttrap_fuword32_noerr(&stack[4]);
1019                         s5 = fasttrap_fuword32_noerr(&stack[5]);
1020 
1021                         for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
1022                                 fasttrap_probe_t *probe = id->fti_probe;
1023 
1024                                 if (id->fti_ptype == DTFTP_ENTRY) {
1025                                         /*
1026                                          * We note that this was an entry
1027                                          * probe to help ustack() find the
1028                                          * first caller.
1029                                          */
1030                                         cookie = dtrace_interrupt_disable();
1031                                         DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
1032                                         dtrace_probe(probe->ftp_id, s1, s2,
1033                                             s3, s4, s5);
1034                                         DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1035                                         dtrace_interrupt_enable(cookie);
1036                                 } else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1037                                         /*
1038                                          * Note that in this case, we don't
1039                                          * call dtrace_probe() since it's only
1040                                          * an artificial probe meant to change
1041                                          * the flow of control so that it
1042                                          * encounters the true probe.
1043                                          */
1044                                         is_enabled = 1;
1045                                 } else if (probe->ftp_argmap == NULL) {
1046                                         dtrace_probe(probe->ftp_id, s0, s1,
1047                                             s2, s3, s4);
1048                                 } else {
1049                                         uint32_t t[5];
1050 
1051                                         fasttrap_usdt_args32(probe, rp,
1052                                             sizeof (t) / sizeof (t[0]), t);
1053 
1054                                         dtrace_probe(probe->ftp_id, t[0], t[1],
1055                                             t[2], t[3], t[4]);
1056                                 }
1057                         }
1058 #ifdef __amd64
1059                 }
1060 #endif
1061         }
1062 
1063         /*
1064          * We're about to do a bunch of work so we cache a local copy of
1065          * the tracepoint to emulate the instruction, and then find the
1066          * tracepoint again later if we need to light up any return probes.
1067          */
1068         tp_local = *tp;
1069         mutex_exit(pid_mtx);
1070         tp = &tp_local;
1071 
1072         /*
1073          * Set the program counter to appear as though the traced instruction
1074          * had completely executed. This ensures that fasttrap_getreg() will
1075          * report the expected value for REG_RIP.
1076          */
1077         rp->r_pc = pc + tp->ftt_size;
1078 
1079         /*
1080          * If there's an is-enabled probe connected to this tracepoint it
1081          * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1082          * instruction that was placed there by DTrace when the binary was
1083          * linked. As this probe is, in fact, enabled, we need to stuff 1
1084          * into %eax or %rax. Accordingly, we can bypass all the instruction
1085          * emulation logic since we know the inevitable result. It's possible
1086          * that a user could construct a scenario where the 'is-enabled'
1087          * probe was on some other instruction, but that would be a rather
1088          * exotic way to shoot oneself in the foot.
1089          */
1090         if (is_enabled) {
1091                 rp->r_r0 = 1;
1092                 new_pc = rp->r_pc;
1093                 goto done;
1094         }
1095 
1096         /*
1097          * We emulate certain types of instructions to ensure correctness
1098          * (in the case of position dependent instructions) or optimize
1099          * common cases. The rest we have the thread execute back in user-
1100          * land.
1101          */
1102         switch (tp->ftt_type) {
1103         case FASTTRAP_T_RET:
1104         case FASTTRAP_T_RET16:
1105         {
1106                 uintptr_t dst;
1107                 uintptr_t addr;
1108                 int ret;
1109 
1110                 /*
1111                  * We have to emulate _every_ facet of the behavior of a ret
1112                  * instruction including what happens if the load from %esp
1113                  * fails; in that case, we send a SIGSEGV.
1114                  */
1115 #ifdef __amd64
1116                 if (p->p_model == DATAMODEL_NATIVE) {
1117 #endif
1118                         ret = fasttrap_fulword((void *)rp->r_sp, &dst);
1119                         addr = rp->r_sp + sizeof (uintptr_t);
1120 #ifdef __amd64
1121                 } else {
1122                         uint32_t dst32;
1123                         ret = fasttrap_fuword32((void *)rp->r_sp, &dst32);
1124                         dst = dst32;
1125                         addr = rp->r_sp + sizeof (uint32_t);
1126                 }
1127 #endif
1128 
1129                 if (ret == -1) {
1130                         fasttrap_sigsegv(p, curthread, rp->r_sp);
1131                         new_pc = pc;
1132                         break;
1133                 }
1134 
1135                 if (tp->ftt_type == FASTTRAP_T_RET16)
1136                         addr += tp->ftt_dest;
1137 
1138                 rp->r_sp = addr;
1139                 new_pc = dst;
1140                 break;
1141         }
1142 
1143         case FASTTRAP_T_JCC:
1144         {
1145                 uint_t taken;
1146 
1147                 switch (tp->ftt_code) {
1148                 case FASTTRAP_JO:
1149                         taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) != 0;
1150                         break;
1151                 case FASTTRAP_JNO:
1152                         taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) == 0;
1153                         break;
1154                 case FASTTRAP_JB:
1155                         taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0;
1156                         break;
1157                 case FASTTRAP_JAE:
1158                         taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0;
1159                         break;
1160                 case FASTTRAP_JE:
1161                         taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1162                         break;
1163                 case FASTTRAP_JNE:
1164                         taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1165                         break;
1166                 case FASTTRAP_JBE:
1167                         taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0 ||
1168                             (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1169                         break;
1170                 case FASTTRAP_JA:
1171                         taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0 &&
1172                             (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1173                         break;
1174                 case FASTTRAP_JS:
1175                         taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) != 0;
1176                         break;
1177                 case FASTTRAP_JNS:
1178                         taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) == 0;
1179                         break;
1180                 case FASTTRAP_JP:
1181                         taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) != 0;
1182                         break;
1183                 case FASTTRAP_JNP:
1184                         taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) == 0;
1185                         break;
1186                 case FASTTRAP_JL:
1187                         taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1188                             ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1189                         break;
1190                 case FASTTRAP_JGE:
1191                         taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1192                             ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1193                         break;
1194                 case FASTTRAP_JLE:
1195                         taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 ||
1196                             ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1197                             ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1198                         break;
1199                 case FASTTRAP_JG:
1200                         taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1201                             ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1202                             ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1203                         break;
1204 
1205                 }
1206 
1207                 if (taken)
1208                         new_pc = tp->ftt_dest;
1209                 else
1210                         new_pc = pc + tp->ftt_size;
1211                 break;
1212         }
1213 
1214         case FASTTRAP_T_LOOP:
1215         {
1216                 uint_t taken;
1217 #ifdef __amd64
1218                 greg_t cx = rp->r_rcx--;
1219 #else
1220                 greg_t cx = rp->r_ecx--;
1221 #endif
1222 
1223                 switch (tp->ftt_code) {
1224                 case FASTTRAP_LOOPNZ:
1225                         taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1226                             cx != 0;
1227                         break;
1228                 case FASTTRAP_LOOPZ:
1229                         taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 &&
1230                             cx != 0;
1231                         break;
1232                 case FASTTRAP_LOOP:
1233                         taken = (cx != 0);
1234                         break;
1235                 }
1236 
1237                 if (taken)
1238                         new_pc = tp->ftt_dest;
1239                 else
1240                         new_pc = pc + tp->ftt_size;
1241                 break;
1242         }
1243 
1244         case FASTTRAP_T_JCXZ:
1245         {
1246 #ifdef __amd64
1247                 greg_t cx = rp->r_rcx;
1248 #else
1249                 greg_t cx = rp->r_ecx;
1250 #endif
1251 
1252                 if (cx == 0)
1253                         new_pc = tp->ftt_dest;
1254                 else
1255                         new_pc = pc + tp->ftt_size;
1256                 break;
1257         }
1258 
1259         case FASTTRAP_T_PUSHL_EBP:
1260         {
1261                 int ret;
1262                 uintptr_t addr;
1263 #ifdef __amd64
1264                 if (p->p_model == DATAMODEL_NATIVE) {
1265 #endif
1266                         addr = rp->r_sp - sizeof (uintptr_t);
1267                         ret = fasttrap_sulword((void *)addr, rp->r_fp);
1268 #ifdef __amd64
1269                 } else {
1270                         addr = rp->r_sp - sizeof (uint32_t);
1271                         ret = fasttrap_suword32((void *)addr,
1272                             (uint32_t)rp->r_fp);
1273                 }
1274 #endif
1275 
1276                 if (ret == -1) {
1277                         fasttrap_sigsegv(p, curthread, addr);
1278                         new_pc = pc;
1279                         break;
1280                 }
1281 
1282                 rp->r_sp = addr;
1283                 new_pc = pc + tp->ftt_size;
1284                 break;
1285         }
1286 
1287         case FASTTRAP_T_NOP:
1288                 new_pc = pc + tp->ftt_size;
1289                 break;
1290 
1291         case FASTTRAP_T_JMP:
1292         case FASTTRAP_T_CALL:
1293                 if (tp->ftt_code == 0) {
1294                         new_pc = tp->ftt_dest;
1295                 } else {
1296                         uintptr_t value, addr = tp->ftt_dest;
1297 
1298                         if (tp->ftt_base != FASTTRAP_NOREG)
1299                                 addr += fasttrap_getreg(rp, tp->ftt_base);
1300                         if (tp->ftt_index != FASTTRAP_NOREG)
1301                                 addr += fasttrap_getreg(rp, tp->ftt_index) <<
1302                                     tp->ftt_scale;
1303 
1304                         if (tp->ftt_code == 1) {
1305                                 /*
1306                                  * If there's a segment prefix for this
1307                                  * instruction, we'll need to check permissions
1308                                  * and bounds on the given selector, and adjust
1309                                  * the address accordingly.
1310                                  */
1311                                 if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
1312                                     fasttrap_do_seg(tp, rp, &addr) != 0) {
1313                                         fasttrap_sigsegv(p, curthread, addr);
1314                                         new_pc = pc;
1315                                         break;
1316                                 }
1317 
1318 #ifdef __amd64
1319                                 if (p->p_model == DATAMODEL_NATIVE) {
1320 #endif
1321                                         if (fasttrap_fulword((void *)addr,
1322                                             &value) == -1) {
1323                                                 fasttrap_sigsegv(p, curthread,
1324                                                     addr);
1325                                                 new_pc = pc;
1326                                                 break;
1327                                         }
1328                                         new_pc = value;
1329 #ifdef __amd64
1330                                 } else {
1331                                         uint32_t value32;
1332                                         addr = (uintptr_t)(uint32_t)addr;
1333                                         if (fasttrap_fuword32((void *)addr,
1334                                             &value32) == -1) {
1335                                                 fasttrap_sigsegv(p, curthread,
1336                                                     addr);
1337                                                 new_pc = pc;
1338                                                 break;
1339                                         }
1340                                         new_pc = value32;
1341                                 }
1342 #endif
1343                         } else {
1344                                 new_pc = addr;
1345                         }
1346                 }
1347 
1348                 /*
1349                  * If this is a call instruction, we need to push the return
1350                  * address onto the stack. If this fails, we send the process
1351                  * a SIGSEGV and reset the pc to emulate what would happen if
1352                  * this instruction weren't traced.
1353                  */
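                     /*
                      * Note that the value pushed is pc + tp->ftt_size -- the
                      * address of the instruction following the traced call in
                      * the original text, not in the scratch space -- so the
                      * callee's eventual ret behaves exactly as it would have
                      * had the call not been traced.
                      */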
1354                 if (tp->ftt_type == FASTTRAP_T_CALL) {
1355                         int ret;
1356                         uintptr_t addr;
1357 #ifdef __amd64
1358                         if (p->p_model == DATAMODEL_NATIVE) {
1359                                 addr = rp->r_sp - sizeof (uintptr_t);
1360                                 ret = fasttrap_sulword((void *)addr,
1361                                     pc + tp->ftt_size);
1362                         } else {
1363 #endif
1364                                 addr = rp->r_sp - sizeof (uint32_t);
1365                                 ret = fasttrap_suword32((void *)addr,
1366                                     (uint32_t)(pc + tp->ftt_size));
1367 #ifdef __amd64
1368                         }
1369 #endif
1370 
1371                         if (ret == -1) {
1372                                 fasttrap_sigsegv(p, curthread, addr);
1373                                 new_pc = pc;
1374                                 break;
1375                         }
1376 
1377                         rp->r_sp = addr;
1378                 }
1379 
1380                 break;
1381 
1382         case FASTTRAP_T_COMMON:
1383         {
1384                 uintptr_t addr;
1385 #if defined(__amd64)
1386                 uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];
1387 #else
1388                 uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7];
1389 #endif
1390                 uint_t i = 0;
1391                 klwp_t *lwp = ttolwp(curthread);
1392 
1393                 /*
1394                  * Compute the address of the ulwp_t and step over the
1395                  * ul_self pointer. The method used to store the user-land
1396                  * thread pointer is very different on 32- and 64-bit
1397                  * kernels.
1398                  */
1399 #if defined(__amd64)
1400                 if (p->p_model == DATAMODEL_LP64) {
1401                         addr = lwp->lwp_pcb.pcb_fsbase;
1402 
1403                         /*
1404                          * If we're branded, convert the fsbase from the
1405                          * brand's fsbase to the native fsbase.
1406                          */
1407                         if (PROC_IS_BRANDED(p) && BRMOP(p)->b_fsbase != NULL)
1408                                 addr = BRMOP(p)->b_fsbase(lwp, addr);
1409 
1410                         addr += sizeof (void *);
1411                 } else {
1412                         addr = lwp->lwp_pcb.pcb_gsbase;
1413                         addr += sizeof (caddr32_t);
1414                 }
1415 #else
1416                 addr = USEGD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
1417                 addr += sizeof (void *);
1418 #endif
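                     /*
                      * At this point addr is the user-land address of the
                      * per-thread scratch space that libc reserves in the
                      * ulwp_t immediately after the thread's self pointer;
                      * that's where the generated instruction sequence below
                      * is copied out.
                      */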
1419 
1420                 /*
1421                  * Generic Instruction Tracing
1422                  * ---------------------------
1423                  *
1424                  * This is the layout of the scratch space in the user-land
1425                  * thread structure for our generated instructions.
1426                  *
1427                  *      32-bit mode                     bytes
1428                  *      ------------------------        -----
1429                  * a:   <original instruction>            <= 15
1430                  *      jmp     <pc + tp->ftt_size>        5
1431                  * b:   <original instruction>            <= 15
1432                  *      int     T_DTRACE_RET                2
1433                  *                                      -----
1434                  *                                      <= 37
1435                  *
1436                  *      64-bit mode                     bytes
1437                  *      ------------------------        -----
1438                  * a:   <original instruction>            <= 15
1439                  *      jmp     0(%rip)                     6
1440                  *      <pc + tp->ftt_size>                8
1441                  * b:   <original instruction>            <= 15
1442                  *      int     T_DTRACE_RET                2
1443                  *                                      -----
1444                  *                                      <= 46
1445                  *
1446                  * The %pc is set to a, and curthread->t_dtrace_astpc is set
1447                  * to b. If we encounter a signal on the way out of the
1448                  * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1449                  * so that we execute the original instruction and re-enter
1450                  * the kernel rather than redirecting to the next instruction.
1451                  *
1452                  * If there are return probes (so we know that we're going to
1453                  * need to reenter the kernel after executing the original
1454                  * instruction), the scratch space will just contain the
1455                  * original instruction followed by an interrupt -- the same
1456                  * data as at b.
1457                  *
1458                  * %rip-relative Addressing
1459                  * ------------------------
1460                  *
1461                  * There's a further complication in 64-bit mode due to %rip-
1462                  * relative addressing. While this is clearly a beneficial
1463                  * architectural decision for position independent code, it's
1464                  * hard not to see it as a personal attack against the pid
1465                  * provider since, previously, there was a relatively small set of
1466                  * instructions to emulate; with %rip-relative addressing,
1467                  * almost every instruction can potentially depend on the
1468                  * address at which it's executed. Rather than emulating
1469                  * the broad spectrum of instructions that can now be
1470                  * position dependent, we emulate only the same small set of
1471                  * instructions as in 32-bit mode, and take a different tack
1472                  * for instructions that use %rip-relative addressing.
1473                  *
1474                  * For every instruction that uses the ModRM byte, the
1475                  * in-kernel disassembler reports its location. We use the
1476                  * ModRM byte to identify that an instruction uses
1477                  * %rip-relative addressing and to see what other registers
1478                  * the instruction uses. To emulate those instructions,
1479                  * we modify the instruction to be %rax-relative rather than
1480                  * %rip-relative (or %rcx-relative if the instruction uses
1481                  * %rax; or %r8- or %r9-relative if the REX.B is present so
1482                  * we don't have to rewrite the REX prefix). We then load
1483                  * the value that %rip would have been into the scratch
1484                  * register and generate an instruction to reset the scratch
1485                  * register back to its original value. The instruction
1486                  * sequence looks like this:
1487                  *
1488                  *      64-bit mode %rip-relative       bytes
1489                  *      ------------------------        -----
1490                  * a:   <modified instruction>            <= 15
1491                  *      movq    $<value>, %<scratch>            6
1492                  *      jmp     0(%rip)                     6
1493                  *      <pc + tp->ftt_size>                8
1494                  * b:   <modified instruction>            <= 15
1495                  *      int     T_DTRACE_RET                2
1496                  *                                      -----
1497                  *                                         52
1498                  *
1499                  * We set curthread->t_dtrace_regv so that upon receiving
1500                  * a signal we can reset the value of the scratch register.
1501                  */
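
                     /*
                      * As a concrete (hypothetical) illustration of the
                      * rewriting described above: a 6-byte instruction such as
                      * movl 0x1d6(%rip), %ebx at address A references
                      * A + 6 + 0x1d6. We copy it out rewritten as
                      * movl 0x1d6(%rax), %ebx, load %rax with A + 6 (the value
                      * %rip would have held), and append a movq that restores
                      * %rax to its saved value once the rewritten instruction
                      * has executed.
                      */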
1502 
1503                 ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);
1504 
1505                 curthread->t_dtrace_scrpc = addr;
1506                 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1507                 i += tp->ftt_size;
1508 
1509 #ifdef __amd64
1510                 if (tp->ftt_ripmode != 0) {
1511                         greg_t *reg;
1512 
1513                         ASSERT(p->p_model == DATAMODEL_LP64);
1514                         ASSERT(tp->ftt_ripmode &
1515                             (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));
1516 
1517                         /*
1518                          * If this was a %rip-relative instruction, we change
1519                          * it to be either a %rax- or %rcx-relative
1520                          * instruction (depending on whether those registers
1521                          * are used as another operand; or %r8- or %r9-
1522                          * relative depending on the value of REX.B). We then
1523                          * set that register and generate a movq instruction
1524                          * to reset the value.
1525                          */
1526                         if (tp->ftt_ripmode & FASTTRAP_RIP_X)
1527                                 scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
1528                         else
1529                                 scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);
1530 
1531                         if (tp->ftt_ripmode & FASTTRAP_RIP_1)
1532                                 scratch[i++] = FASTTRAP_MOV_EAX;
1533                         else
1534                                 scratch[i++] = FASTTRAP_MOV_ECX;
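                             /*
                              * Together with the 64-bit immediate stored below,
                              * these two bytes encode
                              * movq $<saved value>, %rax/%rcx/%r8/%r9 (REX.W,
                              * plus REX.B for %r8/%r9, then the mov-immediate
                              * opcode), which puts the original contents back
                              * in the scratch register after the modified
                              * instruction has used it as a stand-in for %rip.
                              */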
1535 
1536                         switch (tp->ftt_ripmode) {
1537                         case FASTTRAP_RIP_1:
1538                                 reg = &rp->r_rax;
1539                                 curthread->t_dtrace_reg = REG_RAX;
1540                                 break;
1541                         case FASTTRAP_RIP_2:
1542                                 reg = &rp->r_rcx;
1543                                 curthread->t_dtrace_reg = REG_RCX;
1544                                 break;
1545                         case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
1546                                 reg = &rp->r_r8;
1547                                 curthread->t_dtrace_reg = REG_R8;
1548                                 break;
1549                         case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
1550                                 reg = &rp->r_r9;
1551                                 curthread->t_dtrace_reg = REG_R9;
1552                                 break;
1553                         }
1554 
1555                         /* LINTED - alignment */
1556                         *(uint64_t *)&scratch[i] = *reg;
1557                         curthread->t_dtrace_regv = *reg;
1558                         *reg = pc + tp->ftt_size;
1559                         i += sizeof (uint64_t);
1560                 }
1561 #endif
1562 
1563                 /*
1564                  * Generate the branch instruction to what would have
1565                  * normally been the subsequent instruction. In 32-bit mode,
1566                  * this is just a relative branch; in 64-bit mode this is a
1567                  * %rip-relative branch that loads the 64-bit pc value
1568                  * immediately after the jmp instruction.
1569                  */
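                     /*
                      * In 64-bit mode the two bytes below encode jmp *0(%rip)
                      * (opcode group 5 with ModRM mod 0, reg 4, rm 5): a zero
                      * displacement off %rip, so the processor fetches its
                      * 8-byte target from the quadword we store immediately
                      * after the displacement.
                      */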
1570 #ifdef __amd64
1571                 if (p->p_model == DATAMODEL_LP64) {
1572                         scratch[i++] = FASTTRAP_GROUP5_OP;
1573                         scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
1574                         /* LINTED - alignment */
1575                         *(uint32_t *)&scratch[i] = 0;
1576                         i += sizeof (uint32_t);
1577                         /* LINTED - alignment */
1578                         *(uint64_t *)&scratch[i] = pc + tp->ftt_size;
1579                         i += sizeof (uint64_t);
1580                 } else {
1581 #endif
1582                         /*
1583                          * Set up the jmp to the next instruction; note that
1584                          * the size of the traced instruction cancels out.
1585                          */
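                             /*
                              * Concretely: the jmp is emitted at offset
                              * ftt_size in the scratch space, so the byte after
                              * it sits at addr + ftt_size + 5, and the target
                              * is pc + ftt_size; the relative displacement is
                              * therefore pc - addr - 5.
                              */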
1586                         scratch[i++] = FASTTRAP_JMP32;
1587                         /* LINTED - alignment */
1588                         *(uint32_t *)&scratch[i] = pc - addr - 5;
1589                         i += sizeof (uint32_t);
1590 #ifdef __amd64
1591                 }
1592 #endif
1593 
1594                 curthread->t_dtrace_astpc = addr + i;
1595                 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1596                 i += tp->ftt_size;
1597                 scratch[i++] = FASTTRAP_INT;
1598                 scratch[i++] = T_DTRACE_RET;
1599 
1600                 ASSERT(i <= sizeof (scratch));
1601 
1602                 if (fasttrap_copyout(scratch, (char *)addr, i)) {
1603                         fasttrap_sigtrap(p, curthread, pc);
1604                         new_pc = pc;
1605                         break;
1606                 }
1607 
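                     /*
                      * If return probes exist, start execution at the second
                      * copy (t_dtrace_astpc) so that the int T_DTRACE_RET
                      * following the original instruction traps us straight
                      * back into the kernel to process them; otherwise start
                      * at the first copy and jump directly to the next
                      * instruction.
                      */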
1608                 if (tp->ftt_retids != NULL) {
1609                         curthread->t_dtrace_step = 1;
1610                         curthread->t_dtrace_ret = 1;
1611                         new_pc = curthread->t_dtrace_astpc;
1612                 } else {
1613                         new_pc = curthread->t_dtrace_scrpc;
1614                 }
1615 
1616                 curthread->t_dtrace_pc = pc;
1617                 curthread->t_dtrace_npc = pc + tp->ftt_size;
1618                 curthread->t_dtrace_on = 1;
1619                 break;
1620         }
1621 
1622         default:
1623                 panic("fasttrap: mishandled an instruction");
1624         }
1625 
1626 done:
1627         /*
1628          * If there were no return probes when we first found the tracepoint,
1629          * we should feel no obligation to honor any return probes that were
1630          * subsequently enabled -- they'll just have to wait until the next
1631          * time around.
1632          */
1633         if (tp->ftt_retids != NULL) {
1634                 /*
1635                  * We need to wait until the results of the instruction are
1636                  * apparent before invoking any return probes. If this
1637                  * instruction was emulated we can just call
1638                  * fasttrap_return_common(); if it needs to be executed, we
1639                  * need to wait until the user thread returns to the kernel.
1640                  */
1641                 if (tp->ftt_type != FASTTRAP_T_COMMON) {
1642                         /*
1643                          * Set the program counter to the address of the traced
1644                          * instruction so that it looks right in ustack()
1645                          * output. We had previously set it to the end of the
1646                          * instruction to simplify %rip-relative addressing.
1647                          */
1648                         rp->r_pc = pc;
1649 
1650                         fasttrap_return_common(rp, pc, pid, new_pc);
1651                 } else {
1652                         ASSERT(curthread->t_dtrace_ret != 0);
1653                         ASSERT(curthread->t_dtrace_pc == pc);
1654                         ASSERT(curthread->t_dtrace_scrpc != 0);
1655                         ASSERT(new_pc == curthread->t_dtrace_astpc);
1656                 }
1657         }
1658 
1659         rp->r_pc = new_pc;
1660 
1661         return (0);
1662 }
1663 
1664 int
1665 fasttrap_return_probe(struct regs *rp)
1666 {
1667         proc_t *p = curproc;
1668         uintptr_t pc = curthread->t_dtrace_pc;
1669         uintptr_t npc = curthread->t_dtrace_npc;
1670 
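         /*
          * Consume the single-step state that fasttrap_pid_probe() recorded
          * for this thread; it only applies to the instruction we just
          * finished stepping over.
          */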
1671         curthread->t_dtrace_pc = 0;
1672         curthread->t_dtrace_npc = 0;
1673         curthread->t_dtrace_scrpc = 0;
1674         curthread->t_dtrace_astpc = 0;
1675 
1676         /*
1677          * Treat a child created by a call to vfork(2) as if it were its
1678          * parent. We know that there's only one thread of control in such a
1679          * process: this one.
1680          */
1681         while (p->p_flag & SVFORK) {
1682                 p = p->p_parent;
1683         }
1684 
1685         /*
1686          * We set rp->r_pc to the address of the traced instruction so
1687          * that it appears to dtrace_probe() that we're on the original
1688          * instruction, and so that the user can't easily detect our
1689          * complex web of lies. dtrace_return_probe() (our caller)
1690          * will correctly set %pc after we return.
1691          */
1692         rp->r_pc = pc;
1693 
1694         fasttrap_return_common(rp, pc, p->p_pid, npc);
1695 
1696         return (0);
1697 }
1698 
1699 /*ARGSUSED*/
1700 uint64_t
1701 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1702     int aframes)
1703 {
1704         return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 1, argno));
1705 }
1706 
1707 /*ARGSUSED*/
1708 uint64_t
1709 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1710     int aframes)
1711 {
1712         return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno));
1713 }
1714 
1715 static ulong_t
1716 fasttrap_getreg(struct regs *rp, uint_t reg)
1717 {
1718 #ifdef __amd64
1719         switch (reg) {
1720         case REG_R15:           return (rp->r_r15);
1721         case REG_R14:           return (rp->r_r14);
1722         case REG_R13:           return (rp->r_r13);
1723         case REG_R12:           return (rp->r_r12);
1724         case REG_R11:           return (rp->r_r11);
1725         case REG_R10:           return (rp->r_r10);
1726         case REG_R9:            return (rp->r_r9);
1727         case REG_R8:            return (rp->r_r8);
1728         case REG_RDI:           return (rp->r_rdi);
1729         case REG_RSI:           return (rp->r_rsi);
1730         case REG_RBP:           return (rp->r_rbp);
1731         case REG_RBX:           return (rp->r_rbx);
1732         case REG_RDX:           return (rp->r_rdx);
1733         case REG_RCX:           return (rp->r_rcx);
1734         case REG_RAX:           return (rp->r_rax);
1735         case REG_TRAPNO:        return (rp->r_trapno);
1736         case REG_ERR:           return (rp->r_err);
1737         case REG_RIP:           return (rp->r_rip);
1738         case REG_CS:            return (rp->r_cs);
1739         case REG_RFL:           return (rp->r_rfl);
1740         case REG_RSP:           return (rp->r_rsp);
1741         case REG_SS:            return (rp->r_ss);
1742         case REG_FS:            return (rp->r_fs);
1743         case REG_GS:            return (rp->r_gs);
1744         case REG_DS:            return (rp->r_ds);
1745         case REG_ES:            return (rp->r_es);
1746         case REG_FSBASE:        return (rdmsr(MSR_AMD_FSBASE));
1747         case REG_GSBASE:        return (rdmsr(MSR_AMD_GSBASE));
1748         }
1749 
1750         panic("dtrace: illegal register constant");
1751         /*NOTREACHED*/
1752 #else
1753         if (reg >= _NGREG)
1754                 panic("dtrace: illegal register constant");
1755 
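         /*
          * On 32-bit x86 the layout of struct regs matches the gregset
          * ordering (with %gs at index zero), so the register constant can be
          * used to index directly into the saved registers.
          */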
1756         return (((greg_t *)&rp->r_gs)[reg]);
1757 #endif
1758 }