Print this page
    
OS-3712 lx brand: DTrace pid provider induces core dumps on 64-bit processes (cstyle)
OS-3712 lx brand: DTrace pid provider induces core dumps on 64-bit processes
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/intel/dtrace/fasttrap_isa.c
          +++ new/usr/src/uts/intel/dtrace/fasttrap_isa.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  
    | 
      ↓ open down ↓ | 
    16 lines elided | 
    
      ↑ open up ↑ | 
  
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   */
  26   26  
  27      -#pragma ident   "%Z%%M% %I%     %E% SMI"
       27 +/*
       28 + * Copyright (c) 2015, Joyent, Inc. All rights reserved.
       29 + */
  28   30  
  29   31  #include <sys/fasttrap_isa.h>
  30   32  #include <sys/fasttrap_impl.h>
  31   33  #include <sys/dtrace.h>
  32   34  #include <sys/dtrace_impl.h>
  33   35  #include <sys/cmn_err.h>
  34   36  #include <sys/regset.h>
  35   37  #include <sys/privregs.h>
  36   38  #include <sys/segments.h>
  37   39  #include <sys/x86_archext.h>
  38   40  #include <sys/sysmacros.h>
  39   41  #include <sys/trap.h>
  40   42  #include <sys/archsystm.h>
       43 +#include <sys/proc.h>
       44 +#include <sys/brand.h>
       45 +#include <sys/machbrand.h>
  41   46  
  42   47  /*
  43   48   * Lossless User-Land Tracing on x86
  44   49   * ---------------------------------
  45   50   *
  46   51   * The execution of most instructions is not dependent on the address; for
  47   52   * these instructions it is sufficient to copy them into the user process's
  48   53   * address space and execute them. To effectively single-step an instruction
  49   54   * in user-land, we copy out the following sequence of instructions to scratch
  50   55   * space in the user thread's ulwp_t structure.
  51   56   *
  52   57   * We then set the program counter (%eip or %rip) to point to this scratch
  53   58   * space. Once execution resumes, the original instruction is executed and
  54   59   * then control flow is redirected to what was originally the subsequent
  55   60   * instruction. If the kernel attemps to deliver a signal while single-
  56   61   * stepping, the signal is deferred and the program counter is moved into the
  57   62   * second sequence of instructions. The second sequence ends in a trap into
  58   63   * the kernel where the deferred signal is then properly handled and delivered.
  59   64   *
  60   65   * For instructions whose execute is position dependent, we perform simple
  61   66   * emulation. These instructions are limited to control transfer
  62   67   * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
  63   68   * of %rip-relative addressing that means that almost any instruction can be
  64   69   * position dependent. For all the details on how we emulate generic
  65   70   * instructions included %rip-relative instructions, see the code in
  66   71   * fasttrap_pid_probe() below where we handle instructions of type
  67   72   * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
  68   73   */
  69   74  
/*
 * ModR/M byte decomposition: mod (addressing mode), reg (register or
 * opcode extension), r/m (register/memory operand).
 */
#define	FASTTRAP_MODRM_MOD(modrm)	(((modrm) >> 6) & 0x3)
#define	FASTTRAP_MODRM_REG(modrm)	(((modrm) >> 3) & 0x7)
#define	FASTTRAP_MODRM_RM(modrm)	((modrm) & 0x7)
#define	FASTTRAP_MODRM(mod, reg, rm)	(((mod) << 6) | ((reg) << 3) | (rm))

/* SIB (scale-index-base) byte decomposition. */
#define	FASTTRAP_SIB_SCALE(sib)		(((sib) >> 6) & 0x3)
#define	FASTTRAP_SIB_INDEX(sib)		(((sib) >> 3) & 0x7)
#define	FASTTRAP_SIB_BASE(sib)		((sib) & 0x7)

/* REX prefix (64-bit mode) bit extraction and construction. */
#define	FASTTRAP_REX_W(rex)		(((rex) >> 3) & 1)
#define	FASTTRAP_REX_R(rex)		(((rex) >> 2) & 1)
#define	FASTTRAP_REX_X(rex)		(((rex) >> 1) & 1)
#define	FASTTRAP_REX_B(rex)		((rex) & 1)
#define	FASTTRAP_REX(w, r, x, b)	\
	(0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))

/*
 * Single-byte op-codes.
 */
#define	FASTTRAP_PUSHL_EBP	0x55

/* Short (rel8) conditional jumps, 0x70-0x7f. */
#define	FASTTRAP_JO		0x70
#define	FASTTRAP_JNO		0x71
#define	FASTTRAP_JB		0x72
#define	FASTTRAP_JAE		0x73
#define	FASTTRAP_JE		0x74
#define	FASTTRAP_JNE		0x75
#define	FASTTRAP_JBE		0x76
#define	FASTTRAP_JA		0x77
#define	FASTTRAP_JS		0x78
#define	FASTTRAP_JNS		0x79
#define	FASTTRAP_JP		0x7a
#define	FASTTRAP_JNP		0x7b
#define	FASTTRAP_JL		0x7c
#define	FASTTRAP_JGE		0x7d
#define	FASTTRAP_JLE		0x7e
#define	FASTTRAP_JG		0x7f

#define	FASTTRAP_NOP		0x90

#define	FASTTRAP_MOV_EAX	0xb8
#define	FASTTRAP_MOV_ECX	0xb9

#define	FASTTRAP_RET16		0xc2
#define	FASTTRAP_RET		0xc3

#define	FASTTRAP_LOOPNZ		0xe0
#define	FASTTRAP_LOOPZ		0xe1
#define	FASTTRAP_LOOP		0xe2
#define	FASTTRAP_JCXZ		0xe3

#define	FASTTRAP_CALL		0xe8
#define	FASTTRAP_JMP32		0xe9
#define	FASTTRAP_JMP8		0xeb

#define	FASTTRAP_INT3		0xcc
#define	FASTTRAP_INT		0xcd

#define	FASTTRAP_2_BYTE_OP	0x0f
#define	FASTTRAP_GROUP5_OP	0xff

/*
 * Two-byte op-codes (second byte only).
 */
/* Near (rel32) conditional jumps, 0x0f 0x80-0x8f. */
#define	FASTTRAP_0F_JO		0x80
#define	FASTTRAP_0F_JNO		0x81
#define	FASTTRAP_0F_JB		0x82
#define	FASTTRAP_0F_JAE		0x83
#define	FASTTRAP_0F_JE		0x84
#define	FASTTRAP_0F_JNE		0x85
#define	FASTTRAP_0F_JBE		0x86
#define	FASTTRAP_0F_JA		0x87
#define	FASTTRAP_0F_JS		0x88
#define	FASTTRAP_0F_JNS		0x89
#define	FASTTRAP_0F_JP		0x8a
#define	FASTTRAP_0F_JNP		0x8b
#define	FASTTRAP_0F_JL		0x8c
#define	FASTTRAP_0F_JGE		0x8d
#define	FASTTRAP_0F_JLE		0x8e
#define	FASTTRAP_0F_JG		0x8f

/* %eflags condition bits consulted when emulating conditional branches. */
#define	FASTTRAP_EFLAGS_OF	0x800
#define	FASTTRAP_EFLAGS_DF	0x400
#define	FASTTRAP_EFLAGS_SF	0x080
#define	FASTTRAP_EFLAGS_ZF	0x040
#define	FASTTRAP_EFLAGS_AF	0x010
#define	FASTTRAP_EFLAGS_PF	0x004
#define	FASTTRAP_EFLAGS_CF	0x001

/*
 * Instruction prefixes.
 */
#define	FASTTRAP_PREFIX_OPERAND	0x66
#define	FASTTRAP_PREFIX_ADDRESS	0x67
#define	FASTTRAP_PREFIX_CS	0x2E
#define	FASTTRAP_PREFIX_DS	0x3E
#define	FASTTRAP_PREFIX_ES	0x26
#define	FASTTRAP_PREFIX_FS	0x64
#define	FASTTRAP_PREFIX_GS	0x65
#define	FASTTRAP_PREFIX_SS	0x36
#define	FASTTRAP_PREFIX_LOCK	0xF0
#define	FASTTRAP_PREFIX_REP	0xF3
#define	FASTTRAP_PREFIX_REPNE	0xF2

/* Sentinel meaning "no register" in ftt_index/ftt_base. */
#define	FASTTRAP_NOREG	0xff

/*
 * Map between instruction register encodings and the kernel constants which
 * correspond to indicies into struct regs.
 */
#ifdef __amd64
static const uint8_t regmap[16] = {
	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
};
#else
static const uint8_t regmap[8] = {
	EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI
};
#endif

static ulong_t fasttrap_getreg(struct regs *, uint_t);
 192  197  
/*
 * Fetch probe argument `argno` for the probe firing at the trapped
 * instruction, given the interrupted register state `rp`.  At function
 * entry (function_entry != 0) the word at the top of the stack is the
 * return address, so stack-based arguments are shifted by one slot.
 * Faults while reading the user stack are suppressed via
 * CPU_DTRACE_NOFAULT; a faulted read yields whatever dtrace_fulword/
 * dtrace_fuword32 return in that case.
 */
static uint64_t
fasttrap_anarg(struct regs *rp, int function_entry, int argno)
{
	uint64_t value;
	int shift = function_entry ? 1 : 0;	/* skip the return address */

#ifdef __amd64
	if (curproc->p_model == DATAMODEL_LP64) {
		uintptr_t *stack;

		/*
		 * In 64-bit mode, the first six arguments are stored in
		 * registers.  %rdi is the first of six consecutive
		 * argument registers in struct regs.
		 */
		if (argno < 6)
			return ((&rp->r_rdi)[argno]);

		stack = (uintptr_t *)rp->r_sp;
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&stack[argno - 6 + shift]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
	} else {
#endif
		/* 32-bit process: all arguments live on the stack. */
		uint32_t *stack = (uint32_t *)rp->r_sp;
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&stack[argno + shift]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
#ifdef __amd64
	}
#endif

	return (value);
}
 226  231  
/*
 * Decode the instruction at `pc` in process `p` and initialize the
 * tracepoint `tp` with everything needed to later emulate or single-step
 * it: the instruction bytes (ftt_instr), its length (ftt_size), its
 * classification (ftt_type), and -- for control-transfer instructions --
 * the pre-computed branch target or effective-address components
 * (ftt_dest, ftt_code, ftt_base, ftt_index, ftt_scale, ftt_segment).
 * Returns 0 on success, -1 if the instruction is unreadable, malformed,
 * or one we refuse to instrument (int3, int n).
 */
/*ARGSUSED*/
int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];
	size_t len = FASTTRAP_MAX_INSTR_SIZE;
	size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET));
	uint_t start = 0;
	int rmindex, size;
	uint8_t seg, rex = 0;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set. Since instructions can span
	 * pages, we potentially read the instruction in two parts. If the
	 * second part fails, we just zero out that part of the instruction.
	 */
	if (uread(p, &instr[0], first, pc) != 0)
		return (-1);
	if (len > first &&
	    uread(p, &instr[first], len - first, pc + first) != 0) {
		bzero(&instr[first], len - first);
		len = first;
	}

	/*
	 * If the disassembly fails, then we have a malformed instruction.
	 */
	if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0)
		return (-1);

	/*
	 * Make sure the disassembler isn't completely broken.
	 */
	ASSERT(-1 <= rmindex && rmindex < size);

	/*
	 * If the computed size is greater than the number of bytes read,
	 * then it was a malformed instruction possibly because it fell on a
	 * page boundary and the subsequent page was missing or because of
	 * some malicious user.
	 */
	if (size > len)
		return (-1);

	tp->ftt_size = (uint8_t)size;
	tp->ftt_segment = FASTTRAP_SEG_NONE;

	/*
	 * Find the start of the instruction's opcode by processing any
	 * legacy prefixes.  The segment-override cases deliberately fall
	 * through: the number of case labels crossed yields a distinct
	 * non-zero `seg` value per segment register, which is recorded as
	 * the tracepoint's FASTTRAP_SEG_* identifier.
	 */
	for (;;) {
		seg = 0;
		switch (instr[start]) {
		case FASTTRAP_PREFIX_SS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_GS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_FS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_ES:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_DS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_CS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_OPERAND:
		case FASTTRAP_PREFIX_ADDRESS:
		case FASTTRAP_PREFIX_LOCK:
		case FASTTRAP_PREFIX_REP:
		case FASTTRAP_PREFIX_REPNE:
			if (seg != 0) {
				/*
				 * It's illegal for an instruction to specify
				 * two segment prefixes -- give up on this
				 * illegal instruction.
				 */
				if (tp->ftt_segment != FASTTRAP_SEG_NONE)
					return (-1);

				tp->ftt_segment = seg;
			}
			start++;
			continue;
		}
		break;
	}

#ifdef __amd64
	/*
	 * Identify the REX prefix on 64-bit processes.
	 */
	if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)
		rex = instr[start++];
#endif

	/*
	 * Now that we're pretty sure that the instruction is okay, copy the
	 * valid part to the tracepoint.
	 */
	bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);

	tp->ftt_type = FASTTRAP_T_COMMON;
	if (instr[start] == FASTTRAP_2_BYTE_OP) {
		/* Two-byte opcode: only the near Jcc forms are special. */
		switch (instr[start + 1]) {
		case FASTTRAP_0F_JO:
		case FASTTRAP_0F_JNO:
		case FASTTRAP_0F_JB:
		case FASTTRAP_0F_JAE:
		case FASTTRAP_0F_JE:
		case FASTTRAP_0F_JNE:
		case FASTTRAP_0F_JBE:
		case FASTTRAP_0F_JA:
		case FASTTRAP_0F_JS:
		case FASTTRAP_0F_JNS:
		case FASTTRAP_0F_JP:
		case FASTTRAP_0F_JNP:
		case FASTTRAP_0F_JL:
		case FASTTRAP_0F_JGE:
		case FASTTRAP_0F_JLE:
		case FASTTRAP_0F_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 2];
			break;
		}
	} else if (instr[start] == FASTTRAP_GROUP5_OP) {
		uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);
		uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);
		uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);

		/*
		 * Group 5 (0xff): reg field selects the operation;
		 * 2 = indirect call, 4 = indirect jmp.  Other group-5
		 * operations remain FASTTRAP_T_COMMON.
		 */
		if (reg == 2 || reg == 4) {
			uint_t i, sz;

			if (reg == 2)
				tp->ftt_type = FASTTRAP_T_CALL;
			else
				tp->ftt_type = FASTTRAP_T_JMP;

			/* ftt_code: 2 = register operand, 1 = memory. */
			if (mod == 3)
				tp->ftt_code = 2;
			else
				tp->ftt_code = 1;

			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * See AMD x86-64 Architecture Programmer's Manual
			 * Volume 3, Section 1.2.7, Table 1-12, and
			 * Appendix A.3.1, Table A-15.
			 */
			if (mod != 3 && rm == 4) {
				/* SIB-based effective address. */
				uint8_t sib = instr[start + 2];
				uint_t index = FASTTRAP_SIB_INDEX(sib);
				uint_t base = FASTTRAP_SIB_BASE(sib);

				tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);

				tp->ftt_index = (index == 4) ?
				    FASTTRAP_NOREG :
				    regmap[index | (FASTTRAP_REX_X(rex) << 3)];
				tp->ftt_base = (mod == 0 && base == 5) ?
				    FASTTRAP_NOREG :
				    regmap[base | (FASTTRAP_REX_B(rex) << 3)];

				i = 3;
				sz = mod == 1 ? 1 : 4;
			} else {
				/*
				 * In 64-bit mode, mod == 0 and r/m == 5
				 * denotes %rip-relative addressing; in 32-bit
				 * mode, the base register isn't used. In both
				 * modes, there is a 32-bit operand.
				 */
				if (mod == 0 && rm == 5) {
#ifdef __amd64
					if (p->p_model == DATAMODEL_LP64)
						tp->ftt_base = REG_RIP;
					else
#endif
						tp->ftt_base = FASTTRAP_NOREG;
					sz = 4;
				} else  {
					uint8_t base = rm |
					    (FASTTRAP_REX_B(rex) << 3);

					tp->ftt_base = regmap[base];
					sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;
				}
				tp->ftt_index = FASTTRAP_NOREG;
				i = 2;
			}

			/* Sign-extend the displacement into ftt_dest. */
			if (sz == 1) {
				tp->ftt_dest = *(int8_t *)&instr[start + i];
			} else if (sz == 4) {
				/* LINTED - alignment */
				tp->ftt_dest = *(int32_t *)&instr[start + i];
			} else {
				tp->ftt_dest = 0;
			}
		}
	} else {
		switch (instr[start]) {
		case FASTTRAP_RET:
			tp->ftt_type = FASTTRAP_T_RET;
			break;

		case FASTTRAP_RET16:
			tp->ftt_type = FASTTRAP_T_RET16;
			/* ftt_dest holds the imm16 stack adjustment. */
			/* LINTED - alignment */
			tp->ftt_dest = *(uint16_t *)&instr[start + 1];
			break;

		case FASTTRAP_JO:
		case FASTTRAP_JNO:
		case FASTTRAP_JB:
		case FASTTRAP_JAE:
		case FASTTRAP_JE:
		case FASTTRAP_JNE:
		case FASTTRAP_JBE:
		case FASTTRAP_JA:
		case FASTTRAP_JS:
		case FASTTRAP_JNS:
		case FASTTRAP_JP:
		case FASTTRAP_JNP:
		case FASTTRAP_JL:
		case FASTTRAP_JGE:
		case FASTTRAP_JLE:
		case FASTTRAP_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			tp->ftt_code = instr[start];
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_LOOPNZ:
		case FASTTRAP_LOOPZ:
		case FASTTRAP_LOOP:
			tp->ftt_type = FASTTRAP_T_LOOP;
			tp->ftt_code = instr[start];
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_JCXZ:
			tp->ftt_type = FASTTRAP_T_JCXZ;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_CALL:
			tp->ftt_type = FASTTRAP_T_CALL;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 1];
			tp->ftt_code = 0;
			break;

		case FASTTRAP_JMP32:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 1];
			break;
		case FASTTRAP_JMP8:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_PUSHL_EBP:
			/* Only an unprefixed pushl %ebp gets the fast path. */
			if (start == 0)
				tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
			break;

		case FASTTRAP_NOP:
#ifdef __amd64
			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * On amd64 we have to be careful not to confuse a nop
			 * (actually xchgl %eax, %eax) with an instruction using
			 * the same opcode, but that does something different
			 * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax).
			 */
			if (FASTTRAP_REX_B(rex) == 0)
#endif
				tp->ftt_type = FASTTRAP_T_NOP;
			break;

		case FASTTRAP_INT3:
			/*
			 * The pid provider shares the int3 trap with debugger
			 * breakpoints so we can't instrument them.
			 */
			ASSERT(instr[start] == FASTTRAP_INSTR);
			return (-1);

		case FASTTRAP_INT:
			/*
			 * Interrupts seem like they could be traced with
			 * no negative implications, but it's possible that
			 * a thread could be redirected by the trap handling
			 * code which would eventually return to the
			 * instruction after the interrupt. If the interrupt
			 * were in our scratch space, the subsequent
			 * instruction might be overwritten before we return.
			 * Accordingly we refuse to instrument any interrupt.
			 */
			return (-1);
		}
	}

#ifdef __amd64
	if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
		/*
		 * If the process is 64-bit and the instruction type is still
		 * FASTTRAP_T_COMMON -- meaning we're going to copy it out an
		 * execute it -- we need to watch for %rip-relative
		 * addressing mode. See the portion of fasttrap_pid_probe()
		 * below where we handle tracepoints with type
		 * FASTTRAP_T_COMMON for how we emulate instructions that
		 * employ %rip-relative addressing.
		 */
		if (rmindex != -1) {
			uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);
			uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);
			uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);

			ASSERT(rmindex > start);

			if (mod == 0 && rm == 5) {
				/*
				 * We need to be sure to avoid other
				 * registers used by this instruction. While
				 * the reg field may determine the op code
				 * rather than denoting a register, assuming
				 * that it denotes a register is always safe.
				 * We leave the REX field intact and use
				 * whatever value's there for simplicity.
				 */
				if (reg != 0) {
					tp->ftt_ripmode = FASTTRAP_RIP_1 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 0;
				} else {
					tp->ftt_ripmode = FASTTRAP_RIP_2 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 1;
				}

				/*
				 * Save the original ModR/M byte and rewrite
				 * the copied instruction to use a scratch
				 * register (with a 32-bit displacement) in
				 * place of %rip-relative addressing.
				 */
				tp->ftt_modrm = tp->ftt_instr[rmindex];
				tp->ftt_instr[rmindex] =
				    FASTTRAP_MODRM(2, reg, rm);
			}
		}
	}
#endif

	return (0);
}
 603  608  
 604  609  int
 605  610  fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
 606  611  {
 607  612          fasttrap_instr_t instr = FASTTRAP_INSTR;
 608  613  
 609  614          if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)
 610  615                  return (-1);
 611  616  
 612  617          return (0);
 613  618  }
 614  619  
 615  620  int
 616  621  fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
 617  622  {
 618  623          uint8_t instr;
 619  624  
 620  625          /*
 621  626           * Distinguish between read or write failures and a changed
 622  627           * instruction.
 623  628           */
 624  629          if (uread(p, &instr, 1, tp->ftt_pc) != 0)
 625  630                  return (0);
 626  631          if (instr != FASTTRAP_INSTR)
 627  632                  return (0);
 628  633          if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
 629  634                  return (-1);
 630  635  
 631  636          return (0);
 632  637  }
 633  638  
 634  639  #ifdef __amd64
 635  640  static uintptr_t
 636  641  fasttrap_fulword_noerr(const void *uaddr)
 637  642  {
 638  643          uintptr_t ret;
 639  644  
 640  645          if (fasttrap_fulword(uaddr, &ret) == 0)
 641  646                  return (ret);
 642  647  
 643  648          return (0);
 644  649  }
 645  650  #endif
 646  651  
 647  652  static uint32_t
 648  653  fasttrap_fuword32_noerr(const void *uaddr)
 649  654  {
 650  655          uint32_t ret;
 651  656  
 652  657          if (fasttrap_fuword32(uaddr, &ret) == 0)
 653  658                  return (ret);
 654  659  
 655  660          return (0);
 656  661  }
 657  662  
 658  663  static void
 659  664  fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
 660  665      uintptr_t new_pc)
 661  666  {
 662  667          fasttrap_tracepoint_t *tp;
 663  668          fasttrap_bucket_t *bucket;
 664  669          fasttrap_id_t *id;
 665  670          kmutex_t *pid_mtx;
 666  671  
 667  672          pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
 668  673          mutex_enter(pid_mtx);
 669  674          bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
 670  675  
 671  676          for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
 672  677                  if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
 673  678                      tp->ftt_proc->ftpc_acount != 0)
 674  679                          break;
 675  680          }
 676  681  
 677  682          /*
 678  683           * Don't sweat it if we can't find the tracepoint again; unlike
 679  684           * when we're in fasttrap_pid_probe(), finding the tracepoint here
 680  685           * is not essential to the correct execution of the process.
 681  686           */
 682  687          if (tp == NULL) {
 683  688                  mutex_exit(pid_mtx);
 684  689                  return;
 685  690          }
 686  691  
 687  692          for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
 688  693                  /*
 689  694                   * If there's a branch that could act as a return site, we
 690  695                   * need to trace it, and check here if the program counter is
 691  696                   * external to the function.
 692  697                   */
 693  698                  if (tp->ftt_type != FASTTRAP_T_RET &&
 694  699                      tp->ftt_type != FASTTRAP_T_RET16 &&
 695  700                      new_pc - id->fti_probe->ftp_faddr <
 696  701                      id->fti_probe->ftp_fsize)
 697  702                          continue;
 698  703  
 699  704                  dtrace_probe(id->fti_probe->ftp_id,
 700  705                      pc - id->fti_probe->ftp_faddr,
 701  706                      rp->r_r0, rp->r_r1, 0, 0);
 702  707          }
 703  708  
 704  709          mutex_exit(pid_mtx);
 705  710  }
 706  711  
 707  712  static void
 708  713  fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
 709  714  {
 710  715          sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
 711  716  
 712  717          sqp->sq_info.si_signo = SIGSEGV;
 713  718          sqp->sq_info.si_code = SEGV_MAPERR;
 714  719          sqp->sq_info.si_addr = (caddr_t)addr;
 715  720  
 716  721          mutex_enter(&p->p_lock);
 717  722          sigaddqa(p, t, sqp);
 718  723          mutex_exit(&p->p_lock);
 719  724  
 720  725          if (t != NULL)
 721  726                  aston(t);
 722  727  }
 723  728  
 724  729  #ifdef __amd64
 725  730  static void
 726  731  fasttrap_usdt_args64(fasttrap_probe_t *probe, struct regs *rp, int argc,
 727  732      uintptr_t *argv)
 728  733  {
 729  734          int i, x, cap = MIN(argc, probe->ftp_nargs);
 730  735          uintptr_t *stack = (uintptr_t *)rp->r_sp;
 731  736  
 732  737          for (i = 0; i < cap; i++) {
 733  738                  x = probe->ftp_argmap[i];
 734  739  
 735  740                  if (x < 6)
 736  741                          argv[i] = (&rp->r_rdi)[x];
 737  742                  else
 738  743                          argv[i] = fasttrap_fulword_noerr(&stack[x]);
 739  744          }
 740  745  
 741  746          for (; i < argc; i++) {
 742  747                  argv[i] = 0;
 743  748          }
 744  749  }
 745  750  #endif
 746  751  
 747  752  static void
 748  753  fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc,
 749  754      uint32_t *argv)
 750  755  {
 751  756          int i, x, cap = MIN(argc, probe->ftp_nargs);
 752  757          uint32_t *stack = (uint32_t *)rp->r_sp;
 753  758  
 754  759          for (i = 0; i < cap; i++) {
 755  760                  x = probe->ftp_argmap[i];
 756  761  
 757  762                  argv[i] = fasttrap_fuword32_noerr(&stack[x]);
 758  763          }
 759  764  
 760  765          for (; i < argc; i++) {
 761  766                  argv[i] = 0;
 762  767          }
 763  768  }
 764  769  
 765  770  static int
 766  771  fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr)
 767  772  {
 768  773          proc_t *p = curproc;
 769  774          user_desc_t *desc;
 770  775          uint16_t sel, ndx, type;
 771  776          uintptr_t limit;
 772  777  
 773  778          switch (tp->ftt_segment) {
 774  779          case FASTTRAP_SEG_CS:
 775  780                  sel = rp->r_cs;
 776  781                  break;
 777  782          case FASTTRAP_SEG_DS:
 778  783                  sel = rp->r_ds;
 779  784                  break;
 780  785          case FASTTRAP_SEG_ES:
 781  786                  sel = rp->r_es;
 782  787                  break;
 783  788          case FASTTRAP_SEG_FS:
 784  789                  sel = rp->r_fs;
 785  790                  break;
 786  791          case FASTTRAP_SEG_GS:
 787  792                  sel = rp->r_gs;
 788  793                  break;
 789  794          case FASTTRAP_SEG_SS:
 790  795                  sel = rp->r_ss;
 791  796                  break;
 792  797          }
 793  798  
 794  799          /*
 795  800           * Make sure the given segment register specifies a user priority
 796  801           * selector rather than a kernel selector.
 797  802           */
 798  803          if (!SELISUPL(sel))
 799  804                  return (-1);
 800  805  
 801  806          ndx = SELTOIDX(sel);
 802  807  
 803  808          /*
 804  809           * Check the bounds and grab the descriptor out of the specified
 805  810           * descriptor table.
 806  811           */
 807  812          if (SELISLDT(sel)) {
 808  813                  if (ndx > p->p_ldtlimit)
 809  814                          return (-1);
 810  815  
 811  816                  desc = p->p_ldt + ndx;
 812  817  
 813  818          } else {
 814  819                  if (ndx >= NGDT)
 815  820                          return (-1);
 816  821  
 817  822                  desc = cpu_get_gdt() + ndx;
 818  823          }
 819  824  
 820  825          /*
 821  826           * The descriptor must have user privilege level and it must be
 822  827           * present in memory.
 823  828           */
 824  829          if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1)
 825  830                  return (-1);
 826  831  
 827  832          type = desc->usd_type;
 828  833  
 829  834          /*
 830  835           * If the S bit in the type field is not set, this descriptor can
 831  836           * only be used in system context.
 832  837           */
 833  838          if ((type & 0x10) != 0x10)
 834  839                  return (-1);
 835  840  
 836  841          limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1);
 837  842  
 838  843          if (tp->ftt_segment == FASTTRAP_SEG_CS) {
 839  844                  /*
 840  845                   * The code/data bit and readable bit must both be set.
 841  846                   */
 842  847                  if ((type & 0xa) != 0xa)
 843  848                          return (-1);
 844  849  
 845  850                  if (*addr > limit)
 846  851                          return (-1);
 847  852          } else {
 848  853                  /*
 849  854                   * The code/data bit must be clear.
 850  855                   */
 851  856                  if ((type & 0x8) != 0)
 852  857                          return (-1);
 853  858  
 854  859                  /*
 855  860                   * If the expand-down bit is clear, we just check the limit as
 856  861                   * it would naturally be applied. Otherwise, we need to check
 857  862                   * that the address is the range [limit + 1 .. 0xffff] or
 858  863                   * [limit + 1 ... 0xffffffff] depending on if the default
 859  864                   * operand size bit is set.
 860  865                   */
 861  866                  if ((type & 0x4) == 0) {
 862  867                          if (*addr > limit)
 863  868                                  return (-1);
 864  869                  } else if (desc->usd_def32) {
 865  870                          if (*addr < limit + 1 || 0xffff < *addr)
 866  871                                  return (-1);
 867  872                  } else {
 868  873                          if (*addr < limit + 1 || 0xffffffff < *addr)
 869  874                                  return (-1);
 870  875                  }
 871  876          }
 872  877  
 873  878          *addr += USEGD_GETBASE(desc);
 874  879  
 875  880          return (0);
 876  881  }
 877  882  
 878  883  int
 879  884  fasttrap_pid_probe(struct regs *rp)
 880  885  {
 881  886          proc_t *p = curproc;
 882  887          uintptr_t pc = rp->r_pc - 1, new_pc = 0;
 883  888          fasttrap_bucket_t *bucket;
 884  889          kmutex_t *pid_mtx;
 885  890          fasttrap_tracepoint_t *tp, tp_local;
 886  891          pid_t pid;
 887  892          dtrace_icookie_t cookie;
 888  893          uint_t is_enabled = 0;
 889  894  
 890  895          /*
 891  896           * It's possible that a user (in a veritable orgy of bad planning)
 892  897           * could redirect this thread's flow of control before it reached the
 893  898           * return probe fasttrap. In this case we need to kill the process
 894  899           * since it's in a unrecoverable state.
 895  900           */
 896  901          if (curthread->t_dtrace_step) {
 897  902                  ASSERT(curthread->t_dtrace_on);
 898  903                  fasttrap_sigtrap(p, curthread, pc);
 899  904                  return (0);
 900  905          }
 901  906  
 902  907          /*
 903  908           * Clear all user tracing flags.
 904  909           */
 905  910          curthread->t_dtrace_ft = 0;
 906  911          curthread->t_dtrace_pc = 0;
 907  912          curthread->t_dtrace_npc = 0;
 908  913          curthread->t_dtrace_scrpc = 0;
 909  914          curthread->t_dtrace_astpc = 0;
 910  915  #ifdef __amd64
 911  916          curthread->t_dtrace_regv = 0;
 912  917  #endif
 913  918  
 914  919          /*
 915  920           * Treat a child created by a call to vfork(2) as if it were its
 916  921           * parent. We know that there's only one thread of control in such a
 917  922           * process: this one.
 918  923           */
 919  924          while (p->p_flag & SVFORK) {
 920  925                  p = p->p_parent;
 921  926          }
 922  927  
 923  928          pid = p->p_pid;
 924  929          pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
 925  930          mutex_enter(pid_mtx);
 926  931          bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
 927  932  
 928  933          /*
 929  934           * Lookup the tracepoint that the process just hit.
 930  935           */
 931  936          for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
 932  937                  if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
 933  938                      tp->ftt_proc->ftpc_acount != 0)
 934  939                          break;
 935  940          }
 936  941  
 937  942          /*
 938  943           * If we couldn't find a matching tracepoint, either a tracepoint has
 939  944           * been inserted without using the pid<pid> ioctl interface (see
 940  945           * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
 941  946           */
 942  947          if (tp == NULL) {
 943  948                  mutex_exit(pid_mtx);
 944  949                  return (-1);
 945  950          }
 946  951  
 947  952          /*
 948  953           * Set the program counter to the address of the traced instruction
 949  954           * so that it looks right in ustack() output.
 950  955           */
 951  956          rp->r_pc = pc;
 952  957  
 953  958          if (tp->ftt_ids != NULL) {
 954  959                  fasttrap_id_t *id;
 955  960  
 956  961  #ifdef __amd64
 957  962                  if (p->p_model == DATAMODEL_LP64) {
 958  963                          for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
 959  964                                  fasttrap_probe_t *probe = id->fti_probe;
 960  965  
 961  966                                  if (id->fti_ptype == DTFTP_ENTRY) {
 962  967                                          /*
 963  968                                           * We note that this was an entry
 964  969                                           * probe to help ustack() find the
 965  970                                           * first caller.
 966  971                                           */
 967  972                                          cookie = dtrace_interrupt_disable();
 968  973                                          DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
 969  974                                          dtrace_probe(probe->ftp_id, rp->r_rdi,
 970  975                                              rp->r_rsi, rp->r_rdx, rp->r_rcx,
 971  976                                              rp->r_r8);
 972  977                                          DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
 973  978                                          dtrace_interrupt_enable(cookie);
 974  979                                  } else if (id->fti_ptype == DTFTP_IS_ENABLED) {
 975  980                                          /*
 976  981                                           * Note that in this case, we don't
 977  982                                           * call dtrace_probe() since it's only
 978  983                                           * an artificial probe meant to change
 979  984                                           * the flow of control so that it
 980  985                                           * encounters the true probe.
 981  986                                           */
 982  987                                          is_enabled = 1;
 983  988                                  } else if (probe->ftp_argmap == NULL) {
 984  989                                          dtrace_probe(probe->ftp_id, rp->r_rdi,
 985  990                                              rp->r_rsi, rp->r_rdx, rp->r_rcx,
 986  991                                              rp->r_r8);
 987  992                                  } else {
 988  993                                          uintptr_t t[5];
 989  994  
 990  995                                          fasttrap_usdt_args64(probe, rp,
 991  996                                              sizeof (t) / sizeof (t[0]), t);
 992  997  
 993  998                                          dtrace_probe(probe->ftp_id, t[0], t[1],
 994  999                                              t[2], t[3], t[4]);
 995 1000                                  }
 996 1001                          }
 997 1002                  } else {
 998 1003  #endif
 999 1004                          uintptr_t s0, s1, s2, s3, s4, s5;
1000 1005                          uint32_t *stack = (uint32_t *)rp->r_sp;
1001 1006  
1002 1007                          /*
1003 1008                           * In 32-bit mode, all arguments are passed on the
1004 1009                           * stack. If this is a function entry probe, we need
1005 1010                           * to skip the first entry on the stack as it
1006 1011                           * represents the return address rather than a
1007 1012                           * parameter to the function.
1008 1013                           */
1009 1014                          s0 = fasttrap_fuword32_noerr(&stack[0]);
1010 1015                          s1 = fasttrap_fuword32_noerr(&stack[1]);
1011 1016                          s2 = fasttrap_fuword32_noerr(&stack[2]);
1012 1017                          s3 = fasttrap_fuword32_noerr(&stack[3]);
1013 1018                          s4 = fasttrap_fuword32_noerr(&stack[4]);
1014 1019                          s5 = fasttrap_fuword32_noerr(&stack[5]);
1015 1020  
1016 1021                          for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
1017 1022                                  fasttrap_probe_t *probe = id->fti_probe;
1018 1023  
1019 1024                                  if (id->fti_ptype == DTFTP_ENTRY) {
1020 1025                                          /*
1021 1026                                           * We note that this was an entry
1022 1027                                           * probe to help ustack() find the
1023 1028                                           * first caller.
1024 1029                                           */
1025 1030                                          cookie = dtrace_interrupt_disable();
1026 1031                                          DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
1027 1032                                          dtrace_probe(probe->ftp_id, s1, s2,
1028 1033                                              s3, s4, s5);
1029 1034                                          DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1030 1035                                          dtrace_interrupt_enable(cookie);
1031 1036                                  } else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1032 1037                                          /*
1033 1038                                           * Note that in this case, we don't
1034 1039                                           * call dtrace_probe() since it's only
1035 1040                                           * an artificial probe meant to change
1036 1041                                           * the flow of control so that it
1037 1042                                           * encounters the true probe.
1038 1043                                           */
1039 1044                                          is_enabled = 1;
1040 1045                                  } else if (probe->ftp_argmap == NULL) {
1041 1046                                          dtrace_probe(probe->ftp_id, s0, s1,
1042 1047                                              s2, s3, s4);
1043 1048                                  } else {
1044 1049                                          uint32_t t[5];
1045 1050  
1046 1051                                          fasttrap_usdt_args32(probe, rp,
1047 1052                                              sizeof (t) / sizeof (t[0]), t);
1048 1053  
1049 1054                                          dtrace_probe(probe->ftp_id, t[0], t[1],
1050 1055                                              t[2], t[3], t[4]);
1051 1056                                  }
1052 1057                          }
1053 1058  #ifdef __amd64
1054 1059                  }
1055 1060  #endif
1056 1061          }
1057 1062  
1058 1063          /*
1059 1064           * We're about to do a bunch of work so we cache a local copy of
1060 1065           * the tracepoint to emulate the instruction, and then find the
1061 1066           * tracepoint again later if we need to light up any return probes.
1062 1067           */
1063 1068          tp_local = *tp;
1064 1069          mutex_exit(pid_mtx);
1065 1070          tp = &tp_local;
1066 1071  
1067 1072          /*
1068 1073           * Set the program counter to appear as though the traced instruction
1069 1074           * had completely executed. This ensures that fasttrap_getreg() will
1070 1075           * report the expected value for REG_RIP.
1071 1076           */
1072 1077          rp->r_pc = pc + tp->ftt_size;
1073 1078  
1074 1079          /*
1075 1080           * If there's an is-enabled probe connected to this tracepoint it
1076 1081           * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1077 1082           * instruction that was placed there by DTrace when the binary was
1078 1083           * linked. As this probe is, in fact, enabled, we need to stuff 1
1079 1084           * into %eax or %rax. Accordingly, we can bypass all the instruction
1080 1085           * emulation logic since we know the inevitable result. It's possible
1081 1086           * that a user could construct a scenario where the 'is-enabled'
1082 1087           * probe was on some other instruction, but that would be a rather
1083 1088           * exotic way to shoot oneself in the foot.
1084 1089           */
1085 1090          if (is_enabled) {
1086 1091                  rp->r_r0 = 1;
1087 1092                  new_pc = rp->r_pc;
1088 1093                  goto done;
1089 1094          }
1090 1095  
1091 1096          /*
1092 1097           * We emulate certain types of instructions to ensure correctness
1093 1098           * (in the case of position dependent instructions) or optimize
1094 1099           * common cases. The rest we have the thread execute back in user-
1095 1100           * land.
1096 1101           */
1097 1102          switch (tp->ftt_type) {
1098 1103          case FASTTRAP_T_RET:
1099 1104          case FASTTRAP_T_RET16:
1100 1105          {
1101 1106                  uintptr_t dst;
1102 1107                  uintptr_t addr;
1103 1108                  int ret;
1104 1109  
1105 1110                  /*
1106 1111                   * We have to emulate _every_ facet of the behavior of a ret
1107 1112                   * instruction including what happens if the load from %esp
1108 1113                   * fails; in that case, we send a SIGSEGV.
1109 1114                   */
1110 1115  #ifdef __amd64
1111 1116                  if (p->p_model == DATAMODEL_NATIVE) {
1112 1117  #endif
1113 1118                          ret = fasttrap_fulword((void *)rp->r_sp, &dst);
1114 1119                          addr = rp->r_sp + sizeof (uintptr_t);
1115 1120  #ifdef __amd64
1116 1121                  } else {
1117 1122                          uint32_t dst32;
1118 1123                          ret = fasttrap_fuword32((void *)rp->r_sp, &dst32);
1119 1124                          dst = dst32;
1120 1125                          addr = rp->r_sp + sizeof (uint32_t);
1121 1126                  }
1122 1127  #endif
1123 1128  
1124 1129                  if (ret == -1) {
1125 1130                          fasttrap_sigsegv(p, curthread, rp->r_sp);
1126 1131                          new_pc = pc;
1127 1132                          break;
1128 1133                  }
1129 1134  
1130 1135                  if (tp->ftt_type == FASTTRAP_T_RET16)
1131 1136                          addr += tp->ftt_dest;
1132 1137  
1133 1138                  rp->r_sp = addr;
1134 1139                  new_pc = dst;
1135 1140                  break;
1136 1141          }
1137 1142  
1138 1143          case FASTTRAP_T_JCC:
1139 1144          {
1140 1145                  uint_t taken;
1141 1146  
1142 1147                  switch (tp->ftt_code) {
1143 1148                  case FASTTRAP_JO:
1144 1149                          taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) != 0;
1145 1150                          break;
1146 1151                  case FASTTRAP_JNO:
1147 1152                          taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) == 0;
1148 1153                          break;
1149 1154                  case FASTTRAP_JB:
1150 1155                          taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0;
1151 1156                          break;
1152 1157                  case FASTTRAP_JAE:
1153 1158                          taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0;
1154 1159                          break;
1155 1160                  case FASTTRAP_JE:
1156 1161                          taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1157 1162                          break;
1158 1163                  case FASTTRAP_JNE:
1159 1164                          taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1160 1165                          break;
1161 1166                  case FASTTRAP_JBE:
1162 1167                          taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0 ||
1163 1168                              (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1164 1169                          break;
1165 1170                  case FASTTRAP_JA:
1166 1171                          taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0 &&
1167 1172                              (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1168 1173                          break;
1169 1174                  case FASTTRAP_JS:
1170 1175                          taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) != 0;
1171 1176                          break;
1172 1177                  case FASTTRAP_JNS:
1173 1178                          taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) == 0;
1174 1179                          break;
1175 1180                  case FASTTRAP_JP:
1176 1181                          taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) != 0;
1177 1182                          break;
1178 1183                  case FASTTRAP_JNP:
1179 1184                          taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) == 0;
1180 1185                          break;
1181 1186                  case FASTTRAP_JL:
1182 1187                          taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1183 1188                              ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1184 1189                          break;
1185 1190                  case FASTTRAP_JGE:
1186 1191                          taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1187 1192                              ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1188 1193                          break;
1189 1194                  case FASTTRAP_JLE:
1190 1195                          taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 ||
1191 1196                              ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1192 1197                              ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1193 1198                          break;
1194 1199                  case FASTTRAP_JG:
1195 1200                          taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1196 1201                              ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1197 1202                              ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1198 1203                          break;
1199 1204  
1200 1205                  }
1201 1206  
1202 1207                  if (taken)
1203 1208                          new_pc = tp->ftt_dest;
1204 1209                  else
1205 1210                          new_pc = pc + tp->ftt_size;
1206 1211                  break;
1207 1212          }
1208 1213  
1209 1214          case FASTTRAP_T_LOOP:
1210 1215          {
1211 1216                  uint_t taken;
1212 1217  #ifdef __amd64
1213 1218                  greg_t cx = rp->r_rcx--;
1214 1219  #else
1215 1220                  greg_t cx = rp->r_ecx--;
1216 1221  #endif
1217 1222  
1218 1223                  switch (tp->ftt_code) {
1219 1224                  case FASTTRAP_LOOPNZ:
1220 1225                          taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1221 1226                              cx != 0;
1222 1227                          break;
1223 1228                  case FASTTRAP_LOOPZ:
1224 1229                          taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 &&
1225 1230                              cx != 0;
1226 1231                          break;
1227 1232                  case FASTTRAP_LOOP:
1228 1233                          taken = (cx != 0);
1229 1234                          break;
1230 1235                  }
1231 1236  
1232 1237                  if (taken)
1233 1238                          new_pc = tp->ftt_dest;
1234 1239                  else
1235 1240                          new_pc = pc + tp->ftt_size;
1236 1241                  break;
1237 1242          }
1238 1243  
1239 1244          case FASTTRAP_T_JCXZ:
1240 1245          {
1241 1246  #ifdef __amd64
1242 1247                  greg_t cx = rp->r_rcx;
1243 1248  #else
1244 1249                  greg_t cx = rp->r_ecx;
1245 1250  #endif
1246 1251  
1247 1252                  if (cx == 0)
1248 1253                          new_pc = tp->ftt_dest;
1249 1254                  else
1250 1255                          new_pc = pc + tp->ftt_size;
1251 1256                  break;
1252 1257          }
1253 1258  
1254 1259          case FASTTRAP_T_PUSHL_EBP:
1255 1260          {
1256 1261                  int ret;
1257 1262                  uintptr_t addr;
1258 1263  #ifdef __amd64
1259 1264                  if (p->p_model == DATAMODEL_NATIVE) {
1260 1265  #endif
1261 1266                          addr = rp->r_sp - sizeof (uintptr_t);
1262 1267                          ret = fasttrap_sulword((void *)addr, rp->r_fp);
1263 1268  #ifdef __amd64
1264 1269                  } else {
1265 1270                          addr = rp->r_sp - sizeof (uint32_t);
1266 1271                          ret = fasttrap_suword32((void *)addr,
1267 1272                              (uint32_t)rp->r_fp);
1268 1273                  }
1269 1274  #endif
1270 1275  
1271 1276                  if (ret == -1) {
1272 1277                          fasttrap_sigsegv(p, curthread, addr);
1273 1278                          new_pc = pc;
1274 1279                          break;
1275 1280                  }
1276 1281  
1277 1282                  rp->r_sp = addr;
1278 1283                  new_pc = pc + tp->ftt_size;
1279 1284                  break;
1280 1285          }
1281 1286  
1282 1287          case FASTTRAP_T_NOP:
1283 1288                  new_pc = pc + tp->ftt_size;
1284 1289                  break;
1285 1290  
1286 1291          case FASTTRAP_T_JMP:
1287 1292          case FASTTRAP_T_CALL:
1288 1293                  if (tp->ftt_code == 0) {
1289 1294                          new_pc = tp->ftt_dest;
1290 1295                  } else {
1291 1296                          uintptr_t value, addr = tp->ftt_dest;
1292 1297  
1293 1298                          if (tp->ftt_base != FASTTRAP_NOREG)
1294 1299                                  addr += fasttrap_getreg(rp, tp->ftt_base);
1295 1300                          if (tp->ftt_index != FASTTRAP_NOREG)
1296 1301                                  addr += fasttrap_getreg(rp, tp->ftt_index) <<
1297 1302                                      tp->ftt_scale;
1298 1303  
1299 1304                          if (tp->ftt_code == 1) {
1300 1305                                  /*
1301 1306                                   * If there's a segment prefix for this
1302 1307                                   * instruction, we'll need to check permissions
1303 1308                                   * and bounds on the given selector, and adjust
1304 1309                                   * the address accordingly.
1305 1310                                   */
1306 1311                                  if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
1307 1312                                      fasttrap_do_seg(tp, rp, &addr) != 0) {
1308 1313                                          fasttrap_sigsegv(p, curthread, addr);
1309 1314                                          new_pc = pc;
1310 1315                                          break;
1311 1316                                  }
1312 1317  
1313 1318  #ifdef __amd64
1314 1319                                  if (p->p_model == DATAMODEL_NATIVE) {
1315 1320  #endif
1316 1321                                          if (fasttrap_fulword((void *)addr,
1317 1322                                              &value) == -1) {
1318 1323                                                  fasttrap_sigsegv(p, curthread,
1319 1324                                                      addr);
1320 1325                                                  new_pc = pc;
1321 1326                                                  break;
1322 1327                                          }
1323 1328                                          new_pc = value;
1324 1329  #ifdef __amd64
1325 1330                                  } else {
1326 1331                                          uint32_t value32;
1327 1332                                          addr = (uintptr_t)(uint32_t)addr;
1328 1333                                          if (fasttrap_fuword32((void *)addr,
1329 1334                                              &value32) == -1) {
1330 1335                                                  fasttrap_sigsegv(p, curthread,
1331 1336                                                      addr);
1332 1337                                                  new_pc = pc;
1333 1338                                                  break;
1334 1339                                          }
1335 1340                                          new_pc = value32;
1336 1341                                  }
1337 1342  #endif
1338 1343                          } else {
1339 1344                                  new_pc = addr;
1340 1345                          }
1341 1346                  }
1342 1347  
1343 1348                  /*
1344 1349                   * If this is a call instruction, we need to push the return
1345 1350                   * address onto the stack. If this fails, we send the process
1346 1351                   * a SIGSEGV and reset the pc to emulate what would happen if
1347 1352                   * this instruction weren't traced.
1348 1353                   */
1349 1354                  if (tp->ftt_type == FASTTRAP_T_CALL) {
1350 1355                          int ret;
1351 1356                          uintptr_t addr;
1352 1357  #ifdef __amd64
1353 1358                          if (p->p_model == DATAMODEL_NATIVE) {
1354 1359                                  addr = rp->r_sp - sizeof (uintptr_t);
1355 1360                                  ret = fasttrap_sulword((void *)addr,
1356 1361                                      pc + tp->ftt_size);
1357 1362                          } else {
1358 1363  #endif
1359 1364                                  addr = rp->r_sp - sizeof (uint32_t);
1360 1365                                  ret = fasttrap_suword32((void *)addr,
1361 1366                                      (uint32_t)(pc + tp->ftt_size));
1362 1367  #ifdef __amd64
1363 1368                          }
1364 1369  #endif
1365 1370  
1366 1371                          if (ret == -1) {
1367 1372                                  fasttrap_sigsegv(p, curthread, addr);
1368 1373                                  new_pc = pc;
1369 1374                                  break;
1370 1375                          }
1371 1376  
1372 1377                          rp->r_sp = addr;
1373 1378                  }
1374 1379  
1375 1380                  break;
1376 1381  
1377 1382          case FASTTRAP_T_COMMON:
1378 1383          {
1379 1384                  uintptr_t addr;
1380 1385  #if defined(__amd64)
1381 1386                  uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];
1382 1387  #else
1383 1388                  uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7];
1384 1389  #endif
1385 1390                  uint_t i = 0;
1386 1391                  klwp_t *lwp = ttolwp(curthread);
  
    | 
      ↓ open down ↓ | 
    1336 lines elided | 
    
      ↑ open up ↑ | 
  
1387 1392  
1388 1393                  /*
1389 1394                   * Compute the address of the ulwp_t and step over the
1390 1395                   * ul_self pointer. The method used to store the user-land
1391 1396                   * thread pointer is very different on 32- and 64-bit
1392 1397                   * kernels.
1393 1398                   */
1394 1399  #if defined(__amd64)
1395 1400                  if (p->p_model == DATAMODEL_LP64) {
1396 1401                          addr = lwp->lwp_pcb.pcb_fsbase;
     1402 +
     1403 +                        /*
     1404 +                         * If we're branded, convert the fsbase from the
     1405 +                         * brand's fsbase to the native fsbase.
     1406 +                         */
     1407 +                        if (PROC_IS_BRANDED(p) && BRMOP(p)->b_fsbase != NULL)
     1408 +                                addr = BRMOP(p)->b_fsbase(lwp, addr);
     1409 +
1397 1410                          addr += sizeof (void *);
1398 1411                  } else {
1399 1412                          addr = lwp->lwp_pcb.pcb_gsbase;
1400 1413                          addr += sizeof (caddr32_t);
1401 1414                  }
1402 1415  #else
1403 1416                  addr = USEGD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
1404 1417                  addr += sizeof (void *);
1405 1418  #endif
1406 1419  
1407 1420                  /*
1408 1421                   * Generic Instruction Tracing
1409 1422                   * ---------------------------
1410 1423                   *
1411 1424                   * This is the layout of the scratch space in the user-land
1412 1425                   * thread structure for our generated instructions.
1413 1426                   *
1414 1427                   *      32-bit mode                     bytes
1415 1428                   *      ------------------------        -----
1416 1429                   * a:   <original instruction>          <= 15
1417 1430                   *      jmp     <pc + tp->ftt_size>         5
1418 1431                   * b:   <original instrction>           <= 15
1419 1432                   *      int     T_DTRACE_RET                2
1420 1433                   *                                      -----
1421 1434                   *                                      <= 37
1422 1435                   *
1423 1436                   *      64-bit mode                     bytes
1424 1437                   *      ------------------------        -----
1425 1438                   * a:   <original instruction>          <= 15
1426 1439                   *      jmp     0(%rip)                     6
1427 1440                   *      <pc + tp->ftt_size>                 8
1428 1441                   * b:   <original instruction>          <= 15
1429 1442                   *      int     T_DTRACE_RET                2
1430 1443                   *                                      -----
1431 1444                   *                                      <= 46
1432 1445                   *
1433 1446                   * The %pc is set to a, and curthread->t_dtrace_astpc is set
1434 1447                   * to b. If we encounter a signal on the way out of the
1435 1448                   * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1436 1449                   * so that we execute the original instruction and re-enter
1437 1450                   * the kernel rather than redirecting to the next instruction.
1438 1451                   *
1439 1452                   * If there are return probes (so we know that we're going to
1440 1453                   * need to reenter the kernel after executing the original
1441 1454                   * instruction), the scratch space will just contain the
1442 1455                   * original instruction followed by an interrupt -- the same
1443 1456                   * data as at b.
1444 1457                   *
1445 1458                   * %rip-relative Addressing
1446 1459                   * ------------------------
1447 1460                   *
1448 1461                   * There's a further complication in 64-bit mode due to %rip-
1449 1462                   * relative addressing. While this is clearly a beneficial
1450 1463                   * architectural decision for position independent code, it's
1451 1464                   * hard not to see it as a personal attack against the pid
1452 1465                   * provider since before there was a relatively small set of
1453 1466                   * instructions to emulate; with %rip-relative addressing,
1454 1467                   * almost every instruction can potentially depend on the
1455 1468                   * address at which it's executed. Rather than emulating
1456 1469                   * the broad spectrum of instructions that can now be
1457 1470                   * position dependent, we emulate jumps and others as in
1458 1471                   * 32-bit mode, and take a different tack for instructions
1459 1472                   * using %rip-relative addressing.
1460 1473                   *
1461 1474                   * For every instruction that uses the ModRM byte, the
1462 1475                   * in-kernel disassembler reports its location. We use the
1463 1476                   * ModRM byte to identify that an instruction uses
1464 1477                   * %rip-relative addressing and to see what other registers
1465 1478                   * the instruction uses. To emulate those instructions,
1466 1479                   * we modify the instruction to be %rax-relative rather than
1467 1480                   * %rip-relative (or %rcx-relative if the instruction uses
1468 1481                   * %rax; or %r8- or %r9-relative if the REX.B is present so
1469 1482                   * we don't have to rewrite the REX prefix). We then load
1470 1483                   * the value that %rip would have been into the scratch
1471 1484                   * register and generate an instruction to reset the scratch
1472 1485                   * register back to its original value. The instruction
1473 1486                   * sequence looks like this:
1474 1487                   *
1475 1488                   *      64-mode %rip-relative           bytes
1476 1489                   *      ------------------------        -----
1477 1490                   * a:   <modified instruction>          <= 15
1478 1491                   *      movq    $<value>, %<scratch>        6
1479 1492                   *      jmp     0(%rip)                     6
1480 1493                   *      <pc + tp->ftt_size>                 8
1481 1494                   * b:   <modified instruction>          <= 15
1482 1495                   *      int     T_DTRACE_RET                2
1483 1496                   *                                      -----
1484 1497                   *                                         52
1485 1498                   *
1486 1499                   * We set curthread->t_dtrace_regv so that upon receiving
1487 1500                   * a signal we can reset the value of the scratch register.
1488 1501                   */
1489 1502  
1490 1503                  ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);
1491 1504  
1492 1505                  curthread->t_dtrace_scrpc = addr;
1493 1506                  bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1494 1507                  i += tp->ftt_size;
1495 1508  
1496 1509  #ifdef __amd64
1497 1510                  if (tp->ftt_ripmode != 0) {
1498 1511                          greg_t *reg;
1499 1512  
1500 1513                          ASSERT(p->p_model == DATAMODEL_LP64);
1501 1514                          ASSERT(tp->ftt_ripmode &
1502 1515                              (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));
1503 1516  
1504 1517                          /*
1505 1518                           * If this was a %rip-relative instruction, we change
1506 1519                           * it to be either a %rax- or %rcx-relative
1507 1520                           * instruction (depending on whether those registers
1508 1521                           * are used as another operand; or %r8- or %r9-
1509 1522                           * relative depending on the value of REX.B). We then
1510 1523                           * set that register and generate a movq instruction
1511 1524                           * to reset the value.
1512 1525                           */
1513 1526                          if (tp->ftt_ripmode & FASTTRAP_RIP_X)
1514 1527                                  scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
1515 1528                          else
1516 1529                                  scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);
1517 1530  
1518 1531                          if (tp->ftt_ripmode & FASTTRAP_RIP_1)
1519 1532                                  scratch[i++] = FASTTRAP_MOV_EAX;
1520 1533                          else
1521 1534                                  scratch[i++] = FASTTRAP_MOV_ECX;
1522 1535  
1523 1536                          switch (tp->ftt_ripmode) {
1524 1537                          case FASTTRAP_RIP_1:
1525 1538                                  reg = &rp->r_rax;
1526 1539                                  curthread->t_dtrace_reg = REG_RAX;
1527 1540                                  break;
1528 1541                          case FASTTRAP_RIP_2:
1529 1542                                  reg = &rp->r_rcx;
1530 1543                                  curthread->t_dtrace_reg = REG_RCX;
1531 1544                                  break;
1532 1545                          case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
1533 1546                                  reg = &rp->r_r8;
1534 1547                                  curthread->t_dtrace_reg = REG_R8;
1535 1548                                  break;
1536 1549                          case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
1537 1550                                  reg = &rp->r_r9;
1538 1551                                  curthread->t_dtrace_reg = REG_R9;
1539 1552                                  break;
1540 1553                          }
1541 1554  
1542 1555                          /* LINTED - alignment */
1543 1556                          *(uint64_t *)&scratch[i] = *reg;
1544 1557                          curthread->t_dtrace_regv = *reg;
1545 1558                          *reg = pc + tp->ftt_size;
1546 1559                          i += sizeof (uint64_t);
1547 1560                  }
1548 1561  #endif
1549 1562  
1550 1563                  /*
1551 1564                   * Generate the branch instruction to what would have
1552 1565                   * normally been the subsequent instruction. In 32-bit mode,
1553 1566                   * this is just a relative branch; in 64-bit mode this is a
1554 1567                   * %rip-relative branch that loads the 64-bit pc value
1555 1568                   * immediately after the jmp instruction.
1556 1569                   */
1557 1570  #ifdef __amd64
1558 1571                  if (p->p_model == DATAMODEL_LP64) {
1559 1572                          scratch[i++] = FASTTRAP_GROUP5_OP;
1560 1573                          scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
1561 1574                          /* LINTED - alignment */
1562 1575                          *(uint32_t *)&scratch[i] = 0;
1563 1576                          i += sizeof (uint32_t);
1564 1577                          /* LINTED - alignment */
1565 1578                          *(uint64_t *)&scratch[i] = pc + tp->ftt_size;
1566 1579                          i += sizeof (uint64_t);
1567 1580                  } else {
1568 1581  #endif
1569 1582                          /*
1570 1583                           * Set up the jmp to the next instruction; note that
1571 1584                           * the size of the traced instruction cancels out.
1572 1585                           */
1573 1586                          scratch[i++] = FASTTRAP_JMP32;
1574 1587                          /* LINTED - alignment */
1575 1588                          *(uint32_t *)&scratch[i] = pc - addr - 5;
1576 1589                          i += sizeof (uint32_t);
1577 1590  #ifdef __amd64
1578 1591                  }
1579 1592  #endif
1580 1593  
1581 1594                  curthread->t_dtrace_astpc = addr + i;
1582 1595                  bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1583 1596                  i += tp->ftt_size;
1584 1597                  scratch[i++] = FASTTRAP_INT;
1585 1598                  scratch[i++] = T_DTRACE_RET;
1586 1599  
1587 1600                  ASSERT(i <= sizeof (scratch));
1588 1601  
1589 1602                  if (fasttrap_copyout(scratch, (char *)addr, i)) {
1590 1603                          fasttrap_sigtrap(p, curthread, pc);
1591 1604                          new_pc = pc;
1592 1605                          break;
1593 1606                  }
1594 1607  
1595 1608                  if (tp->ftt_retids != NULL) {
1596 1609                          curthread->t_dtrace_step = 1;
1597 1610                          curthread->t_dtrace_ret = 1;
1598 1611                          new_pc = curthread->t_dtrace_astpc;
1599 1612                  } else {
1600 1613                          new_pc = curthread->t_dtrace_scrpc;
1601 1614                  }
1602 1615  
1603 1616                  curthread->t_dtrace_pc = pc;
1604 1617                  curthread->t_dtrace_npc = pc + tp->ftt_size;
1605 1618                  curthread->t_dtrace_on = 1;
1606 1619                  break;
1607 1620          }
1608 1621  
1609 1622          default:
1610 1623                  panic("fasttrap: mishandled an instruction");
1611 1624          }
1612 1625  
1613 1626  done:
1614 1627          /*
1615 1628           * If there were no return probes when we first found the tracepoint,
1616 1629           * we should feel no obligation to honor any return probes that were
1617 1630           * subsequently enabled -- they'll just have to wait until the next
1618 1631           * time around.
1619 1632           */
1620 1633          if (tp->ftt_retids != NULL) {
1621 1634                  /*
1622 1635                   * We need to wait until the results of the instruction are
1623 1636                   * apparent before invoking any return probes. If this
1624 1637                   * instruction was emulated we can just call
1625 1638                   * fasttrap_return_common(); if it needs to be executed, we
1626 1639                   * need to wait until the user thread returns to the kernel.
1627 1640                   */
1628 1641                  if (tp->ftt_type != FASTTRAP_T_COMMON) {
1629 1642                          /*
1630 1643                           * Set the program counter to the address of the traced
1631 1644                           * instruction so that it looks right in ustack()
1632 1645                           * output. We had previously set it to the end of the
1633 1646                           * instruction to simplify %rip-relative addressing.
1634 1647                           */
1635 1648                          rp->r_pc = pc;
1636 1649  
1637 1650                          fasttrap_return_common(rp, pc, pid, new_pc);
1638 1651                  } else {
1639 1652                          ASSERT(curthread->t_dtrace_ret != 0);
1640 1653                          ASSERT(curthread->t_dtrace_pc == pc);
1641 1654                          ASSERT(curthread->t_dtrace_scrpc != 0);
1642 1655                          ASSERT(new_pc == curthread->t_dtrace_astpc);
1643 1656                  }
1644 1657          }
1645 1658  
1646 1659          rp->r_pc = new_pc;
1647 1660  
1648 1661          return (0);
1649 1662  }
1650 1663  
1651 1664  int
1652 1665  fasttrap_return_probe(struct regs *rp)
1653 1666  {
1654 1667          proc_t *p = curproc;
1655 1668          uintptr_t pc = curthread->t_dtrace_pc;
1656 1669          uintptr_t npc = curthread->t_dtrace_npc;
1657 1670  
1658 1671          curthread->t_dtrace_pc = 0;
1659 1672          curthread->t_dtrace_npc = 0;
1660 1673          curthread->t_dtrace_scrpc = 0;
1661 1674          curthread->t_dtrace_astpc = 0;
1662 1675  
1663 1676          /*
1664 1677           * Treat a child created by a call to vfork(2) as if it were its
1665 1678           * parent. We know that there's only one thread of control in such a
1666 1679           * process: this one.
1667 1680           */
1668 1681          while (p->p_flag & SVFORK) {
1669 1682                  p = p->p_parent;
1670 1683          }
1671 1684  
1672 1685          /*
1673 1686           * We set rp->r_pc to the address of the traced instruction so
1674 1687           * that it appears to dtrace_probe() that we're on the original
1675 1688           * instruction, and so that the user can't easily detect our
1676 1689           * complex web of lies. dtrace_return_probe() (our caller)
1677 1690           * will correctly set %pc after we return.
1678 1691           */
1679 1692          rp->r_pc = pc;
1680 1693  
1681 1694          fasttrap_return_common(rp, pc, p->p_pid, npc);
1682 1695  
1683 1696          return (0);
1684 1697  }
1685 1698  
1686 1699  /*ARGSUSED*/
1687 1700  uint64_t
1688 1701  fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1689 1702      int aframes)
1690 1703  {
1691 1704          return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 1, argno));
1692 1705  }
1693 1706  
1694 1707  /*ARGSUSED*/
1695 1708  uint64_t
1696 1709  fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1697 1710      int aframes)
1698 1711  {
1699 1712          return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno));
1700 1713  }
1701 1714  
/*
 * Return the value of the register identified by the /proc-style register
 * constant 'reg' from the saved user register state 'rp'. An unrecognized
 * constant is a programming error and panics the system.
 */
static ulong_t
fasttrap_getreg(struct regs *rp, uint_t reg)
{
#ifdef __amd64
	/*
	 * On amd64, struct regs has no fixed array layout we can index,
	 * so map each register constant to its field explicitly.
	 */
	switch (reg) {
	case REG_R15:		return (rp->r_r15);
	case REG_R14:		return (rp->r_r14);
	case REG_R13:		return (rp->r_r13);
	case REG_R12:		return (rp->r_r12);
	case REG_R11:		return (rp->r_r11);
	case REG_R10:		return (rp->r_r10);
	case REG_R9:		return (rp->r_r9);
	case REG_R8:		return (rp->r_r8);
	case REG_RDI:		return (rp->r_rdi);
	case REG_RSI:		return (rp->r_rsi);
	case REG_RBP:		return (rp->r_rbp);
	case REG_RBX:		return (rp->r_rbx);
	case REG_RDX:		return (rp->r_rdx);
	case REG_RCX:		return (rp->r_rcx);
	case REG_RAX:		return (rp->r_rax);
	case REG_TRAPNO:	return (rp->r_trapno);
	case REG_ERR:		return (rp->r_err);
	case REG_RIP:		return (rp->r_rip);
	case REG_CS:		return (rp->r_cs);
	case REG_RFL:		return (rp->r_rfl);
	case REG_RSP:		return (rp->r_rsp);
	case REG_SS:		return (rp->r_ss);
	case REG_FS:		return (rp->r_fs);
	case REG_GS:		return (rp->r_gs);
	case REG_DS:		return (rp->r_ds);
	case REG_ES:		return (rp->r_es);
	/*
	 * The segment base registers aren't part of struct regs; read
	 * them directly from the corresponding MSRs.
	 */
	case REG_FSBASE:	return (rdmsr(MSR_AMD_FSBASE));
	case REG_GSBASE:	return (rdmsr(MSR_AMD_GSBASE));
	}

	panic("dtrace: illegal register constant");
	/*NOTREACHED*/
#else
	/*
	 * On 32-bit x86 the saved registers are laid out contiguously
	 * starting at r_gs, so a bounds-checked index suffices.
	 */
	if (reg >= _NGREG)
		panic("dtrace: illegal register constant");

	return (((greg_t *)&rp->r_gs)[reg]);
#endif
}
  
    | 
      ↓ open down ↓ | 
    339 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX