1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2023 Oxide Computer Company
  14  */
  15 
  16 /*
  17  * This file implements various utility functions we use for the xsave tests.
  18  */
  19 
  20 #include <string.h>
  21 #include <strings.h>
  22 #include <sys/auxv.h>
  23 #include <sys/sysmacros.h>
  24 #include <err.h>
  25 #include <stdlib.h>
  26 #include <procfs.h>
  27 #include <sys/x86_archext.h>
  28 #include <unistd.h>
  29 #include <errno.h>
  30 #include <sys/types.h>
  31 #include <sys/wait.h>
  32 #include <sys/debug.h>
  33 #include <ieeefp.h>
  34 
  35 #include "xsave_util.h"
  36 
/*
 * Timeout, in milliseconds, handed to Pwait() when waiting for a child process
 * that we have set running to stop again.
 */
static uint_t xsu_proc_timeout = 60 * 1000; /* 60s in ms */
  38 
  39 /*
  40  * Determine if we have the hardware support required for a given level of
  41  * hardware support.
  42  */
  43 uint32_t
  44 xsu_hwsupport(void)
  45 {
  46         uint_t isa[3];
  47         uint_t nisa = getisax(isa, ARRAY_SIZE(isa));
  48 
  49         if (nisa != ARRAY_SIZE(isa)) {
  50                 errx(EXIT_FAILURE, "did not get all %u hwcap values, found %u",
  51                     ARRAY_SIZE(isa), nisa);
  52         }
  53 
  54         if ((isa[0] & AV_386_XSAVE) == 0) {
  55                 errx(EXIT_FAILURE, "xsave not present: this test should have "
  56                     "been skipped");
  57         }
  58 
  59         if ((isa[1] & AV_386_2_AVX512F) != 0) {
  60                 warnx("found %%zmm support");
  61                 return (XSU_ZMM);
  62         }
  63 
  64         if ((isa[0] & AV_386_AVX) != 0) {
  65                 warnx("found %%ymm support");
  66                 return (XSU_YMM);
  67         }
  68 
  69         errx(EXIT_FAILURE, "no non-XMM xsave state found: this test should "
  70             "have been skipped");
  71 }
  72 
  73 /*
  74  * Fill all the valid regions of an FPU based on treating the vector register as
  75  * a series of uint32_t values and going from there.
  76  */
  77 void
  78 xsu_fill(xsu_fpu_t *fpu, uint32_t level, uint32_t start)
  79 {
  80         (void) memset(fpu, 0, sizeof (xsu_fpu_t));
  81 
  82         switch (level) {
  83         default:
  84                 errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", level);
  85         case XSU_YMM:
  86                 for (uint32_t regno = 0; regno < XSU_MAX_YMM; regno++) {
  87                         for (uint32_t u32 = 0; u32 < XSU_YMM_U32; u32++,
  88                             start++) {
  89                                 fpu->xf_reg[regno]._l[u32] = start;
  90                         }
  91                 }
  92                 break;
  93         case XSU_ZMM:
  94                 for (uint32_t regno = 0; regno < XSU_MAX_ZMM; regno++) {
  95                         for (uint32_t u32 = 0; u32 < XSU_ZMM_U32; u32++,
  96                             start++) {
  97                                 fpu->xf_reg[regno]._l[u32] = start;
  98                         }
  99                 }
 100                 for (uint32_t regno = 0; regno < ARRAY_SIZE(fpu->xf_opmask);
 101                     regno++) {
 102                         uint64_t val = start | (((uint64_t)start + 1) << 32);
 103                         fpu->xf_opmask[regno] = val;
 104                         start += 2;
 105                 }
 106                 break;
 107         }
 108 }
 109 
 110 static void
 111 xsu_overwrite_uctx_xmm(ucontext_t *uctx, const xsu_fpu_t *fpu)
 112 {
 113         struct _fpchip_state *fp;
 114 
 115         fp = &uctx->uc_mcontext.fpregs.fp_reg_set.fpchip_state;
 116         for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
 117                 (void) memcpy(&fp->xmm[i], &fpu->xf_reg[i]._l[0],
 118                     XSU_XMM_U32 * sizeof (uint32_t));
 119         }
 120 }
 121 
 122 static void
 123 xsu_overwrite_uctx_ymm(uintptr_t arg, const xsu_fpu_t *fpu)
 124 {
 125         prxregset_ymm_t *ymm = (void *)arg;
 126 
 127         for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
 128                 (void) memcpy(&ymm->prx_ymm[i]._l[0],
 129                     &fpu->xf_reg[i]._l[XSU_XMM_U32],
 130                     XSU_XMM_U32 * sizeof (uint32_t));
 131         }
 132 }
 133 
 134 static void
 135 xsu_overwrite_uctx_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
 136 {
 137         prxregset_zmm_t *zmm = (void *)arg;
 138 
 139         /*
 140          * Because this is the low zmm registers, we actually use the max ymm
 141          * value as that's what actually fits in the low zmm and not the full
 142          * definition.
 143          */
 144         for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
 145                 (void) memcpy(&zmm->prx_zmm[i]._l[0],
 146                     &fpu->xf_reg[i]._l[XSU_YMM_U32],
 147                     XSU_YMM_U32 * sizeof (uint32_t));
 148         }
 149 }
 150 
 151 static void
 152 xsu_overwrite_uctx_hi_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
 153 {
 154 #ifdef __amd64
 155         prxregset_hi_zmm_t *zmm = (void *)arg;
 156 
 157         for (uint32_t i = XSU_MAX_YMM; i < XSU_MAX_ZMM; i++) {
 158                 (void) memcpy(&zmm->prx_hi_zmm[i - XSU_MAX_YMM]._l[0],
 159                     &fpu->xf_reg[i]._l[0],
 160                     XSU_ZMM_U32 * sizeof (uint32_t));
 161         }
 162 #else   /* !__amd64 */
 163         warnx("attempted to set High ZMM registers on a 32-bit process!");
 164         abort();
 165 #endif  /* __amd64 */
 166 }
 167 
 168 void
 169 xsu_overwrite_uctx(ucontext_t *uctx, const xsu_fpu_t *fpu, uint32_t hwsup)
 170 {
 171         size_t xsave_size = sizeof (uc_xsave_t);
 172         void *new_buf;
 173         uc_xsave_t *ucs;
 174         uintptr_t write_ptr;
 175 
 176         if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
 177                 errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
 178         }
 179 
 180         if (hwsup >= XSU_YMM) {
 181                 xsave_size += sizeof (prxregset_ymm_t);
 182         }
 183 
 184         if (hwsup >= XSU_ZMM) {
 185                 xsave_size += sizeof (prxregset_zmm_t);
 186                 xsave_size += sizeof (prxregset_opmask_t);
 187                 if (XSU_MAX_ZMM > 16) {
 188                         xsave_size += sizeof (prxregset_hi_zmm_t);
 189                 }
 190         }
 191 
 192         new_buf = calloc(1, xsave_size);
 193         if (new_buf == NULL) {
 194                 errx(EXIT_FAILURE, "failed to allocate xsave buf");
 195         }
 196         ucs = new_buf;
 197         ucs->ucx_vers = UC_XSAVE_VERS;
 198         ucs->ucx_len = xsave_size;
 199         if (hwsup >= XSU_YMM) {
 200                 ucs->ucx_bv |= XFEATURE_AVX;
 201         }
 202 
 203         if (hwsup >= XSU_ZMM) {
 204                 ucs->ucx_bv |= XFEATURE_AVX512_OPMASK | XFEATURE_AVX512_ZMM;
 205                 if (XSU_MAX_ZMM > 16)
 206                         ucs->ucx_bv |= XFEATURE_AVX512_HI_ZMM;
 207         }
 208 
 209         /*
 210          * At this point we have rigged things up. XMM values are in the
 211          * ucontext_t itself. After that we must write things out in the kernel
 212          * signal order. Note, the XMM state is not set in the bit-vector
 213          * because well, we don't actually use the xsave pieces for it because o
 214          * the ucontext_t ABI has the xmm state always there. See
 215          * uts/intel/os/fpu.c's big theory statement for more info.
 216          */
 217         xsu_overwrite_uctx_xmm(uctx, fpu);
 218         write_ptr = (uintptr_t)new_buf + sizeof (uc_xsave_t);
 219         if (hwsup >= XSU_YMM) {
 220                 xsu_overwrite_uctx_ymm(write_ptr, fpu);
 221                 write_ptr += sizeof (prxregset_ymm_t);
 222         }
 223 
 224         if (hwsup >= XSU_ZMM) {
 225                 (void) memcpy((void *)write_ptr, fpu->xf_opmask,
 226                     sizeof (fpu->xf_opmask));
 227                 write_ptr += sizeof (fpu->xf_opmask);
 228                 xsu_overwrite_uctx_zmm(write_ptr, fpu);
 229                 write_ptr += sizeof (prxregset_zmm_t);
 230                 if (XSU_MAX_ZMM > 16) {
 231                         xsu_overwrite_uctx_hi_zmm(write_ptr, fpu);
 232                         write_ptr += sizeof (prxregset_hi_zmm_t);
 233                 }
 234         }
 235 
 236         uctx->uc_xsave = (long)(uintptr_t)new_buf;
 237 }
 238 
 239 static boolean_t
 240 xsu_check_vector(const upad512_t *src, const upad512_t *chk, uint32_t regno,
 241     uint32_t nu32)
 242 {
 243         boolean_t valid = B_TRUE;
 244 
 245         for (uint32_t i = 0; i < nu32; i++) {
 246                 if (src->_l[i] != chk->_l[i]) {
 247                         warnx("vec[%u] u32 %u differs: expected 0x%x, "
 248                             "found 0x%x", regno, i, src->_l[i], chk->_l[i]);
 249                         valid = B_FALSE;
 250                 }
 251         }
 252 
 253         return (valid);
 254 }
 255 
 256 boolean_t
 257 xsu_same(const xsu_fpu_t *src, const xsu_fpu_t *check, uint32_t hwsup)
 258 {
 259         boolean_t valid = B_TRUE;
 260 
 261         switch (hwsup) {
 262         default:
 263                 errx(EXIT_FAILURE, "given unkonwn xsu level: 0x%x", hwsup);
 264         case XSU_YMM:
 265                 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
 266                         if (!xsu_check_vector(&src->xf_reg[i],
 267                             &check->xf_reg[i], i, XSU_YMM_U32)) {
 268                                 valid = B_FALSE;
 269                         }
 270                 }
 271                 break;
 272         case XSU_ZMM:
 273                 for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
 274                         if (!xsu_check_vector(&src->xf_reg[i],
 275                             &check->xf_reg[i], i, XSU_ZMM_U32)) {
 276                                 valid = B_FALSE;
 277                         }
 278                 }
 279                 for (uint32_t i = 0; i < ARRAY_SIZE(src->xf_opmask); i++) {
 280                         if (src->xf_opmask[i] != check->xf_opmask[i]) {
 281                                 warnx("mask[%u] differs: expected 0x%" PRIx64
 282                                     ", found 0x%" PRIx64, i, src->xf_opmask[i],
 283                                     check->xf_opmask[i]);
 284                                 valid = B_FALSE;
 285                         }
 286                 }
 287                 break;
 288         }
 289         return (valid);
 290 }
 291 
 292 
 293 void *
 294 xsu_sleeper_thread(void *arg __unused)
 295 {
 296         for (;;) {
 297                 (void) sleep(100);
 298         }
 299         return (NULL);
 300 }
 301 
 302 static void
 303 xsu_dump_vector(FILE *f, const upad512_t *reg, uint32_t nu32, const char *name,
 304     uint32_t idx)
 305 {
 306         VERIFY3U(nu32 % 4, ==, 0);
 307         for (uint32_t i = 0; i < nu32; i += 4) {
 308                 (void) fprintf(f, "%s[%02u] [%02u:%02u] = { 0x%08x 0x%08x "
 309                     "0x%08x 0x%08x }\n", name, idx, i + 3, i,  reg->_l[i + 3],
 310                     reg->_l[i + 2], reg->_l[i + 1], reg->_l[i]);
 311         }
 312 }
 313 
 314 void
 315 xsu_dump(FILE *f, const xsu_fpu_t *fpu, uint32_t hwsup)
 316 {
 317 
 318         switch (hwsup) {
 319         default:
 320                 errx(EXIT_FAILURE, "given unkonwn xsu level: 0x%x", hwsup);
 321         case XSU_YMM:
 322                 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
 323                         xsu_dump_vector(f, &fpu->xf_reg[i], XSU_YMM_U32,
 324                             "ymm", i);
 325                 }
 326                 break;
 327         case XSU_ZMM:
 328                 for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
 329                         xsu_dump_vector(f, &fpu->xf_reg[i], XSU_ZMM_U32,
 330                             "zmm", i);
 331                 }
 332 
 333                 for (uint32_t i = 0; i < ARRAY_SIZE(fpu->xf_opmask); i++) {
 334                         (void) fprintf(f, "%%k%u 0x%016" PRIx64"\n", i,
 335                             fpu->xf_opmask[i]);
 336                 }
 337                 break;
 338         }
 339 }
 340 
/*
 * Scratch state used while building a prxregset_t in xsu_fpu_to_xregs(). It
 * records the level being generated along with pointers to where each
 * component's data lives inside the buffer being built. Pointers for
 * components that are not part of the requested level are left NULL.
 */
typedef struct xsu_prx {
        uint32_t xp_hwsup;              /* XSU_* level being generated */
        prxregset_xsave_t *xp_xsave;    /* PRX_INFO_XSAVE data */
        prxregset_ymm_t *xp_ymm;        /* PRX_INFO_YMM data */
        prxregset_opmask_t *xp_opmask;  /* PRX_INFO_OPMASK data */
        prxregset_zmm_t *xp_zmm;        /* PRX_INFO_ZMM data */
        prxregset_hi_zmm_t *xp_hi_zmm;  /* PRX_INFO_HI_ZMM data */
} xsu_prx_t;
 349 
 350 static void
 351 xsu_fpu_to_xregs_xsave(xsu_prx_t *prx, const xsu_fpu_t *fpu)
 352 {
 353         prx->xp_xsave->prx_fx_fcw = FPU_CW_INIT;
 354         prx->xp_xsave->prx_fx_mxcsr = SSE_MXCSR_INIT;
 355         for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
 356                 (void) memcpy(&prx->xp_xsave->prx_fx_xmm[i],
 357                     &fpu->xf_reg[i]._l[0], XSU_XMM_U32 * sizeof (uint32_t));
 358         }
 359 
 360         prx->xp_xsave->prx_xsh_xstate_bv = XFEATURE_LEGACY_FP |
 361             XFEATURE_SSE;
 362         if (prx->xp_hwsup >= XSU_YMM) {
 363                 prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX;
 364         }
 365 
 366         if (prx->xp_hwsup >= XSU_ZMM) {
 367                 prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX512;
 368         }
 369 }
 370 
 371 static void
 372 xsu_fpu_to_xregs_ymm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
 373 {
 374         /* Copy the upper 128-bits to the YMM save area */
 375         for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
 376                 (void) memcpy(&prx->xp_ymm->prx_ymm[i],
 377                     &fpu->xf_reg[i]._l[XSU_XMM_U32],
 378                     XSU_XMM_U32 * sizeof (uint32_t));
 379         }
 380 }
 381 
 382 static void
 383 xsu_fpu_to_xregs_zmm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
 384 {
 385         /* The lower 16 regs are only 256-bit, the upper are 512-bit */
 386         for (uint32_t i = 0; i < MIN(XSU_MAX_ZMM, 16); i++) {
 387                 (void) memcpy(&prx->xp_zmm->prx_zmm[i],
 388                     &fpu->xf_reg[i]._l[XSU_YMM_U32],
 389                     XSU_YMM_U32 * sizeof (uint32_t));
 390         }
 391 
 392 #ifdef __amd64
 393         for (uint32_t i = 16; i < XSU_MAX_ZMM; i++) {
 394                 (void) memcpy(&prx->xp_hi_zmm->prx_hi_zmm[i - 16],
 395                     &fpu->xf_reg[i]._l[0],
 396                     XSU_ZMM_U32 * sizeof (uint32_t));
 397         }
 398 #endif
 399 
 400         (void) memcpy(prx->xp_opmask->prx_opmask, fpu->xf_opmask,
 401             sizeof (prx->xp_opmask->prx_opmask));
 402 }
 403 
 404 
 405 void
 406 xsu_fpu_to_xregs(const xsu_fpu_t *fpu, uint32_t hwsup, prxregset_t **prxp,
 407     size_t *sizep)
 408 {
 409         uint32_t ninfo = 1, curinfo;
 410         size_t len = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) +
 411             sizeof (prxregset_xsave_t);
 412         prxregset_hdr_t *hdr;
 413         uint32_t off;
 414         xsu_prx_t prx;
 415 
 416         if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
 417                 errx(EXIT_FAILURE, "given unkonwn xsu level: 0x%x", hwsup);
 418         }
 419 
 420         if (hwsup >= XSU_YMM) {
 421                 len += sizeof (prxregset_info_t) + sizeof (prxregset_ymm_t);
 422                 ninfo++;
 423         }
 424 
 425         if (hwsup >= XSU_ZMM) {
 426                 len += 3 * sizeof (prxregset_info_t) +
 427                     sizeof (prxregset_opmask_t) + sizeof (prxregset_zmm_t) +
 428                     sizeof (prxregset_hi_zmm_t);
 429                 ninfo += 3;
 430         }
 431 
 432         hdr = calloc(1, len);
 433         if (hdr == NULL) {
 434                 err(EXIT_FAILURE, "failed to allocate prxregset_t (%zu bytes)",
 435                     len);
 436         }
 437         (void) memset(&prx, 0, sizeof (prx));
 438         prx.xp_hwsup = hwsup;
 439 
 440 #ifdef __amd64
 441         VERIFY3U(len, <=, UINT32_MAX);
 442 #endif  /* __amd64 */
 443         hdr->pr_type = PR_TYPE_XSAVE;
 444         hdr->pr_size = (uint32_t)len;
 445         hdr->pr_ninfo = ninfo;
 446 
 447         curinfo = 0;
 448         off = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) * ninfo;
 449         hdr->pr_info[curinfo].pri_type = PRX_INFO_XSAVE;
 450         hdr->pr_info[curinfo].pri_size = sizeof (prxregset_xsave_t);
 451         hdr->pr_info[curinfo].pri_offset = off;
 452         prx.xp_xsave = (void *)((uintptr_t)hdr + off);
 453         off += sizeof (prxregset_xsave_t);
 454         curinfo++;
 455 
 456         if (hwsup >= XSU_YMM) {
 457                 hdr->pr_info[curinfo].pri_type = PRX_INFO_YMM;
 458                 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_ymm_t);
 459                 hdr->pr_info[curinfo].pri_offset = off;
 460                 prx.xp_ymm = (void *)((uintptr_t)hdr + off);
 461                 off += sizeof (prxregset_ymm_t);
 462                 curinfo++;
 463         }
 464 
 465         if (hwsup >= XSU_ZMM) {
 466                 hdr->pr_info[curinfo].pri_type = PRX_INFO_OPMASK;
 467                 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_opmask_t);
 468                 hdr->pr_info[curinfo].pri_offset = off;
 469                 prx.xp_opmask = (void *)((uintptr_t)hdr + off);
 470                 off += sizeof (prxregset_opmask_t);
 471                 curinfo++;
 472 
 473                 hdr->pr_info[curinfo].pri_type = PRX_INFO_ZMM;
 474                 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_zmm_t);
 475                 hdr->pr_info[curinfo].pri_offset = off;
 476                 prx.xp_zmm = (void *)((uintptr_t)hdr + off);
 477                 off += sizeof (prxregset_zmm_t);
 478                 curinfo++;
 479 
 480                 hdr->pr_info[curinfo].pri_type = PRX_INFO_HI_ZMM;
 481                 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_hi_zmm_t);
 482                 hdr->pr_info[curinfo].pri_offset = off;
 483                 prx.xp_hi_zmm = (void *)((uintptr_t)hdr + off);
 484                 off += sizeof (prxregset_hi_zmm_t);
 485                 curinfo++;
 486         }
 487 
 488         xsu_fpu_to_xregs_xsave(&prx, fpu);
 489         if (hwsup >= XSU_YMM) {
 490                 xsu_fpu_to_xregs_ymm(&prx, fpu);
 491         }
 492 
 493         if (hwsup >= XSU_ZMM) {
 494                 xsu_fpu_to_xregs_zmm(&prx, fpu);
 495         }
 496 
 497         *prxp = (prxregset_t *)hdr;
 498         *sizep = len;
 499 }
 500 
 501 /*
 502  * This pairs with xsu_proc_finish() below. The goal is to allow us to inject
 503  * state after hitting a breakpoint, which is generally used right before
 504  * something wants to print data.
 505  */
 506 void
 507 xsu_proc_bkpt(xsu_proc_t *xp)
 508 {
 509         int perr;
 510         struct ps_prochandle *P;
 511         char *const argv[3] = { xp->xp_prog, xp->xp_arg, NULL };
 512         GElf_Sym sym;
 513 
 514         P = Pcreate(xp->xp_prog, argv, &perr, NULL, 0);
 515         if (P == NULL) {
 516                 errx(EXIT_FAILURE, "failed to create %s: %s", xp->xp_prog,
 517                     Pcreate_error(perr));
 518         }
 519 
 520         xp->xp_proc = P;
 521         (void) Punsetflags(P, PR_RLC);
 522         if (Psetflags(P, PR_KLC | PR_BPTADJ) != 0) {
 523                 int e = errno;
 524                 Prelease(P, PRELEASE_KILL);
 525                 errc(EXIT_FAILURE, e, "failed to set PR_KLC | PR_BPTADJ flags");
 526         }
 527 
 528         if (Pxlookup_by_name(P, LM_ID_BASE, xp->xp_object, xp->xp_symname, &sym,
 529             NULL) != 0) {
 530                 err(EXIT_FAILURE, "failed to find %s`%s", xp->xp_object,
 531                     xp->xp_symname);
 532         }
 533 
 534         if (Pfault(P, FLTBPT, 1) != 0) {
 535                 errx(EXIT_FAILURE, "failed to set the FLTBPT disposition");
 536         }
 537 
 538         xp->xp_addr = sym.st_value;
 539         if (Psetbkpt(P, sym.st_value, &xp->xp_instr) != 0) {
 540                 err(EXIT_FAILURE, "failed to set breakpoint on xsu_getfpu "
 541                     "(0x%" PRIx64 ")", sym.st_value);
 542         }
 543 
 544         if (Psetrun(P, 0, 0) != 0) {
 545                 err(EXIT_FAILURE, "failed to resume running our target");
 546         }
 547 
 548         if (Pwait(P, xsu_proc_timeout) != 0) {
 549                 err(EXIT_FAILURE, "%s did not hit our expected breakpoint",
 550                     argv[1]);
 551         }
 552 }
 553 
 554 /*
 555  * Run a process to completion and get its wait exit status.
 556  */
 557 void
 558 xsu_proc_finish(xsu_proc_t *xp)
 559 {
 560         pid_t pid = Ppsinfo(xp->xp_proc)->pr_pid;
 561 
 562         if (Pdelbkpt(xp->xp_proc, xp->xp_addr, xp->xp_instr) != 0) {
 563                 err(EXIT_FAILURE, "failed to delete %s`%s() breakpoint",
 564                     xp->xp_object, xp->xp_symname);
 565         }
 566 
 567         if (Psetrun(xp->xp_proc, 0, PRCFAULT) != 0) {
 568                 err(EXIT_FAILURE, "failed to resume running our target");
 569         }
 570 
 571         if (waitpid(pid, &xp->xp_wait, 0) != pid) {
 572                 err(EXIT_FAILURE, "failed to get our child processes's wait "
 573                     "info", pid);
 574         }
 575 
 576         if (WIFEXITED(xp->xp_wait) == 0) {
 577                 errx(EXIT_FAILURE, "our child process didn't actually exit!");
 578         }
 579 
 580         Pfree(xp->xp_proc);
 581         xp->xp_proc = NULL;
 582 }
 583 
 584 void
 585 xsu_fpregset_xmm_set(fpregset_t *fpr, uint32_t seed)
 586 {
 587         size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
 588         for (uint32_t i = 0; i < nregs; i++) {
 589                 upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
 590                 for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
 591                         u128->_l[u32] = seed;
 592                 }
 593         }
 594 }
 595 
 596 void
 597 xsu_xregs_xmm_set(prxregset_t *prx, uint32_t seed)
 598 {
 599         prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
 600         prxregset_xsave_t *xsave = NULL;
 601 
 602         for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
 603                 if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
 604                         xsave = (void *)((uintptr_t)prx +
 605                             hdr->pr_info[i].pri_offset);
 606                         break;
 607                 }
 608         }
 609 
 610         if (xsave == NULL) {
 611                 errx(EXIT_FAILURE, "asked to set xsave %%xmm regs, but no "
 612                     "xsave info present");
 613         }
 614 
 615         size_t nregs = ARRAY_SIZE(xsave->prx_fx_xmm);
 616         for (uint32_t i = 0; i < nregs; i++) {
 617                 for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
 618                         xsave->prx_fx_xmm[i]._l[u32] = seed;
 619                 }
 620         }
 621 }
 622 
 623 static const prxregset_info_t *
 624 xsu_xregs_find_comp(const prxregset_hdr_t *hdr, uint32_t comp, uintptr_t *datap)
 625 {
 626         for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
 627                 if (hdr->pr_info[i].pri_type == comp) {
 628                         *datap = (uintptr_t)hdr + hdr->pr_info[i].pri_offset;
 629                         return (&hdr->pr_info[i]);
 630                 }
 631         }
 632 
 633         return (NULL);
 634 }
 635 
 636 boolean_t
 637 xsu_xregs_comp_equal(const prxregset_t *src, const prxregset_t *dest,
 638     uint32_t comp)
 639 {
 640         const prxregset_hdr_t *shdr = (prxregset_hdr_t *)src;
 641         const prxregset_hdr_t *dhdr = (prxregset_hdr_t *)dest;
 642         const prxregset_info_t *sinfo = NULL, *dinfo = NULL;
 643         uintptr_t sdata, ddata;
 644 
 645         sinfo = xsu_xregs_find_comp(shdr, comp, &sdata);
 646         if (sinfo == NULL) {
 647                 warnx("source xregs missing component %u", comp);
 648                 return (B_FALSE);
 649         }
 650 
 651         dinfo = xsu_xregs_find_comp(dhdr, comp, &ddata);
 652         if (dinfo == NULL) {
 653                 warnx("destination xregs missing component %u", comp);
 654                 return (B_FALSE);
 655         }
 656 
 657         if (sinfo->pri_size != dinfo->pri_size) {
 658                 warnx("source xregs length 0x%x does not match dest xregs 0x%x",
 659                     sinfo->pri_size, dinfo->pri_size);
 660         }
 661 
 662         if (bcmp((void *)sdata, (void *)ddata, sinfo->pri_size) != 0) {
 663                 warnx("component data differs: dumping!");
 664                 for (uint32_t i = 0; i < sinfo->pri_offset; i++) {
 665                         const uint8_t *su8 = (uint8_t *)sdata;
 666                         const uint8_t *du8 = (uint8_t *)ddata;
 667 
 668                         if (su8[i] != du8[i]) {
 669                                 (void) fprintf(stderr,
 670                                     "src[%u] = 0x%2x\tdst[%u] = 0x%x\n",
 671                                     i, su8[i], i, du8[i]);
 672                         }
 673                 }
 674 
 675                 return (B_FALSE);
 676         }
 677 
 678         return (B_TRUE);
 679 }
 680 
 681 boolean_t
 682 xsu_fpregs_cmp(const fpregset_t *fpr, const prxregset_t *prx)
 683 {
 684         boolean_t valid = B_TRUE;
 685         const prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
 686         const prxregset_xsave_t *xsave = NULL;
 687         uint16_t fpr_cw, fpr_sw;
 688 
 689         for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
 690                 if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
 691                         xsave = (void *)((uintptr_t)prx +
 692                             hdr->pr_info[i].pri_offset);
 693                         break;
 694                 }
 695         }
 696 
 697         if (xsave == NULL) {
 698                 warnx("xregs missing xsave component for fpregs comparison");
 699                 return (B_FALSE);
 700         }
 701 
 702         /*
 703          * First check the XMM registers because those don't require ifdefs,
 704          * thankfully.
 705          */
 706         size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
 707         for (size_t i = 0; i < nregs; i++) {
 708                 const upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
 709                 for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++) {
 710                         if (u128->_l[u32] != xsave->prx_fx_xmm[i]._l[u32]) {
 711                                 valid = B_FALSE;
 712                                 (void) fprintf(stderr, "fpregset xmm[%u] "
 713                                     "u32[%u] does not match xsave, fpregset: "
 714                                     "0x%x, xsave: 0x%x\n", i, u32,
 715                                     u128->_l[u32],
 716                                     xsave->prx_fx_xmm[i]._l[u32]);
 717                         }
 718                 }
 719         }
 720 
 721         if (xsave->prx_fx_mxcsr != fpr->fp_reg_set.fpchip_state.mxcsr) {
 722                 valid = B_FALSE;
 723                 (void) fprintf(stderr, "mxcsr mismatched: fpregset: 0x%x, "
 724                     "xsave: 0x%x\n", fpr->fp_reg_set.fpchip_state.mxcsr,
 725                     xsave->prx_fx_mxcsr);
 726         }
 727 
 728         /*
 729          * Extract the basic x87 state. This requires ifdefs because the 32-bit
 730          * ABI here is a bit, particular. The 32-bit fpregs is the mcontext_t
 731          * struct which is mostly opaque and we need to use the ieeefp.h types
 732          * which are only visible for ILP32. It also treats 16-bit values as
 733          * 32-bit ones, hence masking below.
 734          */
 735 #ifdef __amd64
 736         fpr_cw = fpr->fp_reg_set.fpchip_state.cw;
 737         fpr_sw = fpr->fp_reg_set.fpchip_state.sw;
 738 #else   /* !__amd64 (__i386) */
 739         struct _fpstate fps;
 740 
 741         (void) memcpy(&fps, &fpr->fp_reg_set.fpchip_state, sizeof (fps));
 742         fpr_cw = fps.cw & 0xffff;
 743         fpr_sw = fps.sw & 0xffff;
 744 #endif  /* __amd64 */
 745 
 746         if (fpr_cw != xsave->prx_fx_fcw) {
 747                 valid = B_FALSE;
 748                 (void) fprintf(stderr, "x87 cw mismatched: fpregset: 0x%x, "
 749                     "xsave: 0x%x\n", fpr_cw, xsave->prx_fx_fcw);
 750         }
 751 
 752         if (fpr_sw != xsave->prx_fx_fsw) {
 753                 valid = B_FALSE;
 754                 (void) fprintf(stderr, "x87 sw mismatched: fpregset: 0x%x, "
 755                     "xsave: 0x%x\n", fpr_sw, xsave->prx_fx_fsw);
 756         }
 757 
 758         return (valid);
 759 }