Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/test/os-tests/tests/xsave/xsave_util.c
+++ new/usr/src/test/os-tests/tests/xsave/xsave_util.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2023 Oxide Computer Company
14 14 */
15 15
16 16 /*
17 17 * This file implements various utility functions we use for the xsave tests.
18 18 */
19 19
20 20 #include <string.h>
21 21 #include <strings.h>
22 22 #include <sys/auxv.h>
23 23 #include <sys/sysmacros.h>
24 24 #include <err.h>
25 25 #include <stdlib.h>
26 26 #include <procfs.h>
27 27 #include <sys/x86_archext.h>
28 28 #include <unistd.h>
29 29 #include <errno.h>
30 30 #include <sys/types.h>
31 31 #include <sys/wait.h>
32 32 #include <sys/debug.h>
33 33 #include <ieeefp.h>
34 34
35 35 #include "xsave_util.h"
36 36
37 37 static uint_t xsu_proc_timeout = 60 * 1000; /* 60s in ms */
38 38
39 39 /*
40 40 * Determine if we have the hardware support required for a given level of
41 41 * hardware support.
42 42 */
43 43 uint32_t
44 44 xsu_hwsupport(void)
45 45 {
46 46 uint_t isa[3];
47 47 uint_t nisa = getisax(isa, ARRAY_SIZE(isa));
48 48
49 49 if (nisa != ARRAY_SIZE(isa)) {
50 50 errx(EXIT_FAILURE, "did not get all %u hwcap values, found %u",
51 51 ARRAY_SIZE(isa), nisa);
52 52 }
53 53
54 54 if ((isa[0] & AV_386_XSAVE) == 0) {
55 55 errx(EXIT_FAILURE, "xsave not present: this test should have "
56 56 "been skipped");
57 57 }
58 58
59 59 if ((isa[1] & AV_386_2_AVX512F) != 0) {
60 60 warnx("found %%zmm support");
61 61 return (XSU_ZMM);
62 62 }
63 63
64 64 if ((isa[0] & AV_386_AVX) != 0) {
65 65 warnx("found %%ymm support");
66 66 return (XSU_YMM);
67 67 }
68 68
69 69 errx(EXIT_FAILURE, "no non-XMM xsave state found: this test should "
70 70 "have been skipped");
71 71 }
72 72
73 73 /*
74 74 * Fill all the valid regions of an FPU based on treating the vector register as
75 75 * a series of uint32_t values and going from there.
76 76 */
77 77 void
78 78 xsu_fill(xsu_fpu_t *fpu, uint32_t level, uint32_t start)
79 79 {
80 80 (void) memset(fpu, 0, sizeof (xsu_fpu_t));
81 81
82 82 switch (level) {
83 83 default:
84 84 errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", level);
85 85 case XSU_YMM:
86 86 for (uint32_t regno = 0; regno < XSU_MAX_YMM; regno++) {
87 87 for (uint32_t u32 = 0; u32 < XSU_YMM_U32; u32++,
88 88 start++) {
89 89 fpu->xf_reg[regno]._l[u32] = start;
90 90 }
91 91 }
92 92 break;
93 93 case XSU_ZMM:
94 94 for (uint32_t regno = 0; regno < XSU_MAX_ZMM; regno++) {
95 95 for (uint32_t u32 = 0; u32 < XSU_ZMM_U32; u32++,
96 96 start++) {
97 97 fpu->xf_reg[regno]._l[u32] = start;
98 98 }
99 99 }
100 100 for (uint32_t regno = 0; regno < ARRAY_SIZE(fpu->xf_opmask);
101 101 regno++) {
102 102 uint64_t val = start | (((uint64_t)start + 1) << 32);
103 103 fpu->xf_opmask[regno] = val;
104 104 start += 2;
105 105 }
106 106 break;
107 107 }
108 108 }
109 109
110 110 static void
111 111 xsu_overwrite_uctx_xmm(ucontext_t *uctx, const xsu_fpu_t *fpu)
112 112 {
113 113 struct _fpchip_state *fp;
114 114
115 115 fp = &uctx->uc_mcontext.fpregs.fp_reg_set.fpchip_state;
116 116 for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
117 117 (void) memcpy(&fp->xmm[i], &fpu->xf_reg[i]._l[0],
118 118 XSU_XMM_U32 * sizeof (uint32_t));
119 119 }
120 120 }
121 121
122 122 static void
123 123 xsu_overwrite_uctx_ymm(uintptr_t arg, const xsu_fpu_t *fpu)
124 124 {
125 125 prxregset_ymm_t *ymm = (void *)arg;
126 126
127 127 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
128 128 (void) memcpy(&ymm->prx_ymm[i]._l[0],
129 129 &fpu->xf_reg[i]._l[XSU_XMM_U32],
130 130 XSU_XMM_U32 * sizeof (uint32_t));
131 131 }
132 132 }
133 133
134 134 static void
135 135 xsu_overwrite_uctx_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
136 136 {
137 137 prxregset_zmm_t *zmm = (void *)arg;
138 138
139 139 /*
140 140 * Because this is the low zmm registers, we actually use the max ymm
141 141 * value as that's what actually fits in the low zmm and not the full
142 142 * definition.
143 143 */
144 144 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
145 145 (void) memcpy(&zmm->prx_zmm[i]._l[0],
146 146 &fpu->xf_reg[i]._l[XSU_YMM_U32],
147 147 XSU_YMM_U32 * sizeof (uint32_t));
148 148 }
149 149 }
150 150
151 151 static void
152 152 xsu_overwrite_uctx_hi_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
153 153 {
154 154 #ifdef __amd64
155 155 prxregset_hi_zmm_t *zmm = (void *)arg;
156 156
157 157 for (uint32_t i = XSU_MAX_YMM; i < XSU_MAX_ZMM; i++) {
158 158 (void) memcpy(&zmm->prx_hi_zmm[i - XSU_MAX_YMM]._l[0],
159 159 &fpu->xf_reg[i]._l[0],
160 160 XSU_ZMM_U32 * sizeof (uint32_t));
161 161 }
162 162 #else /* !__amd64 */
163 163 warnx("attempted to set High ZMM registers on a 32-bit process!");
164 164 abort();
165 165 #endif /* __amd64 */
166 166 }
167 167
168 168 void
169 169 xsu_overwrite_uctx(ucontext_t *uctx, const xsu_fpu_t *fpu, uint32_t hwsup)
170 170 {
171 171 size_t xsave_size = sizeof (uc_xsave_t);
172 172 void *new_buf;
173 173 uc_xsave_t *ucs;
174 174 uintptr_t write_ptr;
175 175
176 176 if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
177 177 errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
178 178 }
179 179
180 180 if (hwsup >= XSU_YMM) {
181 181 xsave_size += sizeof (prxregset_ymm_t);
182 182 }
183 183
184 184 if (hwsup >= XSU_ZMM) {
185 185 xsave_size += sizeof (prxregset_zmm_t);
186 186 xsave_size += sizeof (prxregset_opmask_t);
187 187 if (XSU_MAX_ZMM > 16) {
188 188 xsave_size += sizeof (prxregset_hi_zmm_t);
189 189 }
190 190 }
191 191
192 192 new_buf = calloc(1, xsave_size);
193 193 if (new_buf == NULL) {
194 194 errx(EXIT_FAILURE, "failed to allocate xsave buf");
195 195 }
196 196 ucs = new_buf;
197 197 ucs->ucx_vers = UC_XSAVE_VERS;
198 198 ucs->ucx_len = xsave_size;
199 199 if (hwsup >= XSU_YMM) {
200 200 ucs->ucx_bv |= XFEATURE_AVX;
201 201 }
202 202
203 203 if (hwsup >= XSU_ZMM) {
204 204 ucs->ucx_bv |= XFEATURE_AVX512_OPMASK | XFEATURE_AVX512_ZMM;
205 205 if (XSU_MAX_ZMM > 16)
206 206 ucs->ucx_bv |= XFEATURE_AVX512_HI_ZMM;
207 207 }
208 208
209 209 /*
210 210 * At this point we have rigged things up. XMM values are in the
211 211 * ucontext_t itself. After that we must write things out in the kernel
212 212 * signal order. Note, the XMM state is not set in the bit-vector
213 213 * because well, we don't actually use the xsave pieces for it because o
214 214 * the ucontext_t ABI has the xmm state always there. See
215 215 * uts/intel/os/fpu.c's big theory statement for more info.
216 216 */
217 217 xsu_overwrite_uctx_xmm(uctx, fpu);
218 218 write_ptr = (uintptr_t)new_buf + sizeof (uc_xsave_t);
219 219 if (hwsup >= XSU_YMM) {
220 220 xsu_overwrite_uctx_ymm(write_ptr, fpu);
221 221 write_ptr += sizeof (prxregset_ymm_t);
222 222 }
223 223
224 224 if (hwsup >= XSU_ZMM) {
225 225 (void) memcpy((void *)write_ptr, fpu->xf_opmask,
226 226 sizeof (fpu->xf_opmask));
227 227 write_ptr += sizeof (fpu->xf_opmask);
228 228 xsu_overwrite_uctx_zmm(write_ptr, fpu);
229 229 write_ptr += sizeof (prxregset_zmm_t);
230 230 if (XSU_MAX_ZMM > 16) {
231 231 xsu_overwrite_uctx_hi_zmm(write_ptr, fpu);
232 232 write_ptr += sizeof (prxregset_hi_zmm_t);
233 233 }
234 234 }
235 235
236 236 uctx->uc_xsave = (long)(uintptr_t)new_buf;
237 237 }
238 238
239 239 static boolean_t
240 240 xsu_check_vector(const upad512_t *src, const upad512_t *chk, uint32_t regno,
241 241 uint32_t nu32)
242 242 {
243 243 boolean_t valid = B_TRUE;
244 244
245 245 for (uint32_t i = 0; i < nu32; i++) {
246 246 if (src->_l[i] != chk->_l[i]) {
247 247 warnx("vec[%u] u32 %u differs: expected 0x%x, "
248 248 "found 0x%x", regno, i, src->_l[i], chk->_l[i]);
249 249 valid = B_FALSE;
250 250 }
251 251 }
252 252
253 253 return (valid);
254 254 }
255 255
256 256 boolean_t
257 257 xsu_same(const xsu_fpu_t *src, const xsu_fpu_t *check, uint32_t hwsup)
258 258 {
259 259 boolean_t valid = B_TRUE;
260 260
261 261 switch (hwsup) {
262 262 default:
263 263 errx(EXIT_FAILURE, "given unkonwn xsu level: 0x%x", hwsup);
264 264 case XSU_YMM:
265 265 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
266 266 if (!xsu_check_vector(&src->xf_reg[i],
267 267 &check->xf_reg[i], i, XSU_YMM_U32)) {
268 268 valid = B_FALSE;
269 269 }
270 270 }
271 271 break;
272 272 case XSU_ZMM:
273 273 for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
274 274 if (!xsu_check_vector(&src->xf_reg[i],
275 275 &check->xf_reg[i], i, XSU_ZMM_U32)) {
276 276 valid = B_FALSE;
277 277 }
278 278 }
279 279 for (uint32_t i = 0; i < ARRAY_SIZE(src->xf_opmask); i++) {
280 280 if (src->xf_opmask[i] != check->xf_opmask[i]) {
281 281 warnx("mask[%u] differs: expected 0x%" PRIx64
282 282 ", found 0x%" PRIx64, i, src->xf_opmask[i],
283 283 check->xf_opmask[i]);
284 284 valid = B_FALSE;
285 285 }
286 286 }
287 287 break;
288 288 }
289 289 return (valid);
290 290 }
291 291
292 292
293 293 void *
294 294 xsu_sleeper_thread(void *arg __unused)
295 295 {
296 296 for (;;) {
297 297 (void) sleep(100);
298 298 }
299 299 return (NULL);
300 300 }
301 301
302 302 static void
303 303 xsu_dump_vector(FILE *f, const upad512_t *reg, uint32_t nu32, const char *name,
304 304 uint32_t idx)
305 305 {
306 306 VERIFY3U(nu32 % 4, ==, 0);
307 307 for (uint32_t i = 0; i < nu32; i += 4) {
308 308 (void) fprintf(f, "%s[%02u] [%02u:%02u] = { 0x%08x 0x%08x "
309 309 "0x%08x 0x%08x }\n", name, idx, i + 3, i, reg->_l[i + 3],
310 310 reg->_l[i + 2], reg->_l[i + 1], reg->_l[i]);
311 311 }
312 312 }
313 313
314 314 void
315 315 xsu_dump(FILE *f, const xsu_fpu_t *fpu, uint32_t hwsup)
316 316 {
317 317
318 318 switch (hwsup) {
319 319 default:
320 320 errx(EXIT_FAILURE, "given unkonwn xsu level: 0x%x", hwsup);
321 321 case XSU_YMM:
322 322 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
323 323 xsu_dump_vector(f, &fpu->xf_reg[i], XSU_YMM_U32,
324 324 "ymm", i);
325 325 }
326 326 break;
327 327 case XSU_ZMM:
328 328 for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
329 329 xsu_dump_vector(f, &fpu->xf_reg[i], XSU_ZMM_U32,
330 330 "zmm", i);
331 331 }
332 332
333 333 for (uint32_t i = 0; i < ARRAY_SIZE(fpu->xf_opmask); i++) {
334 334 (void) fprintf(f, "%%k%u 0x%016" PRIx64"\n", i,
335 335 fpu->xf_opmask[i]);
336 336 }
337 337 break;
338 338 }
339 339 }
340 340
341 341 typedef struct xsu_prx {
342 342 uint32_t xp_hwsup;
343 343 prxregset_xsave_t *xp_xsave;
344 344 prxregset_ymm_t *xp_ymm;
345 345 prxregset_opmask_t *xp_opmask;
346 346 prxregset_zmm_t *xp_zmm;
347 347 prxregset_hi_zmm_t *xp_hi_zmm;
348 348 } xsu_prx_t;
349 349
350 350 static void
351 351 xsu_fpu_to_xregs_xsave(xsu_prx_t *prx, const xsu_fpu_t *fpu)
352 352 {
353 353 prx->xp_xsave->prx_fx_fcw = FPU_CW_INIT;
354 354 prx->xp_xsave->prx_fx_mxcsr = SSE_MXCSR_INIT;
355 355 for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
356 356 (void) memcpy(&prx->xp_xsave->prx_fx_xmm[i],
357 357 &fpu->xf_reg[i]._l[0], XSU_XMM_U32 * sizeof (uint32_t));
358 358 }
359 359
360 360 prx->xp_xsave->prx_xsh_xstate_bv = XFEATURE_LEGACY_FP |
361 361 XFEATURE_SSE;
362 362 if (prx->xp_hwsup >= XSU_YMM) {
363 363 prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX;
364 364 }
365 365
366 366 if (prx->xp_hwsup >= XSU_ZMM) {
367 367 prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX512;
368 368 }
369 369 }
370 370
371 371 static void
372 372 xsu_fpu_to_xregs_ymm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
373 373 {
374 374 /* Copy the upper 128-bits to the YMM save area */
375 375 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
376 376 (void) memcpy(&prx->xp_ymm->prx_ymm[i],
377 377 &fpu->xf_reg[i]._l[XSU_XMM_U32],
378 378 XSU_XMM_U32 * sizeof (uint32_t));
379 379 }
380 380 }
381 381
382 382 static void
383 383 xsu_fpu_to_xregs_zmm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
384 384 {
385 385 /* The lower 16 regs are only 256-bit, the upper are 512-bit */
386 386 for (uint32_t i = 0; i < MIN(XSU_MAX_ZMM, 16); i++) {
387 387 (void) memcpy(&prx->xp_zmm->prx_zmm[i],
388 388 &fpu->xf_reg[i]._l[XSU_YMM_U32],
389 389 XSU_YMM_U32 * sizeof (uint32_t));
390 390 }
391 391
392 392 #ifdef __amd64
393 393 for (uint32_t i = 16; i < XSU_MAX_ZMM; i++) {
394 394 (void) memcpy(&prx->xp_hi_zmm->prx_hi_zmm[i - 16],
395 395 &fpu->xf_reg[i]._l[0],
396 396 XSU_ZMM_U32 * sizeof (uint32_t));
397 397 }
398 398 #endif
399 399
400 400 (void) memcpy(prx->xp_opmask->prx_opmask, fpu->xf_opmask,
401 401 sizeof (prx->xp_opmask->prx_opmask));
402 402 }
403 403
404 404
405 405 void
406 406 xsu_fpu_to_xregs(const xsu_fpu_t *fpu, uint32_t hwsup, prxregset_t **prxp,
407 407 size_t *sizep)
408 408 {
409 409 uint32_t ninfo = 1, curinfo;
410 410 size_t len = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) +
411 411 sizeof (prxregset_xsave_t);
412 412 prxregset_hdr_t *hdr;
413 413 uint32_t off;
414 414 xsu_prx_t prx;
415 415
416 416 if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
417 417 errx(EXIT_FAILURE, "given unkonwn xsu level: 0x%x", hwsup);
418 418 }
419 419
420 420 if (hwsup >= XSU_YMM) {
421 421 len += sizeof (prxregset_info_t) + sizeof (prxregset_ymm_t);
422 422 ninfo++;
423 423 }
424 424
425 425 if (hwsup >= XSU_ZMM) {
426 426 len += 3 * sizeof (prxregset_info_t) +
427 427 sizeof (prxregset_opmask_t) + sizeof (prxregset_zmm_t) +
428 428 sizeof (prxregset_hi_zmm_t);
429 429 ninfo += 3;
430 430 }
431 431
432 432 hdr = calloc(1, len);
433 433 if (hdr == NULL) {
434 434 err(EXIT_FAILURE, "failed to allocate prxregset_t (%zu bytes)",
435 435 len);
436 436 }
437 437 (void) memset(&prx, 0, sizeof (prx));
438 438 prx.xp_hwsup = hwsup;
439 439
440 440 #ifdef __amd64
441 441 VERIFY3U(len, <=, UINT32_MAX);
442 442 #endif /* __amd64 */
443 443 hdr->pr_type = PR_TYPE_XSAVE;
444 444 hdr->pr_size = (uint32_t)len;
445 445 hdr->pr_ninfo = ninfo;
446 446
447 447 curinfo = 0;
448 448 off = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) * ninfo;
449 449 hdr->pr_info[curinfo].pri_type = PRX_INFO_XSAVE;
450 450 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_xsave_t);
451 451 hdr->pr_info[curinfo].pri_offset = off;
452 452 prx.xp_xsave = (void *)((uintptr_t)hdr + off);
453 453 off += sizeof (prxregset_xsave_t);
454 454 curinfo++;
455 455
456 456 if (hwsup >= XSU_YMM) {
457 457 hdr->pr_info[curinfo].pri_type = PRX_INFO_YMM;
458 458 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_ymm_t);
459 459 hdr->pr_info[curinfo].pri_offset = off;
460 460 prx.xp_ymm = (void *)((uintptr_t)hdr + off);
461 461 off += sizeof (prxregset_ymm_t);
462 462 curinfo++;
463 463 }
464 464
465 465 if (hwsup >= XSU_ZMM) {
466 466 hdr->pr_info[curinfo].pri_type = PRX_INFO_OPMASK;
467 467 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_opmask_t);
468 468 hdr->pr_info[curinfo].pri_offset = off;
469 469 prx.xp_opmask = (void *)((uintptr_t)hdr + off);
470 470 off += sizeof (prxregset_opmask_t);
471 471 curinfo++;
472 472
473 473 hdr->pr_info[curinfo].pri_type = PRX_INFO_ZMM;
474 474 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_zmm_t);
475 475 hdr->pr_info[curinfo].pri_offset = off;
476 476 prx.xp_zmm = (void *)((uintptr_t)hdr + off);
477 477 off += sizeof (prxregset_zmm_t);
478 478 curinfo++;
479 479
480 480 hdr->pr_info[curinfo].pri_type = PRX_INFO_HI_ZMM;
481 481 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_hi_zmm_t);
482 482 hdr->pr_info[curinfo].pri_offset = off;
483 483 prx.xp_hi_zmm = (void *)((uintptr_t)hdr + off);
484 484 off += sizeof (prxregset_hi_zmm_t);
485 485 curinfo++;
486 486 }
487 487
488 488 xsu_fpu_to_xregs_xsave(&prx, fpu);
489 489 if (hwsup >= XSU_YMM) {
490 490 xsu_fpu_to_xregs_ymm(&prx, fpu);
491 491 }
492 492
493 493 if (hwsup >= XSU_ZMM) {
494 494 xsu_fpu_to_xregs_zmm(&prx, fpu);
495 495 }
496 496
497 497 *prxp = (prxregset_t *)hdr;
498 498 *sizep = len;
499 499 }
500 500
501 501 /*
502 502 * This pairs with xsu_proc_finish() below. The goal is to allow us to inject
503 503 * state after hitting a breakpoint, which is generally used right before
504 504 * something wants to print data.
505 505 */
506 506 void
507 507 xsu_proc_bkpt(xsu_proc_t *xp)
508 508 {
509 509 int perr;
510 510 struct ps_prochandle *P;
511 511 char *const argv[3] = { xp->xp_prog, xp->xp_arg, NULL };
512 512 GElf_Sym sym;
513 513
514 514 P = Pcreate(xp->xp_prog, argv, &perr, NULL, 0);
515 515 if (P == NULL) {
516 516 errx(EXIT_FAILURE, "failed to create %s: %s", xp->xp_prog,
517 517 Pcreate_error(perr));
518 518 }
519 519
520 520 xp->xp_proc = P;
521 521 (void) Punsetflags(P, PR_RLC);
522 522 if (Psetflags(P, PR_KLC | PR_BPTADJ) != 0) {
523 523 int e = errno;
524 524 Prelease(P, PRELEASE_KILL);
525 525 errc(EXIT_FAILURE, e, "failed to set PR_KLC | PR_BPTADJ flags");
526 526 }
527 527
528 528 if (Pxlookup_by_name(P, LM_ID_BASE, xp->xp_object, xp->xp_symname, &sym,
529 529 NULL) != 0) {
530 530 err(EXIT_FAILURE, "failed to find %s`%s", xp->xp_object,
531 531 xp->xp_symname);
532 532 }
533 533
534 534 if (Pfault(P, FLTBPT, 1) != 0) {
535 535 errx(EXIT_FAILURE, "failed to set the FLTBPT disposition");
536 536 }
537 537
538 538 xp->xp_addr = sym.st_value;
539 539 if (Psetbkpt(P, sym.st_value, &xp->xp_instr) != 0) {
540 540 err(EXIT_FAILURE, "failed to set breakpoint on xsu_getfpu "
541 541 "(0x%" PRIx64 ")", sym.st_value);
542 542 }
543 543
544 544 if (Psetrun(P, 0, 0) != 0) {
545 545 err(EXIT_FAILURE, "failed to resume running our target");
546 546 }
547 547
548 548 if (Pwait(P, xsu_proc_timeout) != 0) {
549 549 err(EXIT_FAILURE, "%s did not hit our expected breakpoint",
550 550 argv[1]);
551 551 }
552 552 }
553 553
554 554 /*
555 555 * Run a process to completion and get its wait exit status.
556 556 */
557 557 void
558 558 xsu_proc_finish(xsu_proc_t *xp)
559 559 {
560 560 pid_t pid = Ppsinfo(xp->xp_proc)->pr_pid;
561 561
562 562 if (Pdelbkpt(xp->xp_proc, xp->xp_addr, xp->xp_instr) != 0) {
563 563 err(EXIT_FAILURE, "failed to delete %s`%s() breakpoint",
564 564 xp->xp_object, xp->xp_symname);
565 565 }
566 566
567 567 if (Psetrun(xp->xp_proc, 0, PRCFAULT) != 0) {
568 568 err(EXIT_FAILURE, "failed to resume running our target");
569 569 }
570 570
571 571 if (waitpid(pid, &xp->xp_wait, 0) != pid) {
572 572 err(EXIT_FAILURE, "failed to get our child processes's wait "
573 573 "info", pid);
574 574 }
575 575
576 576 if (WIFEXITED(xp->xp_wait) == 0) {
577 577 errx(EXIT_FAILURE, "our child process didn't actually exit!");
578 578 }
579 579
580 580 Pfree(xp->xp_proc);
581 581 xp->xp_proc = NULL;
582 582 }
583 583
584 584 void
585 585 xsu_fpregset_xmm_set(fpregset_t *fpr, uint32_t seed)
586 586 {
587 587 size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
588 588 for (uint32_t i = 0; i < nregs; i++) {
589 589 upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
590 590 for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
591 591 u128->_l[u32] = seed;
592 592 }
593 593 }
594 594 }
595 595
596 596 void
597 597 xsu_xregs_xmm_set(prxregset_t *prx, uint32_t seed)
598 598 {
599 599 prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
600 600 prxregset_xsave_t *xsave = NULL;
601 601
602 602 for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
603 603 if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
604 604 xsave = (void *)((uintptr_t)prx +
605 605 hdr->pr_info[i].pri_offset);
606 606 break;
607 607 }
608 608 }
609 609
610 610 if (xsave == NULL) {
611 611 errx(EXIT_FAILURE, "asked to set xsave %%xmm regs, but no "
612 612 "xsave info present");
613 613 }
614 614
615 615 size_t nregs = ARRAY_SIZE(xsave->prx_fx_xmm);
616 616 for (uint32_t i = 0; i < nregs; i++) {
617 617 for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
618 618 xsave->prx_fx_xmm[i]._l[u32] = seed;
619 619 }
620 620 }
621 621 }
622 622
623 623 static const prxregset_info_t *
624 624 xsu_xregs_find_comp(const prxregset_hdr_t *hdr, uint32_t comp, uintptr_t *datap)
625 625 {
626 626 for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
627 627 if (hdr->pr_info[i].pri_type == comp) {
628 628 *datap = (uintptr_t)hdr + hdr->pr_info[i].pri_offset;
629 629 return (&hdr->pr_info[i]);
630 630 }
631 631 }
632 632
633 633 return (NULL);
634 634 }
635 635
636 636 boolean_t
637 637 xsu_xregs_comp_equal(const prxregset_t *src, const prxregset_t *dest,
638 638 uint32_t comp)
639 639 {
640 640 const prxregset_hdr_t *shdr = (prxregset_hdr_t *)src;
641 641 const prxregset_hdr_t *dhdr = (prxregset_hdr_t *)dest;
642 642 const prxregset_info_t *sinfo = NULL, *dinfo = NULL;
643 643 uintptr_t sdata, ddata;
644 644
645 645 sinfo = xsu_xregs_find_comp(shdr, comp, &sdata);
646 646 if (sinfo == NULL) {
647 647 warnx("source xregs missing component %u", comp);
648 648 return (B_FALSE);
649 649 }
650 650
651 651 dinfo = xsu_xregs_find_comp(dhdr, comp, &ddata);
652 652 if (dinfo == NULL) {
653 653 warnx("destination xregs missing component %u", comp);
654 654 return (B_FALSE);
655 655 }
656 656
657 657 if (sinfo->pri_size != dinfo->pri_size) {
658 658 warnx("source xregs length 0x%x does not match dest xregs 0x%x",
659 659 sinfo->pri_size, dinfo->pri_size);
660 660 }
661 661
662 662 if (bcmp((void *)sdata, (void *)ddata, sinfo->pri_size) != 0) {
663 663 warnx("component data differs: dumping!");
664 664 for (uint32_t i = 0; i < sinfo->pri_offset; i++) {
665 665 const uint8_t *su8 = (uint8_t *)sdata;
666 666 const uint8_t *du8 = (uint8_t *)ddata;
667 667
668 668 if (su8[i] != du8[i]) {
669 669 (void) fprintf(stderr,
670 670 "src[%u] = 0x%2x\tdst[%u] = 0x%x\n",
671 671 i, su8[i], i, du8[i]);
672 672 }
673 673 }
674 674
675 675 return (B_FALSE);
676 676 }
677 677
678 678 return (B_TRUE);
679 679 }
680 680
681 681 boolean_t
682 682 xsu_fpregs_cmp(const fpregset_t *fpr, const prxregset_t *prx)
683 683 {
684 684 boolean_t valid = B_TRUE;
685 685 const prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
686 686 const prxregset_xsave_t *xsave = NULL;
687 687 uint16_t fpr_cw, fpr_sw;
688 688
689 689 for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
690 690 if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
691 691 xsave = (void *)((uintptr_t)prx +
692 692 hdr->pr_info[i].pri_offset);
693 693 break;
694 694 }
695 695 }
696 696
697 697 if (xsave == NULL) {
698 698 warnx("xregs missing xsave component for fpregs comparison");
699 699 return (B_FALSE);
700 700 }
701 701
702 702 /*
703 703 * First check the XMM registers because those don't require ifdefs,
704 704 * thankfully.
705 705 */
706 706 size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
707 707 for (size_t i = 0; i < nregs; i++) {
708 708 const upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
709 709 for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++) {
710 710 if (u128->_l[u32] != xsave->prx_fx_xmm[i]._l[u32]) {
711 711 valid = B_FALSE;
712 712 (void) fprintf(stderr, "fpregset xmm[%u] "
713 713 "u32[%u] does not match xsave, fpregset: "
714 714 "0x%x, xsave: 0x%x\n", i, u32,
715 715 u128->_l[u32],
716 716 xsave->prx_fx_xmm[i]._l[u32]);
717 717 }
718 718 }
719 719 }
720 720
721 721 if (xsave->prx_fx_mxcsr != fpr->fp_reg_set.fpchip_state.mxcsr) {
722 722 valid = B_FALSE;
723 723 (void) fprintf(stderr, "mxcsr mismatched: fpregset: 0x%x, "
724 724 "xsave: 0x%x\n", fpr->fp_reg_set.fpchip_state.mxcsr,
725 725 xsave->prx_fx_mxcsr);
726 726 }
727 727
728 728 /*
729 729 * Extract the basic x87 state. This requires ifdefs because the 32-bit
730 730 * ABI here is a bit, particular. The 32-bit fpregs is the mcontext_t
731 731 * struct which is mostly opaque and we need to use the ieeefp.h types
732 732 * which are only visible for ILP32. It also treats 16-bit values as
733 733 * 32-bit ones, hence masking below.
734 734 */
735 735 #ifdef __amd64
736 736 fpr_cw = fpr->fp_reg_set.fpchip_state.cw;
737 737 fpr_sw = fpr->fp_reg_set.fpchip_state.sw;
738 738 #else /* !__amd64 (__i386) */
739 739 struct _fpstate fps;
740 740
741 741 (void) memcpy(&fps, &fpr->fp_reg_set.fpchip_state, sizeof (fps));
742 742 fpr_cw = fps.cw & 0xffff;
743 743 fpr_sw = fps.sw & 0xffff;
744 744 #endif /* __amd64 */
745 745
746 746 if (fpr_cw != xsave->prx_fx_fcw) {
747 747 valid = B_FALSE;
748 748 (void) fprintf(stderr, "x87 cw mismatched: fpregset: 0x%x, "
749 749 "xsave: 0x%x\n", fpr_cw, xsave->prx_fx_fcw);
750 750 }
751 751
752 752 if (fpr_sw != xsave->prx_fx_fsw) {
753 753 valid = B_FALSE;
754 754 (void) fprintf(stderr, "x87 sw mismatched: fpregset: 0x%x, "
755 755 "xsave: 0x%x\n", fpr_sw, xsave->prx_fx_fsw);
756 756 }
757 757
758 758 return (valid);
759 759 }
|
↓ open down ↓ |
759 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX