--- old/usr/src/uts/i86pc/os/trap.c
+++ new/usr/src/uts/i86pc/os/trap.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
28 28 /* All Rights Reserved */
29 29 /* */
30 30 /* Copyright (c) 1987, 1988 Microsoft Corporation */
31 31 /* All Rights Reserved */
32 32 /* */
33 33
34 34 /*
35 35 * Copyright 2015 Joyent, Inc.
36 36 */
37 37
38 38 #include <sys/types.h>
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/param.h>
41 41 #include <sys/signal.h>
42 42 #include <sys/systm.h>
43 43 #include <sys/user.h>
44 44 #include <sys/proc.h>
45 45 #include <sys/disp.h>
46 46 #include <sys/class.h>
47 47 #include <sys/core.h>
48 48 #include <sys/syscall.h>
49 49 #include <sys/cpuvar.h>
50 50 #include <sys/vm.h>
51 51 #include <sys/sysinfo.h>
52 52 #include <sys/fault.h>
53 53 #include <sys/stack.h>
54 54 #include <sys/psw.h>
55 55 #include <sys/regset.h>
56 56 #include <sys/fp.h>
57 57 #include <sys/trap.h>
58 58 #include <sys/kmem.h>
59 59 #include <sys/vtrace.h>
60 60 #include <sys/cmn_err.h>
61 61 #include <sys/prsystm.h>
62 62 #include <sys/mutex_impl.h>
63 63 #include <sys/machsystm.h>
64 64 #include <sys/archsystm.h>
65 65 #include <sys/sdt.h>
66 66 #include <sys/avintr.h>
67 67 #include <sys/kobj.h>
68 68
69 69 #include <vm/hat.h>
70 70
71 71 #include <vm/seg_kmem.h>
72 72 #include <vm/as.h>
73 73 #include <vm/seg.h>
74 74 #include <vm/hat_pte.h>
75 75 #include <vm/hat_i86.h>
76 76
77 77 #include <sys/procfs.h>
78 78
79 79 #include <sys/reboot.h>
80 80 #include <sys/debug.h>
81 81 #include <sys/debugreg.h>
82 82 #include <sys/modctl.h>
83 83 #include <sys/aio_impl.h>
84 84 #include <sys/tnf.h>
85 85 #include <sys/tnf_probe.h>
86 86 #include <sys/cred.h>
87 87 #include <sys/mman.h>
88 88 #include <sys/x86_archext.h>
89 89 #include <sys/copyops.h>
90 90 #include <c2/audit.h>
91 91 #include <sys/ftrace.h>
92 92 #include <sys/panic.h>
93 93 #include <sys/traptrace.h>
94 94 #include <sys/ontrap.h>
95 95 #include <sys/cpc_impl.h>
96 96 #include <sys/bootconf.h>
97 97 #include <sys/bootinfo.h>
98 98 #include <sys/promif.h>
99 99 #include <sys/mach_mmu.h>
100 100 #if defined(__xpv)
101 101 #include <sys/hypervisor.h>
102 102 #endif
103 103 #include <sys/contract/process_impl.h>
104 104 #include <sys/brand.h>
105 105
106 106 #define USER 0x10000 /* user-mode flag added to trap type */
107 107
108 108 static const char *trap_type_mnemonic[] = {
109 109 "de", "db", "2", "bp",
110 110 "of", "br", "ud", "nm",
111 111 "df", "9", "ts", "np",
112 112 "ss", "gp", "pf", "15",
113 113 "mf", "ac", "mc", "xf"
114 114 };
115 115
116 116 static const char *trap_type[] = {
117 117 "Divide error", /* trap id 0 */
118 118 "Debug", /* trap id 1 */
119 119 "NMI interrupt", /* trap id 2 */
120 120 "Breakpoint", /* trap id 3 */
121 121 "Overflow", /* trap id 4 */
122 122 "BOUND range exceeded", /* trap id 5 */
123 123 "Invalid opcode", /* trap id 6 */
124 124 "Device not available", /* trap id 7 */
125 125 "Double fault", /* trap id 8 */
126 126 "Coprocessor segment overrun", /* trap id 9 */
127 127 "Invalid TSS", /* trap id 10 */
128 128 "Segment not present", /* trap id 11 */
129 129 "Stack segment fault", /* trap id 12 */
130 130 "General protection", /* trap id 13 */
131 131 "Page fault", /* trap id 14 */
132 132 "Reserved", /* trap id 15 */
133 133 "x87 floating point error", /* trap id 16 */
134 134 "Alignment check", /* trap id 17 */
135 135 "Machine check", /* trap id 18 */
136 136 "SIMD floating point exception", /* trap id 19 */
137 137 };
138 138
139 139 #define TRAP_TYPES (sizeof (trap_type) / sizeof (trap_type[0]))
140 140
141 141 #define SLOW_SCALL_SIZE 2
142 142 #define FAST_SCALL_SIZE 2
143 143
144 144 int tudebug = 0;
145 145 int tudebugbpt = 0;
146 146 int tudebugfpe = 0;
147 147 int tudebugsse = 0;
148 148
149 149 #if defined(TRAPDEBUG) || defined(lint)
150 150 int tdebug = 0;
151 151 int lodebug = 0;
152 152 int faultdebug = 0;
153 153 #else
154 154 #define tdebug 0
155 155 #define lodebug 0
156 156 #define faultdebug 0
157 157 #endif /* defined(TRAPDEBUG) || defined(lint) */
158 158
159 159 #if defined(TRAPTRACE)
160 160 /*
161 161 * trap trace record for cpu0 is allocated here.
162 162 * trap trace records for non-boot cpus are allocated in mp_startup_init().
163 163 */
164 164 static trap_trace_rec_t trap_tr0[TRAPTR_NENT];
165 165 trap_trace_ctl_t trap_trace_ctl[NCPU] = {
166 166 {
167 167 (uintptr_t)trap_tr0, /* next record */
168 168 (uintptr_t)trap_tr0, /* first record */
169 169 (uintptr_t)(trap_tr0 + TRAPTR_NENT), /* limit */
170 170 (uintptr_t)0 /* current */
171 171 },
172 172 };
173 173
174 174 /*
175 175 * default trap buffer size
176 176 */
177 177 size_t trap_trace_bufsize = TRAPTR_NENT * sizeof (trap_trace_rec_t);
178 178 int trap_trace_freeze = 0;
179 179 int trap_trace_off = 0;
180 180
181 181 /*
182 182 * A dummy TRAPTRACE entry to use after death.
183 183 */
184 184 trap_trace_rec_t trap_trace_postmort;
185 185
186 186 static void dump_ttrace(void);
187 187 #endif /* TRAPTRACE */
188 188 static void dumpregs(struct regs *);
189 189 static void showregs(uint_t, struct regs *, caddr_t);
190 190 static int kern_gpfault(struct regs *);
191 191
192 192 /*ARGSUSED*/
193 193 static int
194 194 die(uint_t type, struct regs *rp, caddr_t addr, processorid_t cpuid)
195 195 {
196 196 struct panic_trap_info ti;
197 197 const char *trap_name, *trap_mnemonic;
198 198
199 199 if (type < TRAP_TYPES) {
200 200 trap_name = trap_type[type];
201 201 trap_mnemonic = trap_type_mnemonic[type];
202 202 } else {
203 203 trap_name = "trap";
204 204 trap_mnemonic = "-";
205 205 }
206 206
207 207 #ifdef TRAPTRACE
208 208 TRAPTRACE_FREEZE;
209 209 #endif
210 210
211 211 ti.trap_regs = rp;
212 212 ti.trap_type = type & ~USER;
213 213 ti.trap_addr = addr;
214 214
215 215 curthread->t_panic_trap = &ti;
216 216
217 217 if (type == T_PGFLT && addr < (caddr_t)KERNELBASE) {
218 218 panic("BAD TRAP: type=%x (#%s %s) rp=%p addr=%p "
219 219 "occurred in module \"%s\" due to %s",
220 220 type, trap_mnemonic, trap_name, (void *)rp, (void *)addr,
221 221 mod_containing_pc((caddr_t)rp->r_pc),
222 222 addr < (caddr_t)PAGESIZE ?
223 223 "a NULL pointer dereference" :
224 224 "an illegal access to a user address");
225 225 } else
226 226 panic("BAD TRAP: type=%x (#%s %s) rp=%p addr=%p",
227 227 type, trap_mnemonic, trap_name, (void *)rp, (void *)addr);
228 228 return (0);
229 229 }
230 230
231 231 /*
232 232 * Rewrite the instruction at pc to be an int $T_SYSCALLINT instruction.
233 233 *
234 234 * int <vector> is two bytes: 0xCD <vector>
235 235 */
236 236
237 237 static int
238 238 rewrite_syscall(caddr_t pc)
239 239 {
240 240 uchar_t instr[SLOW_SCALL_SIZE] = { 0xCD, T_SYSCALLINT };
241 241
242 242 if (uwrite(curthread->t_procp, instr, SLOW_SCALL_SIZE,
243 243 (uintptr_t)pc) != 0)
244 244 return (1);
245 245
246 246 return (0);
247 247 }
248 248
249 249 /*
250 250 * Test to see if the instruction at pc is sysenter or syscall. The second
251 251 * argument should be the x86 feature flag corresponding to the expected
252 252 * instruction.
253 253 *
254 254 * sysenter is two bytes: 0x0F 0x34
255 255 * syscall is two bytes: 0x0F 0x05
256 256 * int $T_SYSCALLINT is two bytes: 0xCD 0x91
257 257 */
258 258
259 259 static int
260 260 instr_is_other_syscall(caddr_t pc, int which)
261 261 {
262 262 uchar_t instr[FAST_SCALL_SIZE];
263 263
264 264 ASSERT(which == X86FSET_SEP || which == X86FSET_ASYSC || which == 0xCD);
265 265
266 266 if (copyin_nowatch(pc, (caddr_t)instr, FAST_SCALL_SIZE) != 0)
267 267 return (0);
268 268
269 269 switch (which) {
270 270 case X86FSET_SEP:
271 271 if (instr[0] == 0x0F && instr[1] == 0x34)
272 272 return (1);
273 273 break;
274 274 case X86FSET_ASYSC:
275 275 if (instr[0] == 0x0F && instr[1] == 0x05)
276 276 return (1);
277 277 break;
278 278 case 0xCD:
279 279 if (instr[0] == 0xCD && instr[1] == T_SYSCALLINT)
280 280 return (1);
281 281 break;
282 282 }
283 283
284 284 return (0);
285 285 }
286 286
287 287 static const char *
288 288 syscall_insn_string(int syscall_insn)
289 289 {
290 290 switch (syscall_insn) {
291 291 case X86FSET_SEP:
292 292 return ("sysenter");
293 293 case X86FSET_ASYSC:
294 294 return ("syscall");
295 295 case 0xCD:
296 296 return ("int");
297 297 default:
298 298 return ("Unknown");
299 299 }
300 300 }
301 301
302 302 static int
303 303 ldt_rewrite_syscall(struct regs *rp, proc_t *p, int syscall_insn)
304 304 {
305 305 caddr_t linearpc;
306 306 int return_code = 0;
307 307
308 308 mutex_enter(&p->p_ldtlock); /* Must be held across linear_pc() */
309 309
310 310 if (linear_pc(rp, p, &linearpc) == 0) {
311 311
312 312 /*
313 313 * If another thread beat us here, it already changed
314 314 * this site to the slower (int) syscall instruction.
315 315 */
316 316 if (instr_is_other_syscall(linearpc, 0xCD)) {
317 317 return_code = 1;
318 318 } else if (instr_is_other_syscall(linearpc, syscall_insn)) {
319 319
320 320 if (rewrite_syscall(linearpc) == 0) {
321 321 return_code = 1;
322 322 }
323 323 #ifdef DEBUG
324 324 else
325 325 cmn_err(CE_WARN, "failed to rewrite %s "
326 326 "instruction in process %d",
327 327 syscall_insn_string(syscall_insn),
328 328 p->p_pid);
329 329 #endif /* DEBUG */
330 330 }
331 331 }
332 332
333 333 mutex_exit(&p->p_ldtlock); /* Must be held across linear_pc() */
334 334
335 335 return (return_code);
336 336 }
337 337
338 338 /*
339 339 * Test to see if the instruction at pc is a system call instruction.
340 340 *
341 341 * The bytes of an lcall instruction used for the syscall trap.
342 342 * static uchar_t lcall[7] = { 0x9a, 0, 0, 0, 0, 0x7, 0 };
343 343 * static uchar_t lcallalt[7] = { 0x9a, 0, 0, 0, 0, 0x27, 0 };
344 344 */
345 345
346 346 #define LCALLSIZE 7
347 347
348 348 static int
349 349 instr_is_lcall_syscall(caddr_t pc)
350 350 {
351 351 uchar_t instr[LCALLSIZE];
352 352
353 353 if (copyin_nowatch(pc, (caddr_t)instr, LCALLSIZE) == 0 &&
354 354 instr[0] == 0x9a &&
355 355 instr[1] == 0 &&
356 356 instr[2] == 0 &&
357 357 instr[3] == 0 &&
358 358 instr[4] == 0 &&
359 359 (instr[5] == 0x7 || instr[5] == 0x27) &&
360 360 instr[6] == 0)
361 361 return (1);
362 362
363 363 return (0);
364 364 }
365 365
366 366 #ifdef __amd64
367 367
368 368 /*
369 369 * In the first revisions of amd64 CPUs produced by AMD, the LAHF and
370 370 * SAHF instructions were not implemented in 64-bit mode. Later revisions
371 371 * did implement these instructions. An extension to the cpuid instruction
372 372 * was added to check for the capability of executing these instructions
373 373 * in 64-bit mode.
374 374 *
375 375 * Intel originally did not implement these instructions in EM64T either,
376 376 * but added them in later revisions.
377 377 *
378 378 * So, there are different chip revisions by both vendors out there that
379 379 * may or may not implement these instructions. The easy solution is to
380 380 * just always emulate these instructions on demand.
381 381 *
382 382 * SAHF == store %ah in the lower 8 bits of %rflags (opcode 0x9e)
383 383 * LAHF == load the lower 8 bits of %rflags into %ah (opcode 0x9f)
384 384 */
385 385
386 386 #define LSAHFSIZE 1
387 387
388 388 static int
389 389 instr_is_lsahf(caddr_t pc, uchar_t *instr)
390 390 {
391 391 if (copyin_nowatch(pc, (caddr_t)instr, LSAHFSIZE) == 0 &&
392 392 (*instr == 0x9e || *instr == 0x9f))
393 393 return (1);
394 394 return (0);
395 395 }
396 396
397 397 /*
398 398 * Emulate the LAHF and SAHF instructions. The reference manuals define
399 399 * these instructions to always load/store bit 1 as a 1, and bits 3 and 5
400 400 * as a 0. The other, defined, bits are copied (the PS_ICC bits and PS_P).
401 401 *
402 402 * Note that %ah is bits 8-15 of %rax.
403 403 */
404 404 static void
405 405 emulate_lsahf(struct regs *rp, uchar_t instr)
406 406 {
407 407 if (instr == 0x9e) {
408 408 /* sahf. Copy bits from %ah to flags. */
409 409 rp->r_ps = (rp->r_ps & ~0xff) |
410 410 ((rp->r_rax >> 8) & PSL_LSAHFMASK) | PS_MB1;
411 411 } else {
412 412 /* lahf. Copy bits from flags to %ah. */
413 413 rp->r_rax = (rp->r_rax & ~0xff00) |
414 414 (((rp->r_ps & PSL_LSAHFMASK) | PS_MB1) << 8);
415 415 }
416 416 rp->r_pc += LSAHFSIZE;
417 417 }
418 418 #endif /* __amd64 */
419 419
420 420 #ifdef OPTERON_ERRATUM_91
421 421
422 422 /*
423 423 * Test to see if the instruction at pc is a prefetch instruction.
424 424 *
425 425 * The first byte of prefetch instructions is always 0x0F.
426 426 * The second byte is 0x18 for regular prefetch or 0x0D for AMD 3dnow prefetch.
427 427 * The third byte (ModRM) contains the register field bits (bits 3-5).
428 428 * These bits must be between 0 and 3 inclusive for regular prefetch and
429 429 * 0 and 1 inclusive for AMD 3dnow prefetch.
430 430 *
431 431  * In 64-bit mode, there may be a one-byte REX prefix (0x40-0x4F).
432 432 */
433 433
434 434 static int
435 435 cmp_to_prefetch(uchar_t *p)
436 436 {
437 437 #ifdef _LP64
438 438 if ((p[0] & 0xF0) == 0x40) /* 64-bit REX prefix */
439 439 p++;
440 440 #endif
441 441 return ((p[0] == 0x0F && p[1] == 0x18 && ((p[2] >> 3) & 7) <= 3) ||
442 442 (p[0] == 0x0F && p[1] == 0x0D && ((p[2] >> 3) & 7) <= 1));
443 443 }
444 444
445 445 static int
446 446 instr_is_prefetch(caddr_t pc)
447 447 {
448 448 uchar_t instr[4]; /* optional REX prefix plus 3-byte opcode */
449 449
450 450 return (copyin_nowatch(pc, instr, sizeof (instr)) == 0 &&
451 451 cmp_to_prefetch(instr));
452 452 }
453 453
454 454 #endif /* OPTERON_ERRATUM_91 */
455 455
456 456 /*
457 457 * Called from the trap handler when a processor trap occurs.
458 458 *
459 459 * Note: All user-level traps that might call stop() must exit
460 460 * trap() by 'goto out' or by falling through.
461 461  * Note Also: trap() is usually called with interrupts enabled (PS_IE == 1);
462 462  * however, there are paths that arrive here with PS_IE == 0, so special care
463 463  * must be taken in those cases.
464 464 */
465 465 void
466 466 trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
467 467 {
468 468 kthread_t *ct = curthread;
469 469 enum seg_rw rw;
470 470 unsigned type;
471 471 proc_t *p = ttoproc(ct);
472 472 klwp_t *lwp = ttolwp(ct);
473 473 uintptr_t lofault;
474 474 label_t *onfault;
475 475 faultcode_t pagefault(), res, errcode;
476 476 enum fault_type fault_type;
477 477 k_siginfo_t siginfo;
478 478 uint_t fault = 0;
479 479 int mstate;
480 480 int sicode = 0;
481 481 int watchcode;
482 482 int watchpage;
483 483 caddr_t vaddr;
484 484 int singlestep_twiddle;
485 485 size_t sz;
486 486 int ta;
487 487 #ifdef __amd64
488 488 uchar_t instr;
489 489 #endif
490 490
491 491 ASSERT_STACK_ALIGNED();
492 492
493 493 type = rp->r_trapno;
494 494 CPU_STATS_ADDQ(CPU, sys, trap, 1);
495 495 ASSERT(ct->t_schedflag & TS_DONT_SWAP);
496 496
497 497 if (type == T_PGFLT) {
498 498
499 499 errcode = rp->r_err;
500 500 if (errcode & PF_ERR_WRITE)
501 501 rw = S_WRITE;
502 502 else if ((caddr_t)rp->r_pc == addr ||
503 503 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC)))
504 504 rw = S_EXEC;
505 505 else
506 506 rw = S_READ;
507 507
508 508 #if defined(__i386)
509 509 /*
510 510 * Pentium Pro work-around
511 511 */
512 512 if ((errcode & PF_ERR_PROT) && pentiumpro_bug4046376) {
513 513 uint_t attr;
514 514 uint_t priv_violation;
515 515 uint_t access_violation;
516 516
517 517 if (hat_getattr(addr < (caddr_t)kernelbase ?
518 518 curproc->p_as->a_hat : kas.a_hat, addr, &attr)
519 519 == -1) {
520 520 errcode &= ~PF_ERR_PROT;
521 521 } else {
522 522 priv_violation = (errcode & PF_ERR_USER) &&
523 523 !(attr & PROT_USER);
524 524 access_violation = (errcode & PF_ERR_WRITE) &&
525 525 !(attr & PROT_WRITE);
526 526 if (!priv_violation && !access_violation)
527 527 goto cleanup;
528 528 }
529 529 }
530 530 #endif /* __i386 */
531 531
532 532 } else if (type == T_SGLSTP && lwp != NULL)
533 533 lwp->lwp_pcb.pcb_drstat = (uintptr_t)addr;
534 534
535 535 if (tdebug)
536 536 showregs(type, rp, addr);
537 537
538 538 if (USERMODE(rp->r_cs)) {
539 539 /*
540 540 * Set up the current cred to use during this trap. u_cred
541 541 * no longer exists. t_cred is used instead.
542 542 * The current process credential applies to the thread for
543 543 * the entire trap. If trapping from the kernel, this
544 544 * should already be set up.
545 545 */
546 546 if (ct->t_cred != p->p_cred) {
547 547 cred_t *oldcred = ct->t_cred;
548 548 /*
549 549 * DTrace accesses t_cred in probe context. t_cred
550 550 * must always be either NULL, or point to a valid,
551 551 * allocated cred structure.
552 552 */
553 553 ct->t_cred = crgetcred();
554 554 crfree(oldcred);
555 555 }
556 556 ASSERT(lwp != NULL);
557 557 type |= USER;
558 558 ASSERT(lwptoregs(lwp) == rp);
559 559 lwp->lwp_state = LWP_SYS;
560 560
561 561 switch (type) {
562 562 case T_PGFLT + USER:
563 563 if ((caddr_t)rp->r_pc == addr)
564 564 mstate = LMS_TFAULT;
565 565 else
566 566 mstate = LMS_DFAULT;
567 567 break;
568 568 default:
569 569 mstate = LMS_TRAP;
570 570 break;
571 571 }
572 572 /* Kernel probe */
573 573 TNF_PROBE_1(thread_state, "thread", /* CSTYLED */,
574 574 tnf_microstate, state, mstate);
575 575 mstate = new_mstate(ct, mstate);
576 576
577 577 bzero(&siginfo, sizeof (siginfo));
578 578 }
579 579
580 580 switch (type) {
581 581 case T_PGFLT + USER:
582 582 case T_SGLSTP:
583 583 case T_SGLSTP + USER:
584 584 case T_BPTFLT + USER:
585 585 break;
586 586
587 587 default:
588 588 FTRACE_2("trap(): type=0x%lx, regs=0x%lx",
589 589 (ulong_t)type, (ulong_t)rp);
590 590 break;
591 591 }
592 592
593 593 switch (type) {
594 594 case T_SIMDFPE:
595 595 /* Make sure we enable interrupts before die()ing */
596 596 sti(); /* The SIMD exception comes in via cmninttrap */
597 597 /*FALLTHROUGH*/
598 598 default:
599 599 if (type & USER) {
600 600 if (tudebug)
601 601 showregs(type, rp, (caddr_t)0);
602 602 printf("trap: Unknown trap type %d in user mode\n",
603 603 type & ~USER);
604 604 siginfo.si_signo = SIGILL;
605 605 siginfo.si_code = ILL_ILLTRP;
606 606 siginfo.si_addr = (caddr_t)rp->r_pc;
607 607 siginfo.si_trapno = type & ~USER;
608 608 fault = FLTILL;
609 609 break;
610 610 } else {
611 611 (void) die(type, rp, addr, cpuid);
612 612 /*NOTREACHED*/
613 613 }
614 614
615 615 case T_PGFLT: /* system page fault */
616 616 /*
617 617 * If we're under on_trap() protection (see <sys/ontrap.h>),
618 618 * set ot_trap and bounce back to the on_trap() call site
619 619 * via the installed trampoline.
620 620 */
621 621 if ((ct->t_ontrap != NULL) &&
622 622 (ct->t_ontrap->ot_prot & OT_DATA_ACCESS)) {
623 623 ct->t_ontrap->ot_trap |= OT_DATA_ACCESS;
624 624 rp->r_pc = ct->t_ontrap->ot_trampoline;
625 625 goto cleanup;
626 626 }
627 627
628 628 /*
629 629 * If we have an Instruction fault in kernel mode, then that
630 630 * means we've tried to execute a user page (SMEP) or both of
631 631 * PAE and NXE are enabled. In either case, given that it's a
632 632 * kernel fault, we should panic immediately and not try to make
633 633 * any more forward progress. This indicates a bug in the
634 634 * kernel, which if execution continued, could be exploited to
635 635 * wreak havoc on the system.
636 636 */
637 637 if (errcode & PF_ERR_EXEC) {
638 638 (void) die(type, rp, addr, cpuid);
639 639 }
640 640
641 641 /*
642 642 * We need to check if SMAP is in play. If SMAP is in play, then
643 643 * any access to a user page will show up as a protection
644 644 * violation. To see if SMAP is enabled we first check if it's a
645 645 * user address and whether we have the feature flag set. If we
646 646 * do and the interrupted registers do not allow for user
647 647 * accesses (PS_ACHK is not enabled), then we need to die
648 648 * immediately.
649 649 */
650 650 if (addr < (caddr_t)kernelbase &&
651 651 is_x86_feature(x86_featureset, X86FSET_SMAP) == B_TRUE &&
652 652 (rp->r_ps & PS_ACHK) == 0) {
653 653 (void) die(type, rp, addr, cpuid);
654 654 }
655 655
656 656 /*
657 657 * See if we can handle as pagefault. Save lofault and onfault
658 658 * across this. Here we assume that an address less than
659 659 * KERNELBASE is a user fault. We can do this as copy.s
660 660 * routines verify that the starting address is less than
661 661 * KERNELBASE before starting and because we know that we
662 662 * always have KERNELBASE mapped as invalid to serve as a
663 663 * "barrier".
664 664 */
665 665 lofault = ct->t_lofault;
666 666 onfault = ct->t_onfault;
667 667 ct->t_lofault = 0;
668 668
669 669 mstate = new_mstate(ct, LMS_KFAULT);
670 670
671 671 if (addr < (caddr_t)kernelbase) {
672 672 res = pagefault(addr,
673 673 (errcode & PF_ERR_PROT)? F_PROT: F_INVAL, rw, 0);
674 674 if (res == FC_NOMAP &&
675 675 addr < p->p_usrstack &&
676 676 grow(addr))
677 677 res = 0;
678 678 } else {
679 679 res = pagefault(addr,
680 680 (errcode & PF_ERR_PROT)? F_PROT: F_INVAL, rw, 1);
681 681 }
682 682 (void) new_mstate(ct, mstate);
683 683
684 684 /*
685 685 * Restore lofault and onfault. If we resolved the fault, exit.
686 686 * If we didn't and lofault wasn't set, die.
687 687 */
688 688 ct->t_lofault = lofault;
689 689 ct->t_onfault = onfault;
690 690 if (res == 0)
691 691 goto cleanup;
692 692
693 693 #if defined(OPTERON_ERRATUM_93) && defined(_LP64)
694 694 if (lofault == 0 && opteron_erratum_93) {
695 695 /*
696 696 * Workaround for Opteron Erratum 93. On return from
697 697 	 * a System Management Interrupt at a HLT instruction
698 698 * the %rip might be truncated to a 32 bit value.
699 699 * BIOS is supposed to fix this, but some don't.
700 700 * If this occurs we simply restore the high order bits.
701 701 * The HLT instruction is 1 byte of 0xf4.
702 702 */
703 703 uintptr_t rip = rp->r_pc;
704 704
705 705 if ((rip & 0xfffffffful) == rip) {
706 706 rip |= 0xfffffffful << 32;
707 707 if (hat_getpfnum(kas.a_hat, (caddr_t)rip) !=
708 708 PFN_INVALID &&
709 709 (*(uchar_t *)rip == 0xf4 ||
710 710 *(uchar_t *)(rip - 1) == 0xf4)) {
711 711 rp->r_pc = rip;
712 712 goto cleanup;
713 713 }
714 714 }
715 715 }
716 716 #endif /* OPTERON_ERRATUM_93 && _LP64 */
717 717
718 718 #ifdef OPTERON_ERRATUM_91
719 719 if (lofault == 0 && opteron_erratum_91) {
720 720 /*
721 721 * Workaround for Opteron Erratum 91. Prefetches may
722 722 * generate a page fault (they're not supposed to do
723 723 * that!). If this occurs we simply return back to the
724 724 * instruction.
725 725 */
726 726 caddr_t pc = (caddr_t)rp->r_pc;
727 727
728 728 /*
729 729 * If the faulting PC is not mapped, this is a
730 730 * legitimate kernel page fault that must result in a
731 731 * panic. If the faulting PC is mapped, it could contain
732 732 * a prefetch instruction. Check for that here.
733 733 */
734 734 if (hat_getpfnum(kas.a_hat, pc) != PFN_INVALID) {
735 735 if (cmp_to_prefetch((uchar_t *)pc)) {
736 736 #ifdef DEBUG
737 737 cmn_err(CE_WARN, "Opteron erratum 91 "
738 738 "occurred: kernel prefetch"
739 739 " at %p generated a page fault!",
740 740 (void *)rp->r_pc);
741 741 #endif /* DEBUG */
742 742 goto cleanup;
743 743 }
744 744 }
745 745 (void) die(type, rp, addr, cpuid);
746 746 }
747 747 #endif /* OPTERON_ERRATUM_91 */
748 748
749 749 if (lofault == 0)
750 750 (void) die(type, rp, addr, cpuid);
751 751
752 752 /*
753 753 * Cannot resolve fault. Return to lofault.
754 754 */
755 755 if (lodebug) {
756 756 showregs(type, rp, addr);
757 757 traceregs(rp);
758 758 }
759 759 if (FC_CODE(res) == FC_OBJERR)
760 760 res = FC_ERRNO(res);
761 761 else
762 762 res = EFAULT;
763 763 rp->r_r0 = res;
764 764 rp->r_pc = ct->t_lofault;
765 765 goto cleanup;
766 766
767 767 case T_PGFLT + USER: /* user page fault */
768 768 if (faultdebug) {
769 769 char *fault_str;
770 770
771 771 switch (rw) {
772 772 case S_READ:
773 773 fault_str = "read";
774 774 break;
775 775 case S_WRITE:
776 776 fault_str = "write";
777 777 break;
778 778 case S_EXEC:
779 779 fault_str = "exec";
780 780 break;
781 781 default:
782 782 fault_str = "";
783 783 break;
784 784 }
785 785 printf("user %s fault: addr=0x%lx errcode=0x%x\n",
786 786 fault_str, (uintptr_t)addr, errcode);
787 787 }
788 788
789 789 #if defined(OPTERON_ERRATUM_100) && defined(_LP64)
790 790 /*
791 791 * Workaround for AMD erratum 100
792 792 *
793 793 * A 32-bit process may receive a page fault on a non
794 794 * 32-bit address by mistake. The range of the faulting
795 795 * address will be
796 796 *
797 797 * 0xffffffff80000000 .. 0xffffffffffffffff or
798 798 * 0x0000000100000000 .. 0x000000017fffffff
799 799 *
800 800 * The fault is always due to an instruction fetch, however
801 801 * the value of r_pc should be correct (in 32 bit range),
802 802 * so we ignore the page fault on the bogus address.
803 803 */
804 804 if (p->p_model == DATAMODEL_ILP32 &&
805 805 (0xffffffff80000000 <= (uintptr_t)addr ||
806 806 (0x100000000 <= (uintptr_t)addr &&
807 807 (uintptr_t)addr <= 0x17fffffff))) {
808 808 if (!opteron_erratum_100)
809 809 panic("unexpected erratum #100");
810 810 if (rp->r_pc <= 0xffffffff)
811 811 goto out;
812 812 }
813 813 #endif /* OPTERON_ERRATUM_100 && _LP64 */
814 814
815 815 ASSERT(!(curthread->t_flag & T_WATCHPT));
816 816 watchpage = (pr_watch_active(p) && pr_is_watchpage(addr, rw));
817 817 #ifdef __i386
818 818 /*
819 819 * In 32-bit mode, the lcall (system call) instruction fetches
820 820 * one word from the stack, at the stack pointer, because of the
821 821 * way the call gate is constructed. This is a bogus
822 822 * read and should not be counted as a read watchpoint.
823 823 * We work around the problem here by testing to see if
824 824 * this situation applies and, if so, simply jumping to
825 825 * the code in locore.s that fields the system call trap.
826 826 * The registers on the stack are already set up properly
827 827 * due to the match between the call gate sequence and the
828 828 * trap gate sequence. We just have to adjust the pc.
829 829 */
830 830 if (watchpage && addr == (caddr_t)rp->r_sp &&
831 831 rw == S_READ && instr_is_lcall_syscall((caddr_t)rp->r_pc)) {
832 832 extern void watch_syscall(void);
833 833
834 834 rp->r_pc += LCALLSIZE;
835 835 watch_syscall(); /* never returns */
836 836 /* NOTREACHED */
837 837 }
838 838 #endif /* __i386 */
839 839 vaddr = addr;
840 840 if (!watchpage || (sz = instr_size(rp, &vaddr, rw)) <= 0)
841 841 fault_type = (errcode & PF_ERR_PROT)? F_PROT: F_INVAL;
842 842 else if ((watchcode = pr_is_watchpoint(&vaddr, &ta,
843 843 sz, NULL, rw)) != 0) {
844 844 if (ta) {
845 845 do_watch_step(vaddr, sz, rw,
846 846 watchcode, rp->r_pc);
847 847 fault_type = F_INVAL;
848 848 } else {
849 849 bzero(&siginfo, sizeof (siginfo));
850 850 siginfo.si_signo = SIGTRAP;
851 851 siginfo.si_code = watchcode;
852 852 siginfo.si_addr = vaddr;
853 853 siginfo.si_trapafter = 0;
854 854 siginfo.si_pc = (caddr_t)rp->r_pc;
855 855 fault = FLTWATCH;
856 856 break;
857 857 }
858 858 } else {
859 859 /* XXX pr_watch_emul() never succeeds (for now) */
860 860 if (rw != S_EXEC && pr_watch_emul(rp, vaddr, rw))
861 861 goto out;
862 862 do_watch_step(vaddr, sz, rw, 0, 0);
863 863 fault_type = F_INVAL;
864 864 }
865 865
866 866 /*
867 867 * Allow the brand to interpose on invalid memory accesses
868 868 * prior to running the native pagefault handler. If this
869 869 * brand hook returns zero, it was able to handle the fault
870 870 * completely. Otherwise, drive on and call pagefault().
871 871 */
872 872 if (PROC_IS_BRANDED(p) && BROP(p)->b_pagefault != NULL &&
873 873 BROP(p)->b_pagefault(p, lwp, addr, fault_type, rw) == 0) {
874 874 goto out;
875 875 }
876 876
877 877 res = pagefault(addr, fault_type, rw, 0);
878 878
879 879 /*
880 880 * If pagefault() succeeded, ok.
881 881 * Otherwise attempt to grow the stack.
882 882 */
883 883 if (res == 0 ||
884 884 (res == FC_NOMAP &&
885 885 addr < p->p_usrstack &&
886 886 grow(addr))) {
887 887 lwp->lwp_lastfault = FLTPAGE;
888 888 lwp->lwp_lastfaddr = addr;
889 889 if (prismember(&p->p_fltmask, FLTPAGE)) {
890 890 bzero(&siginfo, sizeof (siginfo));
891 891 siginfo.si_addr = addr;
892 892 (void) stop_on_fault(FLTPAGE, &siginfo);
893 893 }
894 894 goto out;
895 895 } else if (res == FC_PROT && addr < p->p_usrstack &&
896 896 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC))) {
897 897 report_stack_exec(p, addr);
898 898 }
899 899
900 900 #ifdef OPTERON_ERRATUM_91
901 901 /*
902 902 * Workaround for Opteron Erratum 91. Prefetches may generate a
903 903 * page fault (they're not supposed to do that!). If this
904 904 * occurs we simply return back to the instruction.
905 905 *
906 906 * We rely on copyin to properly fault in the page with r_pc.
907 907 */
908 908 if (opteron_erratum_91 &&
909 909 addr != (caddr_t)rp->r_pc &&
910 910 instr_is_prefetch((caddr_t)rp->r_pc)) {
911 911 #ifdef DEBUG
912 912 cmn_err(CE_WARN, "Opteron erratum 91 occurred: "
913 913 "prefetch at %p in pid %d generated a trap!",
914 914 (void *)rp->r_pc, p->p_pid);
915 915 #endif /* DEBUG */
916 916 goto out;
917 917 }
918 918 #endif /* OPTERON_ERRATUM_91 */
919 919
920 920 if (tudebug)
921 921 showregs(type, rp, addr);
922 922 /*
923 923 * In the case where both pagefault and grow fail,
924 924 * set the code to the value provided by pagefault.
925 925 * We map all errors returned from pagefault() to SIGSEGV.
926 926 */
927 927 bzero(&siginfo, sizeof (siginfo));
928 928 siginfo.si_addr = addr;
929 929 switch (FC_CODE(res)) {
930 930 case FC_HWERR:
931 931 case FC_NOSUPPORT:
932 932 siginfo.si_signo = SIGBUS;
933 933 siginfo.si_code = BUS_ADRERR;
934 934 fault = FLTACCESS;
935 935 break;
936 936 case FC_ALIGN:
937 937 siginfo.si_signo = SIGBUS;
938 938 siginfo.si_code = BUS_ADRALN;
939 939 fault = FLTACCESS;
940 940 break;
941 941 case FC_OBJERR:
942 942 if ((siginfo.si_errno = FC_ERRNO(res)) != EINTR) {
943 943 siginfo.si_signo = SIGBUS;
944 944 siginfo.si_code = BUS_OBJERR;
945 945 fault = FLTACCESS;
946 946 }
947 947 break;
948 948 default: /* FC_NOMAP or FC_PROT */
949 949 siginfo.si_signo = SIGSEGV;
950 950 siginfo.si_code =
951 951 (res == FC_NOMAP)? SEGV_MAPERR : SEGV_ACCERR;
952 952 fault = FLTBOUNDS;
953 953 break;
954 954 }
955 955 break;
956 956
957 957 case T_ILLINST + USER: /* invalid opcode fault */
958 958 /*
959 959 * If the syscall instruction is disabled due to LDT usage, a
960 960 * user program that attempts to execute it will trigger a #ud
961 961 * trap. Check for that case here. If this occurs on a CPU which
962 962 * doesn't even support syscall, the result of all of this will
963 963 * be to emulate that particular instruction.
964 964 */
965 965 if (p->p_ldt != NULL &&
966 966 ldt_rewrite_syscall(rp, p, X86FSET_ASYSC))
967 967 goto out;
968 968
969 969 #ifdef __amd64
970 970 /*
971 971 * Emulate the LAHF and SAHF instructions if needed.
972 972 * See the instr_is_lsahf function for details.
973 973 */
974 974 if (p->p_model == DATAMODEL_LP64 &&
975 975 instr_is_lsahf((caddr_t)rp->r_pc, &instr)) {
976 976 emulate_lsahf(rp, instr);
977 977 goto out;
978 978 }
979 979 #endif
980 980
981 981 /*FALLTHROUGH*/
982 982
983 983 if (tudebug)
984 984 showregs(type, rp, (caddr_t)0);
985 985 siginfo.si_signo = SIGILL;
986 986 siginfo.si_code = ILL_ILLOPC;
987 987 siginfo.si_addr = (caddr_t)rp->r_pc;
988 988 fault = FLTILL;
989 989 break;
990 990
991 991 case T_ZERODIV + USER: /* integer divide by zero */
992 992 if (tudebug && tudebugfpe)
993 993 showregs(type, rp, (caddr_t)0);
994 994 siginfo.si_signo = SIGFPE;
995 995 siginfo.si_code = FPE_INTDIV;
996 996 siginfo.si_addr = (caddr_t)rp->r_pc;
997 997 fault = FLTIZDIV;
998 998 break;
999 999
1000 1000 case T_OVFLW + USER: /* integer overflow */
1001 1001 if (tudebug && tudebugfpe)
1002 1002 showregs(type, rp, (caddr_t)0);
1003 1003 siginfo.si_signo = SIGFPE;
1004 1004 siginfo.si_code = FPE_INTOVF;
1005 1005 siginfo.si_addr = (caddr_t)rp->r_pc;
1006 1006 fault = FLTIOVF;
1007 1007 break;
1008 1008
1009 1009 case T_NOEXTFLT + USER: /* math coprocessor not available */
1010 1010 if (tudebug && tudebugfpe)
1011 1011 showregs(type, rp, addr);
1012 1012 if (fpnoextflt(rp)) {
1013 1013 siginfo.si_signo = SIGILL;
1014 1014 siginfo.si_code = ILL_ILLOPC;
1015 1015 siginfo.si_addr = (caddr_t)rp->r_pc;
1016 1016 fault = FLTILL;
1017 1017 }
1018 1018 break;
1019 1019
1020 1020 case T_EXTOVRFLT: /* extension overrun fault */
1021 1021 /* check if we took a kernel trap on behalf of user */
1022 1022 {
1023 1023 extern void ndptrap_frstor(void);
1024 1024 if (rp->r_pc != (uintptr_t)ndptrap_frstor) {
1025 1025 sti(); /* T_EXTOVRFLT comes in via cmninttrap */
1026 1026 (void) die(type, rp, addr, cpuid);
1027 1027 }
1028 1028 type |= USER;
1029 1029 }
1030 1030 /*FALLTHROUGH*/
1031 1031 case T_EXTOVRFLT + USER: /* extension overrun fault */
1032 1032 if (tudebug && tudebugfpe)
1033 1033 showregs(type, rp, addr);
1034 1034 if (fpextovrflt(rp)) {
1035 1035 siginfo.si_signo = SIGSEGV;
1036 1036 siginfo.si_code = SEGV_MAPERR;
1037 1037 siginfo.si_addr = (caddr_t)rp->r_pc;
1038 1038 fault = FLTBOUNDS;
1039 1039 }
1040 1040 break;
1041 1041
1042 1042 case T_EXTERRFLT: /* x87 floating point exception pending */
1043 1043 /* check if we took a kernel trap on behalf of user */
1044 1044 {
1045 1045 extern void ndptrap_frstor(void);
1046 1046 if (rp->r_pc != (uintptr_t)ndptrap_frstor) {
1047 1047 sti(); /* T_EXTERRFLT comes in via cmninttrap */
1048 1048 (void) die(type, rp, addr, cpuid);
1049 1049 }
1050 1050 type |= USER;
1051 1051 }
1052 1052 /*FALLTHROUGH*/
1053 1053
1054 1054 case T_EXTERRFLT + USER: /* x87 floating point exception pending */
1055 1055 if (tudebug && tudebugfpe)
1056 1056 showregs(type, rp, addr);
1057 1057 if (sicode = fpexterrflt(rp)) {
1058 1058 siginfo.si_signo = SIGFPE;
1059 1059 siginfo.si_code = sicode;
1060 1060 siginfo.si_addr = (caddr_t)rp->r_pc;
1061 1061 fault = FLTFPE;
1062 1062 }
1063 1063 break;
1064 1064
1065 1065 case T_SIMDFPE + USER: /* SSE and SSE2 exceptions */
1066 1066 if (tudebug && tudebugsse)
1067 1067 showregs(type, rp, addr);
1068 1068 if (!is_x86_feature(x86_featureset, X86FSET_SSE) &&
1069 1069 !is_x86_feature(x86_featureset, X86FSET_SSE2)) {
1070 1070 /*
1071 1071 * There are rumours that some user instructions
1072 1072 * on older CPUs can cause this trap to occur; in
1073 1073 * which case send a SIGILL instead of a SIGFPE.
1074 1074 */
1075 1075 siginfo.si_signo = SIGILL;
1076 1076 siginfo.si_code = ILL_ILLTRP;
1077 1077 siginfo.si_addr = (caddr_t)rp->r_pc;
1078 1078 siginfo.si_trapno = type & ~USER;
1079 1079 fault = FLTILL;
1080 1080 } else if ((sicode = fpsimderrflt(rp)) != 0) {
1081 1081 siginfo.si_signo = SIGFPE;
1082 1082 siginfo.si_code = sicode;
1083 1083 siginfo.si_addr = (caddr_t)rp->r_pc;
1084 1084 fault = FLTFPE;
1085 1085 }
1086 1086
1087 1087 sti(); /* The SIMD exception comes in via cmninttrap */
1088 1088 break;
1089 1089
1090 1090 case T_BPTFLT: /* breakpoint trap */
1091 1091 /*
1092 1092 * Kernel breakpoint traps should only happen when kmdb is
1093 1093 * active, and even then, it'll have interposed on the IDT, so
1094 1094 * control won't get here. If it does, we've hit a breakpoint
1095 1095 * without the debugger, which is very strange, and very
1096 1096 * fatal.
1097 1097 */
1098 1098 if (tudebug && tudebugbpt)
1099 1099 showregs(type, rp, (caddr_t)0);
1100 1100
1101 1101 (void) die(type, rp, addr, cpuid);
1102 1102 break;
1103 1103
1104 1104 case T_SGLSTP: /* single step/hw breakpoint exception */
1105 1105
1106 1106 /* Now evaluate how we got here */
1107 1107 if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
1108 1108 /*
1109 1109 * i386 single-steps even through lcalls which
1110 1110 * change the privilege level. So we take a trap at
1111 1111 * the first instruction in privileged mode.
1112 1112 *
1113 1113 * Set a flag to indicate that upon completion of
1114 1114 * the system call, deal with the single-step trap.
1115 1115 *
1116 1116 * The same thing happens for sysenter, too.
1117 1117 */
1118 1118 singlestep_twiddle = 0;
1119 1119 if (rp->r_pc == (uintptr_t)sys_sysenter ||
1120 1120 rp->r_pc == (uintptr_t)brand_sys_sysenter) {
1121 1121 singlestep_twiddle = 1;
1122 1122 #if defined(__amd64)
1123 1123 /*
1124 1124 * Since we are already on the kernel's
1125 1125 * %gs, on 64-bit systems the sysenter case
1126 1126 * needs to adjust the pc to avoid
1127 1127 * executing the swapgs instruction at the
1128 1128 * top of the handler.
1129 1129 */
1130 1130 if (rp->r_pc == (uintptr_t)sys_sysenter)
1131 1131 rp->r_pc = (uintptr_t)
1132 1132 _sys_sysenter_post_swapgs;
1133 1133 else
1134 1134 rp->r_pc = (uintptr_t)
1135 1135 _brand_sys_sysenter_post_swapgs;
1136 1136 #endif
1137 1137 }
1138 1138 #if defined(__i386)
1139 1139 else if (rp->r_pc == (uintptr_t)sys_call ||
1140 1140 rp->r_pc == (uintptr_t)brand_sys_call) {
1141 1141 singlestep_twiddle = 1;
1142 1142 }
1143 1143 #endif
1144 1144 else {
1145 1145 /* not on sysenter/syscall; uregs available */
1146 1146 if (tudebug && tudebugbpt)
1147 1147 showregs(type, rp, (caddr_t)0);
1148 1148 }
1149 1149 if (singlestep_twiddle) {
1150 1150 rp->r_ps &= ~PS_T; /* turn off trace */
1151 1151 lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
1152 1152 ct->t_post_sys = 1;
1153 1153 aston(curthread);
1154 1154 goto cleanup;
1155 1155 }
1156 1156 }
1157 1157 /* XXX - needs review on debugger interface? */
1158 1158 if (boothowto & RB_DEBUG)
1159 1159 debug_enter((char *)NULL);
1160 1160 else
1161 1161 (void) die(type, rp, addr, cpuid);
1162 1162 break;
1163 1163
1164 1164 case T_NMIFLT: /* NMI interrupt */
1165 1165 printf("Unexpected NMI in system mode\n");
1166 1166 goto cleanup;
1167 1167
1168 1168 case T_NMIFLT + USER: /* NMI interrupt */
1169 1169 printf("Unexpected NMI in user mode\n");
1170 1170 break;
1171 1171
1172 1172 case T_GPFLT: /* general protection violation */
1173 1173 /*
1174 1174 * Any #GP that occurs during an on_trap .. no_trap bracket
1175 1175 * with OT_DATA_ACCESS or OT_SEGMENT_ACCESS protection,
1176 1176 	 * or in an on_fault .. no_fault bracket, is forgiven
1177 1177 * and we trampoline. This protection is given regardless
1178 1178 * of whether we are 32/64 bit etc - if a distinction is
1179 1179 * required then define new on_trap protection types.
1180 1180 *
1181 1181 * On amd64, we can get a #gp from referencing addresses
1182 1182 * in the virtual address hole e.g. from a copyin or in
1183 1183 * update_sregs while updating user segment registers.
1184 1184 *
1185 1185 * On the 32-bit hypervisor we could also generate one in
1186 1186 * mfn_to_pfn by reaching around or into where the hypervisor
1187 1187 * lives which is protected by segmentation.
1188 1188 */
1189 1189
1190 1190 /*
1191 1191 * If we're under on_trap() protection (see <sys/ontrap.h>),
1192 1192 * set ot_trap and trampoline back to the on_trap() call site
1193 1193 * for OT_DATA_ACCESS or OT_SEGMENT_ACCESS.
1194 1194 */
1195 1195 if (ct->t_ontrap != NULL) {
1196 1196 int ttype = ct->t_ontrap->ot_prot &
1197 1197 (OT_DATA_ACCESS | OT_SEGMENT_ACCESS);
1198 1198
1199 1199 if (ttype != 0) {
1200 1200 ct->t_ontrap->ot_trap |= ttype;
1201 1201 if (tudebug)
1202 1202 showregs(type, rp, (caddr_t)0);
1203 1203 rp->r_pc = ct->t_ontrap->ot_trampoline;
1204 1204 goto cleanup;
1205 1205 }
1206 1206 }
1207 1207
1208 1208 /*
1209 1209 * If we're under lofault protection (copyin etc.),
1210 1210 * longjmp back to lofault with an EFAULT.
1211 1211 */
1212 1212 if (ct->t_lofault) {
1213 1213 /*
1214 1214 * Fault is not resolvable, so just return to lofault
1215 1215 */
1216 1216 if (lodebug) {
1217 1217 showregs(type, rp, addr);
1218 1218 traceregs(rp);
1219 1219 }
1220 1220 rp->r_r0 = EFAULT;
1221 1221 rp->r_pc = ct->t_lofault;
1222 1222 goto cleanup;
1223 1223 }
1224 1224
1225 1225 /*
1226 1226 * We fall through to the next case, which repeats
1227 1227 * the OT_SEGMENT_ACCESS check which we've already
1228 1228 * done, so we'll always fall through to the
1229 1229 * T_STKFLT case.
1230 1230 */
1231 1231 /*FALLTHROUGH*/
1232 1232 case T_SEGFLT: /* segment not present fault */
1233 1233 /*
1234 1234 * One example of this is #NP in update_sregs while
1235 1235 * attempting to update a user segment register
1236 1236 * that points to a descriptor that is marked not
1237 1237 * present.
1238 1238 */
1239 1239 if (ct->t_ontrap != NULL &&
1240 1240 ct->t_ontrap->ot_prot & OT_SEGMENT_ACCESS) {
1241 1241 ct->t_ontrap->ot_trap |= OT_SEGMENT_ACCESS;
1242 1242 if (tudebug)
1243 1243 showregs(type, rp, (caddr_t)0);
1244 1244 rp->r_pc = ct->t_ontrap->ot_trampoline;
1245 1245 goto cleanup;
1246 1246 }
1247 1247 /*FALLTHROUGH*/
1248 1248 case T_STKFLT: /* stack fault */
1249 1249 case T_TSSFLT: /* invalid TSS fault */
1250 1250 if (tudebug)
1251 1251 showregs(type, rp, (caddr_t)0);
1252 1252 if (kern_gpfault(rp))
1253 1253 (void) die(type, rp, addr, cpuid);
1254 1254 goto cleanup;
1255 1255
1256 1256 /*
1257 1257 * ONLY 32-bit PROCESSES can USE a PRIVATE LDT! 64-bit apps
1258 1258 * should have no need for them, so we put a stop to it here.
1259 1259 *
1260 1260 * So: not-present fault is ONLY valid for 32-bit processes with
1261 1261 * a private LDT trying to do a system call. Emulate it.
1262 1262 *
1263 1263 * #gp fault is ONLY valid for 32-bit processes also, which DO NOT
1264 1264 * have a private LDT, and are trying to do a system call. Emulate it.
1265 1265 */
1266 1266
1267 1267 case T_SEGFLT + USER: /* segment not present fault */
1268 1268 case T_GPFLT + USER: /* general protection violation */
1269 1269 #ifdef _SYSCALL32_IMPL
1270 1270 if (p->p_model != DATAMODEL_NATIVE) {
1271 1271 #endif /* _SYSCALL32_IMPL */
1272 1272 if (instr_is_lcall_syscall((caddr_t)rp->r_pc)) {
1273 1273 if (type == T_SEGFLT + USER)
1274 1274 ASSERT(p->p_ldt != NULL);
1275 1275
1276 1276 if ((p->p_ldt == NULL && type == T_GPFLT + USER) ||
1277 1277 type == T_SEGFLT + USER) {
1278 1278
1279 1279 /*
1280 1280 * The user attempted a system call via the obsolete
1281 1281 * call gate mechanism. Because the process doesn't have
1282 1282 * an LDT (i.e. the ldtr contains 0), a #gp results.
1283 1283 * Emulate the syscall here, just as we do above for a
1284 1284 * #np trap.
1285 1285 */
1286 1286
1287 1287 /*
1288 1288 * Since this is a not-present trap, rp->r_pc points to
1289 1289 * the trapping lcall instruction. We need to bump it
1290 1290 * to the next insn so the app can continue on.
1291 1291 */
1292 1292 rp->r_pc += LCALLSIZE;
1293 1293 lwp->lwp_regs = rp;
1294 1294
1295 1295 /*
1296 1296 * Normally the microstate of the LWP is forced back to
1297 1297 * LMS_USER by the syscall handlers. Emulate that
1298 1298 * behavior here.
1299 1299 */
1300 1300 mstate = LMS_USER;
1301 1301
1302 1302 dosyscall();
1303 1303 goto out;
1304 1304 }
1305 1305 }
1306 1306 #ifdef _SYSCALL32_IMPL
1307 1307 }
1308 1308 #endif /* _SYSCALL32_IMPL */
1309 1309 /*
1310 1310 * If the current process is using a private LDT and the
1311 1311 * trapping instruction is sysenter, the sysenter instruction
1312 1312 * has been disabled on the CPU because it destroys segment
1313 1313 * registers. If this is the case, rewrite the instruction to
1314 1314 * be a safe system call and retry it. If this occurs on a CPU
1315 1315 * which doesn't even support sysenter, the result of all of
1316 1316 * this will be to emulate that particular instruction.
1317 1317 */
1318 1318 if (p->p_ldt != NULL &&
1319 1319 ldt_rewrite_syscall(rp, p, X86FSET_SEP))
1320 1320 goto out;
1321 1321
1322 1322 /*FALLTHROUGH*/
1323 1323
1324 1324 case T_BOUNDFLT + USER: /* bound fault */
1325 1325 case T_STKFLT + USER: /* stack fault */
1326 1326 case T_TSSFLT + USER: /* invalid TSS fault */
1327 1327 if (tudebug)
1328 1328 showregs(type, rp, (caddr_t)0);
1329 1329 siginfo.si_signo = SIGSEGV;
1330 1330 siginfo.si_code = SEGV_MAPERR;
1331 1331 siginfo.si_addr = (caddr_t)rp->r_pc;
1332 1332 fault = FLTBOUNDS;
1333 1333 break;
1334 1334
1335 1335 case T_ALIGNMENT + USER: /* user alignment error (486) */
1336 1336 if (tudebug)
1337 1337 showregs(type, rp, (caddr_t)0);
1338 1338 bzero(&siginfo, sizeof (siginfo));
1339 1339 siginfo.si_signo = SIGBUS;
1340 1340 siginfo.si_code = BUS_ADRALN;
1341 1341 siginfo.si_addr = (caddr_t)rp->r_pc;
1342 1342 fault = FLTACCESS;
1343 1343 break;
1344 1344
1345 1345 case T_SGLSTP + USER: /* single step/hw breakpoint exception */
1346 1346 if (tudebug && tudebugbpt)
1347 1347 showregs(type, rp, (caddr_t)0);
1348 1348
1349 1349 /* Was it single-stepping? */
1350 1350 if (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP) {
1351 1351 pcb_t *pcb = &lwp->lwp_pcb;
1352 1352
1353 1353 rp->r_ps &= ~PS_T;
1354 1354 /*
1355 1355 * If both NORMAL_STEP and WATCH_STEP are in effect,
1356 1356 * give precedence to WATCH_STEP. If neither is set,
1357 1357 * user must have set the PS_T bit in %efl; treat this
1358 1358 * as NORMAL_STEP.
1359 1359 */
1360 1360 if ((fault = undo_watch_step(&siginfo)) == 0 &&
1361 1361 ((pcb->pcb_flags & NORMAL_STEP) ||
1362 1362 !(pcb->pcb_flags & WATCH_STEP))) {
1363 1363 siginfo.si_signo = SIGTRAP;
1364 1364 siginfo.si_code = TRAP_TRACE;
1365 1365 siginfo.si_addr = (caddr_t)rp->r_pc;
1366 1366 fault = FLTTRACE;
1367 1367 }
1368 1368 pcb->pcb_flags &= ~(NORMAL_STEP|WATCH_STEP);
1369 1369 }
1370 1370 break;
1371 1371
1372 1372 case T_BPTFLT + USER: /* breakpoint trap */
1373 1373 if (tudebug && tudebugbpt)
1374 1374 showregs(type, rp, (caddr_t)0);
1375 1375 /*
1376 1376 * int 3 (the breakpoint instruction) leaves the pc referring
1377 1377 * to the address one byte after the breakpointed address.
1378 1378 	 * If the P_PR_BPTADJ flag has been set via /proc, we adjust
1379 1379 * it back so it refers to the breakpointed address.
1380 1380 */
1381 1381 if (p->p_proc_flag & P_PR_BPTADJ)
1382 1382 rp->r_pc--;
1383 1383 siginfo.si_signo = SIGTRAP;
1384 1384 siginfo.si_code = TRAP_BRKPT;
1385 1385 siginfo.si_addr = (caddr_t)rp->r_pc;
1386 1386 fault = FLTBPT;
1387 1387 break;
1388 1388
1389 1389 case T_AST:
1390 1390 /*
1391 1391 * This occurs only after the cs register has been made to
1392 1392 * look like a kernel selector, either through debugging or
1393 1393 * possibly by functions like setcontext(). The thread is
1394 1394 * about to cause a general protection fault at common_iret()
1395 1395 * in locore. We let that happen immediately instead of
1396 1396 * doing the T_AST processing.
1397 1397 */
1398 1398 goto cleanup;
1399 1399
1400 1400 case T_AST + USER: /* profiling, resched, h/w error pseudo trap */
1401 1401 if (lwp->lwp_pcb.pcb_flags & ASYNC_HWERR) {
1402 1402 proc_t *p = ttoproc(curthread);
1403 1403 extern void print_msg_hwerr(ctid_t ct_id, proc_t *p);
1404 1404
1405 1405 lwp->lwp_pcb.pcb_flags &= ~ASYNC_HWERR;
1406 1406 print_msg_hwerr(p->p_ct_process->conp_contract.ct_id,
1407 1407 p);
1408 1408 contract_process_hwerr(p->p_ct_process, p);
1409 1409 siginfo.si_signo = SIGKILL;
1410 1410 siginfo.si_code = SI_NOINFO;
1411 1411 } else if (lwp->lwp_pcb.pcb_flags & CPC_OVERFLOW) {
1412 1412 lwp->lwp_pcb.pcb_flags &= ~CPC_OVERFLOW;
1413 1413 if (kcpc_overflow_ast()) {
1414 1414 /*
1415 1415 * Signal performance counter overflow
1416 1416 */
1417 1417 if (tudebug)
1418 1418 showregs(type, rp, (caddr_t)0);
1419 1419 bzero(&siginfo, sizeof (siginfo));
1420 1420 siginfo.si_signo = SIGEMT;
1421 1421 siginfo.si_code = EMT_CPCOVF;
1422 1422 siginfo.si_addr = (caddr_t)rp->r_pc;
1423 1423 fault = FLTCPCOVF;
1424 1424 }
1425 1425 }
1426 1426
1427 1427 break;
1428 1428 }
1429 1429
1430 1430 /*
1431 1431 * We can't get here from a system trap
1432 1432 */
1433 1433 ASSERT(type & USER);
1434 1434
1435 1435 if (fault) {
1436 1436 /* We took a fault so abort single step. */
1437 1437 lwp->lwp_pcb.pcb_flags &= ~(NORMAL_STEP|WATCH_STEP);
1438 1438 /*
1439 1439 	 * Remember the fault and fault address
1440 1440 * for real-time (SIGPROF) profiling.
1441 1441 */
1442 1442 lwp->lwp_lastfault = fault;
1443 1443 lwp->lwp_lastfaddr = siginfo.si_addr;
1444 1444
1445 1445 DTRACE_PROC2(fault, int, fault, ksiginfo_t *, &siginfo);
1446 1446
1447 1447 /*
1448 1448 * If a debugger has declared this fault to be an
1449 1449 * event of interest, stop the lwp. Otherwise just
1450 1450 * deliver the associated signal.
1451 1451 */
1452 1452 if (siginfo.si_signo != SIGKILL &&
1453 1453 prismember(&p->p_fltmask, fault) &&
1454 1454 stop_on_fault(fault, &siginfo) == 0)
1455 1455 siginfo.si_signo = 0;
1456 1456 }
1457 1457
1458 1458 if (siginfo.si_signo)
1459 1459 trapsig(&siginfo, (fault != FLTFPE && fault != FLTCPCOVF));
1460 1460
1461 1461 if (lwp->lwp_oweupc)
1462 1462 profil_tick(rp->r_pc);
1463 1463
1464 1464 if (ct->t_astflag | ct->t_sig_check) {
1465 1465 /*
1466 1466 * Turn off the AST flag before checking all the conditions that
1467 1467 * may have caused an AST. This flag is on whenever a signal or
1468 1468 * unusual condition should be handled after the next trap or
1469 1469 * syscall.
1470 1470 */
1471 1471 astoff(ct);
1472 1472 /*
1473 1473 * If a single-step trap occurred on a syscall (see above)
1474 1474 * recognize it now. Do this before checking for signals
1475 1475 * because deferred_singlestep_trap() may generate a SIGTRAP to
1476 1476 * the LWP or may otherwise mark the LWP to call issig(FORREAL).
1477 1477 */
1478 1478 if (lwp->lwp_pcb.pcb_flags & DEBUG_PENDING)
1479 1479 deferred_singlestep_trap((caddr_t)rp->r_pc);
1480 1480
1481 1481 ct->t_sig_check = 0;
1482 1482
1483 1483 /*
1484 1484 * As in other code paths that check against TP_CHANGEBIND,
1485 1485 * we perform the check first without p_lock held -- only
1486 1486 * acquiring p_lock in the unlikely event that it is indeed
1487 1487 * set. This is safe because we are doing this after the
1488 1488 * astoff(); if we are racing another thread setting
1489 1489 * TP_CHANGEBIND on us, we will pick it up on a subsequent
1490 1490 * lap through.
1491 1491 */
1492 1492 if (curthread->t_proc_flag & TP_CHANGEBIND) {
1493 1493 mutex_enter(&p->p_lock);
1494 1494 if (curthread->t_proc_flag & TP_CHANGEBIND) {
1495 1495 timer_lwpbind();
1496 1496 curthread->t_proc_flag &= ~TP_CHANGEBIND;
1497 1497 }
1498 1498 mutex_exit(&p->p_lock);
1499 1499 }
1500 1500
1501 1501 /*
1502 1502 	 * For kaio requests that are on the per-process poll queue,
1503 1503 	 * aiop->aio_pollq, their AIO_POLL bit is set and the kernel
1504 1504 	 * should copyout their result_t to user memory. By copying
1505 1505 * out the result_t, the user can poll on memory waiting
1506 1506 * for the kaio request to complete.
1507 1507 */
1508 1508 if (p->p_aio)
1509 1509 aio_cleanup(0);
1510 1510 /*
1511 1511 * If this LWP was asked to hold, call holdlwp(), which will
1512 1512 * stop. holdlwps() sets this up and calls pokelwps() which
1513 1513 * sets the AST flag.
1514 1514 *
1515 1515 * Also check TP_EXITLWP, since this is used by fresh new LWPs
1516 1516 * through lwp_rtt(). That flag is set if the lwp_create(2)
1517 1517 * syscall failed after creating the LWP.
1518 1518 */
1519 1519 if (ISHOLD(p))
1520 1520 holdlwp();
1521 1521
1522 1522 /*
1523 1523 * All code that sets signals and makes ISSIG evaluate true must
1524 1524 * set t_astflag afterwards.
1525 1525 */
1526 1526 if (ISSIG_PENDING(ct, lwp, p)) {
1527 1527 if (issig(FORREAL))
1528 1528 psig();
1529 1529 ct->t_sig_check = 1;
1530 1530 }
1531 1531
1532 1532 if (ct->t_rprof != NULL) {
1533 1533 realsigprof(0, 0, 0);
1534 1534 ct->t_sig_check = 1;
1535 1535 }
1536 1536
1537 1537 /*
1538 1538 * /proc can't enable/disable the trace bit itself
1539 1539 * because that could race with the call gate used by
1540 1540 * system calls via "lcall". If that happened, an
1541 1541 * invalid EFLAGS would result. prstep()/prnostep()
1542 1542 * therefore schedule an AST for the purpose.
1543 1543 */
1544 1544 if (lwp->lwp_pcb.pcb_flags & REQUEST_STEP) {
1545 1545 lwp->lwp_pcb.pcb_flags &= ~REQUEST_STEP;
1546 1546 rp->r_ps |= PS_T;
1547 1547 }
1548 1548 if (lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP) {
1549 1549 lwp->lwp_pcb.pcb_flags &= ~REQUEST_NOSTEP;
1550 1550 rp->r_ps &= ~PS_T;
1551 1551 }
1552 1552 }
1553 1553
1554 1554 out: /* We can't get here from a system trap */
1555 1555 ASSERT(type & USER);
1556 1556
1557 1557 if (ISHOLD(p))
1558 1558 holdlwp();
1559 1559
1560 1560 /*
1561 1561 * Set state to LWP_USER here so preempt won't give us a kernel
1562 1562 * priority if it occurs after this point. Call CL_TRAPRET() to
1563 1563 * restore the user-level priority.
1564 1564 *
1565 1565 * It is important that no locks (other than spinlocks) be entered
1566 1566 * after this point before returning to user mode (unless lwp_state
1567 1567 * is set back to LWP_SYS).
1568 1568 */
1569 1569 lwp->lwp_state = LWP_USER;
1570 1570
1571 1571 if (ct->t_trapret) {
1572 1572 ct->t_trapret = 0;
1573 1573 thread_lock(ct);
1574 1574 CL_TRAPRET(ct);
1575 1575 thread_unlock(ct);
1576 1576 }
1577 1577 if (CPU->cpu_runrun || curthread->t_schedflag & TS_ANYWAITQ)
1578 1578 preempt();
1579 1579 prunstop();
1580 1580 (void) new_mstate(ct, mstate);
1581 1581
1582 1582 /* Kernel probe */
1583 1583 TNF_PROBE_1(thread_state, "thread", /* CSTYLED */,
1584 1584 tnf_microstate, state, LMS_USER);
1585 1585
1586 1586 return;
1587 1587
1588 1588 cleanup: /* system traps end up here */
1589 1589 ASSERT(!(type & USER));
1590 1590 }
1591 1591
1592 1592 /*
1593 1593 * Patch non-zero to disable preemption of threads in the kernel.
1594 1594 */
1595 1595 int IGNORE_KERNEL_PREEMPTION = 0; /* XXX - delete this someday */
1596 1596
1597 1597 struct kpreempt_cnts { /* kernel preemption statistics */
1598 1598 int kpc_idle; /* executing idle thread */
1599 1599 int kpc_intr; /* executing interrupt thread */
1600 1600 int kpc_clock; /* executing clock thread */
1601 1601 int kpc_blocked; /* thread has blocked preemption (t_preempt) */
1602 1602 int kpc_notonproc; /* thread is surrendering processor */
1603 1603 int kpc_inswtch; /* thread has ratified scheduling decision */
1604 1604 int kpc_prilevel; /* processor interrupt level is too high */
1605 1605 int kpc_apreempt; /* asynchronous preemption */
1606 1606 int kpc_spreempt; /* synchronous preemption */
1607 1607 } kpreempt_cnts;
1608 1608
1609 1609 /*
1610 1610 * kernel preemption: forced rescheduling, preempt the running kernel thread.
1611 1611 * the argument is old PIL for an interrupt,
1612 1612  * or the distinguished value KPREEMPT_SYNC.
1613 1613 */
1614 1614 void
1615 1615 kpreempt(int asyncspl)
1616 1616 {
1617 1617 kthread_t *ct = curthread;
1618 1618
1619 1619 if (IGNORE_KERNEL_PREEMPTION) {
1620 1620 aston(CPU->cpu_dispthread);
1621 1621 return;
1622 1622 }
1623 1623
1624 1624 /*
1625 1625 * Check that conditions are right for kernel preemption
1626 1626 */
1627 1627 do {
1628 1628 if (ct->t_preempt) {
1629 1629 /*
1630 1630 * either a privileged thread (idle, panic, interrupt)
1631 1631 * or will check when t_preempt is lowered
1632 1632 * We need to specifically handle the case where
1633 1633 * the thread is in the middle of swtch (resume has
1634 1634 * been called) and has its t_preempt set
1635 1635 * [idle thread and a thread which is in kpreempt
1636 1636 * already] and then a high priority thread is
1637 1637 * available in the local dispatch queue.
1638 1638 * In this case the resumed thread needs to take a
1639 1639 * trap so that it can call kpreempt. We achieve
1640 1640 * this by using siron().
1641 1641 * How do we detect this condition:
1642 1642 * idle thread is running and is in the midst of
1643 1643 * resume: curthread->t_pri == -1 && CPU->dispthread
1644 1644 * != CPU->thread
1645 1645 * Need to ensure that this happens only at high pil
1646 1646 * resume is called at high pil
1647 1647 * Only in resume_from_idle is the pil changed.
1648 1648 */
1649 1649 if (ct->t_pri < 0) {
1650 1650 kpreempt_cnts.kpc_idle++;
1651 1651 if (CPU->cpu_dispthread != CPU->cpu_thread)
1652 1652 siron();
1653 1653 } else if (ct->t_flag & T_INTR_THREAD) {
1654 1654 kpreempt_cnts.kpc_intr++;
1655 1655 if (ct->t_pil == CLOCK_LEVEL)
1656 1656 kpreempt_cnts.kpc_clock++;
1657 1657 } else {
1658 1658 kpreempt_cnts.kpc_blocked++;
1659 1659 if (CPU->cpu_dispthread != CPU->cpu_thread)
1660 1660 siron();
1661 1661 }
1662 1662 aston(CPU->cpu_dispthread);
1663 1663 return;
1664 1664 }
1665 1665 if (ct->t_state != TS_ONPROC ||
1666 1666 ct->t_disp_queue != CPU->cpu_disp) {
1667 1667 /* this thread will be calling swtch() shortly */
1668 1668 kpreempt_cnts.kpc_notonproc++;
1669 1669 if (CPU->cpu_thread != CPU->cpu_dispthread) {
1670 1670 /* already in swtch(), force another */
1671 1671 kpreempt_cnts.kpc_inswtch++;
1672 1672 siron();
1673 1673 }
1674 1674 return;
1675 1675 }
1676 1676 if (getpil() >= DISP_LEVEL) {
1677 1677 /*
1678 1678 * We can't preempt this thread if it is at
1679 1679 * a PIL >= DISP_LEVEL since it may be holding
1680 1680 * a spin lock (like sched_lock).
1681 1681 */
1682 1682 siron(); /* check back later */
1683 1683 kpreempt_cnts.kpc_prilevel++;
1684 1684 return;
1685 1685 }
1686 1686 if (!interrupts_enabled()) {
1687 1687 /*
1688 1688 * Can't preempt while running with ints disabled
1689 1689 */
1690 1690 kpreempt_cnts.kpc_prilevel++;
1691 1691 return;
1692 1692 }
1693 1693 if (asyncspl != KPREEMPT_SYNC)
1694 1694 kpreempt_cnts.kpc_apreempt++;
1695 1695 else
1696 1696 kpreempt_cnts.kpc_spreempt++;
1697 1697
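/*
 * Conditions are right: do the preemption.  t_preempt is raised
 * around the call so that a nested kpreempt() takes the "blocked"
 * path above, and we loop in case another kernel preemption
 * request (cpu_kprunrun) arrived while we were off the CPU.
 */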
1698 1698 ct->t_preempt++;
1699 1699 preempt();
1700 1700 ct->t_preempt--;
1701 1701 } while (CPU->cpu_kprunrun);
1702 1702 }
1703 1703
1704 1704 /*
1705 1705 * Print out debugging info.
1706 1706 */
1707 1707 static void
1708 1708 showregs(uint_t type, struct regs *rp, caddr_t addr)
1709 1709 {
1710 1710 int s;
1711 1711
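/*
 * Raise the priority level while we dump state so that the output
 * is less likely to be interleaved with other console traffic;
 * splx() below restores the previous level.
 */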
1712 1712 s = spl7();
1713 1713 type &= ~USER;
1714 1714 if (PTOU(curproc)->u_comm[0])
1715 1715 printf("%s: ", PTOU(curproc)->u_comm);
1716 1716 if (type < TRAP_TYPES)
1717 1717 printf("#%s %s\n", trap_type_mnemonic[type], trap_type[type]);
1718 1718 else
1719 1719 switch (type) {
1720 1720 case T_SYSCALL:
1721 1721 printf("Syscall Trap:\n");
1722 1722 break;
1723 1723 case T_AST:
1724 1724 printf("AST\n");
1725 1725 break;
1726 1726 default:
1727 1727 printf("Bad Trap = %d\n", type);
1728 1728 break;
1729 1729 }
1730 1730 if (type == T_PGFLT) {
1731 1731 printf("Bad %s fault at addr=0x%lx\n",
1732 1732 USERMODE(rp->r_cs) ? "user": "kernel", (uintptr_t)addr);
1733 1733 } else if (addr) {
1734 1734 printf("addr=0x%lx\n", (uintptr_t)addr);
1735 1735 }
1736 1736
1737 1737 printf("pid=%d, pc=0x%lx, sp=0x%lx, eflags=0x%lx\n",
1738 1738 (ttoproc(curthread) && ttoproc(curthread)->p_pidp) ?
1739 1739 ttoproc(curthread)->p_pid : 0, rp->r_pc, rp->r_sp, rp->r_ps);
1740 1740
1741 1741 #if defined(__lint)
1742 1742 /*
1743 1743 * this clause can be deleted when lint bug 4870403 is fixed
1744 1744 * (lint thinks that bit 32 is illegal in a %b format string)
1745 1745 */
1746 1746 printf("cr0: %x cr4: %b\n",
1747 1747 (uint_t)getcr0(), (uint_t)getcr4(), FMT_CR4);
1748 1748 #else
1749 1749 printf("cr0: %b cr4: %b\n",
1750 1750 (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4);
1751 1751 #endif /* __lint */
1752 1752
1753 1753 printf("cr2: %lx", getcr2());
1754 1754 #if !defined(__xpv)
1755 1755 printf("cr3: %lx", getcr3());
1756 1756 #if defined(__amd64)
1757 1757 printf("cr8: %lx\n", getcr8());
1758 1758 #endif
1759 1759 #endif
1760 1760 printf("\n");
1761 1761
1762 1762 dumpregs(rp);
1763 1763 splx(s);
1764 1764 }
1765 1765
1766 1766 static void
1767 1767 dumpregs(struct regs *rp)
1768 1768 {
1769 1769 #if defined(__amd64)
1770 1770 const char fmt[] = "\t%3s: %16lx %3s: %16lx %3s: %16lx\n";
1771 1771
1772 1772 printf(fmt, "rdi", rp->r_rdi, "rsi", rp->r_rsi, "rdx", rp->r_rdx);
1773 1773 printf(fmt, "rcx", rp->r_rcx, " r8", rp->r_r8, " r9", rp->r_r9);
1774 1774 printf(fmt, "rax", rp->r_rax, "rbx", rp->r_rbx, "rbp", rp->r_rbp);
1775 1775 printf(fmt, "r10", rp->r_r10, "r11", rp->r_r11, "r12", rp->r_r12);
1776 1776 printf(fmt, "r13", rp->r_r13, "r14", rp->r_r14, "r15", rp->r_r15);
1777 1777
1778 1778 printf(fmt, "fsb", rdmsr(MSR_AMD_FSBASE), "gsb", rdmsr(MSR_AMD_GSBASE),
1779 1779 " ds", rp->r_ds);
1780 1780 printf(fmt, " es", rp->r_es, " fs", rp->r_fs, " gs", rp->r_gs);
1781 1781
1782 1782 printf(fmt, "trp", rp->r_trapno, "err", rp->r_err, "rip", rp->r_rip);
1783 1783 printf(fmt, " cs", rp->r_cs, "rfl", rp->r_rfl, "rsp", rp->r_rsp);
1784 1784
1785 1785 printf("\t%3s: %16lx\n", " ss", rp->r_ss);
1786 1786
1787 1787 #elif defined(__i386)
1788 1788 const char fmt[] = "\t%3s: %8lx %3s: %8lx %3s: %8lx %3s: %8lx\n";
1789 1789
1790 1790 printf(fmt, " gs", rp->r_gs, " fs", rp->r_fs,
1791 1791 " es", rp->r_es, " ds", rp->r_ds);
1792 1792 printf(fmt, "edi", rp->r_edi, "esi", rp->r_esi,
1793 1793 "ebp", rp->r_ebp, "esp", rp->r_esp);
1794 1794 printf(fmt, "ebx", rp->r_ebx, "edx", rp->r_edx,
1795 1795 "ecx", rp->r_ecx, "eax", rp->r_eax);
1796 1796 printf(fmt, "trp", rp->r_trapno, "err", rp->r_err,
1797 1797 "eip", rp->r_eip, " cs", rp->r_cs);
1798 1798 printf("\t%3s: %8lx %3s: %8lx %3s: %8lx\n",
1799 1799 "efl", rp->r_efl, "usp", rp->r_uesp, " ss", rp->r_ss);
1800 1800
1801 1801 #endif /* __i386 */
1802 1802 }
1803 1803
1804 1804 /*
1805 1805 * Test to see if the instruction is iret on i386 or iretq on amd64.
1806 1806 *
1807 1807 * On the hypervisor we can only test for nopop_sys_rtt_syscall. If true,
1808 1808 * then we are in the context of the hypervisor's failsafe handler, because it
1809 1809 * tried to iret and failed due to a bad selector. See xen_failsafe_callback.
1810 1810 */
1811 1811 static int
1812 1812 instr_is_iret(caddr_t pc)
1813 1813 {
1814 1814
1815 1815 #if defined(__xpv)
1816 1816 extern void nopop_sys_rtt_syscall(void);
1817 1817 return ((pc == (caddr_t)nopop_sys_rtt_syscall) ? 1 : 0);
1818 1818
1819 1819 #else
1820 1820
1821 1821 #if defined(__amd64)
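/* 0x48 is the REX.W prefix; combined with the 0xcf opcode this is iretq */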
1822 1822 static const uint8_t iret_insn[2] = { 0x48, 0xcf }; /* iretq */
1823 1823
1824 1824 #elif defined(__i386)
1825 1825 static const uint8_t iret_insn[1] = { 0xcf }; /* iret */
1826 1826 #endif /* __i386 */
1827 1827 return (bcmp(pc, iret_insn, sizeof (iret_insn)) == 0);
1828 1828
1829 1829 #endif /* __xpv */
1830 1830 }
1831 1831
1832 1832 #if defined(__i386)
1833 1833
1834 1834 /*
1835 1835 * Test to see if the instruction is part of __SEGREGS_POP
1836 1836 *
1837 1837 * Note carefully the appallingly awful dependency between
1838 1838 * the instruction sequence used in __SEGREGS_POP and these
1839 1839 * instructions encoded here.
1840 1840 */
1841 1841 static int
1842 1842 instr_is_segregs_pop(caddr_t pc)
1843 1843 {
1844 1844 static const uint8_t movw_0_esp_gs[4] = { 0x8e, 0x6c, 0x24, 0x0 };
1845 1845 static const uint8_t movw_4_esp_fs[4] = { 0x8e, 0x64, 0x24, 0x4 };
1846 1846 static const uint8_t movw_8_esp_es[4] = { 0x8e, 0x44, 0x24, 0x8 };
1847 1847 static const uint8_t movw_c_esp_ds[4] = { 0x8e, 0x5c, 0x24, 0xc };
1848 1848
1849 1849 if (bcmp(pc, movw_0_esp_gs, sizeof (movw_0_esp_gs)) == 0 ||
1850 1850 bcmp(pc, movw_4_esp_fs, sizeof (movw_4_esp_fs)) == 0 ||
1851 1851 bcmp(pc, movw_8_esp_es, sizeof (movw_8_esp_es)) == 0 ||
1852 1852 bcmp(pc, movw_c_esp_ds, sizeof (movw_c_esp_ds)) == 0)
1853 1853 return (1);
1854 1854
1855 1855 return (0);
1856 1856 }
1857 1857
1858 1858 #endif /* __i386 */
1859 1859
1860 1860 /*
1861 1861 * Test to see if the instruction is part of _sys_rtt.
1862 1862 *
1863 1863 * Again, on the hypervisor, if we try to IRET to user land with a bad code
1864 1864 * or stack selector we will get vectored through xen_failsafe_callback.
1865 1865 * In that case we assume we got here via _sys_rtt, since we only allow
1866 1866 * an IRET to user land to take place in _sys_rtt.
1867 1867 */
1868 1868 static int
1869 1869 instr_is_sys_rtt(caddr_t pc)
1870 1870 {
1871 1871 extern void _sys_rtt(), _sys_rtt_end();
1872 1872
1873 1873 if ((uintptr_t)pc < (uintptr_t)_sys_rtt ||
1874 1874 (uintptr_t)pc > (uintptr_t)_sys_rtt_end)
1875 1875 return (0);
1876 1876
1877 1877 return (1);
1878 1878 }
1879 1879
1880 1880 /*
1881 1881 * Handle #gp faults in kernel mode.
1882 1882 *
1883 1883 * One legitimate way this can happen is if we attempt to update segment
1884 1884 * registers to naughty values on the way out of the kernel.
1885 1885 *
1886 1886 * This can happen in a couple of ways: someone - either accidentally or
1887 1887 * on purpose - creates (setcontext(2), lwp_create(2)) or modifies
1888 1888 * (signal(2)) a ucontext that contains silly segment register values.
1889 1889 * Or someone - either accidentally or on purpose - modifies the prgregset_t
1890 1890 * of a subject process via /proc to contain silly segment register values.
1891 1891 *
1892 1892 * (The unfortunate part is that we can end up discovering the bad segment
1893 1893 * register value in the middle of an 'iret' after we've popped most of the
1894 1894 * stack. So it becomes quite difficult to associate an accurate ucontext
1895 1895 * with the lwp, because the act of taking the #gp trap overwrites most of
1896 1896 * what we were going to send the lwp.)
1897 1897 *
1898 1898 * OTOH if it turns out that's -not- the problem, and we're -not- an lwp
1899 1899 * trying to return to user mode and we get a #gp fault, then we need
1900 1900 * to die() -- which will happen if we return non-zero from this routine.
1901 1901 */
1902 1902 static int
1903 1903 kern_gpfault(struct regs *rp)
1904 1904 {
1905 1905 kthread_t *t = curthread;
1906 1906 proc_t *p = ttoproc(t);
1907 1907 klwp_t *lwp = ttolwp(t);
1908 1908 struct regs tmpregs, *trp = NULL;
1909 1909 caddr_t pc = (caddr_t)rp->r_pc;
1910 1910 int v;
1911 1911 uint32_t auditing = AU_AUDITING();
1912 1912
1913 1913 /*
1914 1914 * If we're not an lwp, or if (when running native) the pc lies
1915 1915 * outside _sys_rtt, then we should immediately
1916 1916 * be die()ing horribly.
1917 1917 */
1918 1918 if (lwp == NULL || !instr_is_sys_rtt(pc))
1919 1919 return (1);
1920 1920
1921 1921 /*
1922 1922 * So at least we're in the right part of the kernel.
1923 1923 *
1924 1924 * Disassemble the instruction at the faulting pc.
1925 1925 * Once we know what it is, we carefully reconstruct the stack
1926 1926 * based on the order in which the stack is deconstructed in
1927 1927 * _sys_rtt. Ew.
1928 1928 */
1929 1929 if (instr_is_iret(pc)) {
1930 1930 /*
1931 1931 * We took the #gp while trying to perform the IRET.
1932 1932 * This means that either %cs or %ss are bad.
1933 1933 * All we know for sure is that most of the general
1934 1934 * registers have been restored, including the
1935 1935 * segment registers, and all we have left on the
1936 1936 * topmost part of the lwp's stack are the
1937 1937 * registers that the iretq was unable to consume.
1938 1938 *
1939 1939 * All the rest of the state was crushed by the #gp
1940 1940 * which pushed -its- registers atop our old save area
1941 1941 * (because we had to decrement the stack pointer, sigh) so
1942 1942 * all that we can try and do is to reconstruct the
1943 1943 * crushed frame from the #gp trap frame itself.
1944 1944 */
1945 1945 trp = &tmpregs;
1946 1946 trp->r_ss = lwptoregs(lwp)->r_ss;
1947 1947 trp->r_sp = lwptoregs(lwp)->r_sp;
1948 1948 trp->r_ps = lwptoregs(lwp)->r_ps;
1949 1949 trp->r_cs = lwptoregs(lwp)->r_cs;
1950 1950 trp->r_pc = lwptoregs(lwp)->r_pc;
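/*
 * The five iret-frame members (r_pc, r_cs, r_ps, r_sp, r_ss) were
 * taken above from the lwp's saved registers; now copy everything
 * below r_pc (the general registers, %trapno and %err) from the
 * #gp trap frame itself.
 */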
1951 1951 bcopy(rp, trp, offsetof(struct regs, r_pc));
1952 1952
1953 1953 /*
1954 1954 * Validate simple math
1955 1955 */
1956 1956 ASSERT(trp->r_pc == lwptoregs(lwp)->r_pc);
1957 1957 ASSERT(trp->r_err == rp->r_err);
1958 1958
1959 1959
1960 1960
1961 1961 }
1962 1962
1963 1963 #if defined(__amd64)
1964 1964 if (trp == NULL && lwp->lwp_pcb.pcb_rupdate != 0) {
1965 1965
1966 1966 /*
1967 1967 * This is the common case -- we're trying to load
1968 1968 * a bad segment register value in the only section
1969 1969 * of kernel code that ever loads segment registers.
1970 1970 *
1971 1971 * We don't need to do anything at this point because
1972 1972 * the pcb contains all the pending segment register
1973 1973 * state, and the regs are still intact because we
1974 1974 * didn't adjust the stack pointer yet. Given the fidelity
1975 1975 * of all this, we could conceivably send a signal
1976 1976 * to the lwp, rather than core-ing.
1977 1977 */
1978 1978 trp = lwptoregs(lwp);
1979 1979 ASSERT((caddr_t)trp == (caddr_t)rp->r_sp);
1980 1980 }
1981 1981
1982 1982 #elif defined(__i386)
1983 1983
1984 1984 if (trp == NULL && instr_is_segregs_pop(pc))
1985 1985 trp = lwptoregs(lwp);
1986 1986
1987 1987 #endif /* __i386 */
1988 1988
1989 1989 if (trp == NULL)
1990 1990 return (1);
1991 1991
1992 1992 /*
1993 1993 * If we get to here, we're reasonably confident that we've
1994 1994 * correctly decoded what happened on the way out of the kernel.
1995 1995 * Rewrite the lwp's registers so that we can create a core dump
1996 1996 * that (at least vaguely) represents the mcontext we were
1997 1997 * being asked to restore when things went so terribly wrong.
1998 1998 */
1999 1999
2000 2000 /*
2001 2001 * Make sure that we have a meaningful %trapno and %err.
2002 2002 */
2003 2003 trp->r_trapno = rp->r_trapno;
2004 2004 trp->r_err = rp->r_err;
2005 2005
2006 2006 if ((caddr_t)trp != (caddr_t)lwptoregs(lwp))
2007 2007 bcopy(trp, lwptoregs(lwp), sizeof (*trp));
2008 2008
2009 2009
2010 2010 mutex_enter(&p->p_lock);
2011 2011 lwp->lwp_cursig = SIGSEGV;
2012 2012 mutex_exit(&p->p_lock);
2013 2013
2014 2014 /*
2015 2015 * Terminate all LWPs but don't discard them. If another lwp beat
2016 2016 * us to the punch by calling exit(), evaporate now.
2017 2017 */
2018 2018 proc_is_exiting(p);
2019 2019 if (exitlwps(1) != 0) {
2020 2020 mutex_enter(&p->p_lock);
2021 2021 lwp_exit();
2022 2022 }
2023 2023
2024 2024 if (auditing) /* audit core dump */
2025 2025 audit_core_start(SIGSEGV);
2026 2026 v = core(SIGSEGV, B_FALSE);
2027 2027 if (auditing) /* audit core dump */
2028 2028 audit_core_finish(v ? CLD_KILLED : CLD_DUMPED);
2029 2029 exit(v ? CLD_KILLED : CLD_DUMPED, SIGSEGV);
2030 2030 return (0);
2031 2031 }
2032 2032
2033 2033 /*
2034 2034 * dump_tss() - Display the TSS structure
2035 2035 */
2036 2036
2037 2037 #if !defined(__xpv)
2038 2038 #if defined(__amd64)
2039 2039
2040 2040 static void
2041 2041 dump_tss(void)
2042 2042 {
2043 2043 const char tss_fmt[] = "tss.%s:\t0x%p\n"; /* Format string */
2044 2044 tss_t *tss = CPU->cpu_tss;
2045 2045
2046 2046 printf(tss_fmt, "tss_rsp0", (void *)tss->tss_rsp0);
2047 2047 printf(tss_fmt, "tss_rsp1", (void *)tss->tss_rsp1);
2048 2048 printf(tss_fmt, "tss_rsp2", (void *)tss->tss_rsp2);
2049 2049
2050 2050 printf(tss_fmt, "tss_ist1", (void *)tss->tss_ist1);
2051 2051 printf(tss_fmt, "tss_ist2", (void *)tss->tss_ist2);
2052 2052 printf(tss_fmt, "tss_ist3", (void *)tss->tss_ist3);
2053 2053 printf(tss_fmt, "tss_ist4", (void *)tss->tss_ist4);
2054 2054 printf(tss_fmt, "tss_ist5", (void *)tss->tss_ist5);
2055 2055 printf(tss_fmt, "tss_ist6", (void *)tss->tss_ist6);
2056 2056 printf(tss_fmt, "tss_ist7", (void *)tss->tss_ist7);
2057 2057 }
2058 2058
2059 2059 #elif defined(__i386)
2060 2060
2061 2061 static void
2062 2062 dump_tss(void)
2063 2063 {
2064 2064 const char tss_fmt[] = "tss.%s:\t0x%p\n"; /* Format string */
2065 2065 tss_t *tss = CPU->cpu_tss;
2066 2066
2067 2067 printf(tss_fmt, "tss_link", (void *)(uintptr_t)tss->tss_link);
2068 2068 printf(tss_fmt, "tss_esp0", (void *)(uintptr_t)tss->tss_esp0);
2069 2069 printf(tss_fmt, "tss_ss0", (void *)(uintptr_t)tss->tss_ss0);
2070 2070 printf(tss_fmt, "tss_esp1", (void *)(uintptr_t)tss->tss_esp1);
2071 2071 printf(tss_fmt, "tss_ss1", (void *)(uintptr_t)tss->tss_ss1);
2072 2072 printf(tss_fmt, "tss_esp2", (void *)(uintptr_t)tss->tss_esp2);
2073 2073 printf(tss_fmt, "tss_ss2", (void *)(uintptr_t)tss->tss_ss2);
2074 2074 printf(tss_fmt, "tss_cr3", (void *)(uintptr_t)tss->tss_cr3);
2075 2075 printf(tss_fmt, "tss_eip", (void *)(uintptr_t)tss->tss_eip);
2076 2076 printf(tss_fmt, "tss_eflags", (void *)(uintptr_t)tss->tss_eflags);
2077 2077 printf(tss_fmt, "tss_eax", (void *)(uintptr_t)tss->tss_eax);
2078 2078 printf(tss_fmt, "tss_ebx", (void *)(uintptr_t)tss->tss_ebx);
2079 2079 printf(tss_fmt, "tss_ecx", (void *)(uintptr_t)tss->tss_ecx);
2080 2080 printf(tss_fmt, "tss_edx", (void *)(uintptr_t)tss->tss_edx);
2081 2081 printf(tss_fmt, "tss_esp", (void *)(uintptr_t)tss->tss_esp);
2082 2082 }
2083 2083
2084 2084 #endif /* __amd64 */
2085 2085 #endif /* !__xpv */
2086 2086
2087 2087 #if defined(TRAPTRACE)
2088 2088
2089 2089 int ttrace_nrec = 10; /* number of records to dump out */
2090 2090 int ttrace_dump_nregs = 0; /* dump out this many records with regs too */
2091 2091
2092 2092 /*
2093 2093 * Dump out the last ttrace_nrec traptrace records on each CPU
2094 2094 */
2095 2095 static void
2096 2096 dump_ttrace(void)
2097 2097 {
2098 2098 trap_trace_ctl_t *ttc;
2099 2099 trap_trace_rec_t *rec;
2100 2100 uintptr_t current;
2101 2101 int i, j, k;
2102 2102 int n = NCPU;
2103 2103 #if defined(__amd64)
2104 2104 const char banner[] =
2105 2105 "\ncpu address timestamp "
2106 2106 "type vc handler pc\n";
2107 2107 const char fmt1[] = "%3d %016lx %12llx ";
2108 2108 #elif defined(__i386)
2109 2109 const char banner[] =
2110 2110 "\ncpu address timestamp type vc handler pc\n";
2111 2111 const char fmt1[] = "%3d %08lx %12llx ";
2112 2112 #endif
2113 2113 const char fmt2[] = "%4s %3x ";
2114 2114 const char fmt3[] = "%8s ";
2115 2115
2116 2116 if (ttrace_nrec == 0)
2117 2117 return;
2118 2118
2119 2119 printf(banner);
2120 2120
2121 2121 for (i = 0; i < n; i++) {
2122 2122 ttc = &trap_trace_ctl[i];
2123 2123 if (ttc->ttc_first == NULL)
2124 2124 continue;
2125 2125
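/*
 * ttc_next points at the slot that will be written next, so the
 * newest record sits one entry back; walk backwards, wrapping to
 * the last record below ttc_limit once we pass ttc_first.
 */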
2126 2126 current = ttc->ttc_next - sizeof (trap_trace_rec_t);
2127 2127 for (j = 0; j < ttrace_nrec; j++) {
2128 2128 struct sysent *sys;
2129 2129 struct autovec *vec;
2130 2130 extern struct av_head autovect[];
2131 2131 int type;
2132 2132 ulong_t off;
2133 2133 char *sym, *stype;
2134 2134
2135 2135 if (current < ttc->ttc_first)
2136 2136 current =
2137 2137 ttc->ttc_limit - sizeof (trap_trace_rec_t);
2138 2138
2139 2139 if (current == NULL)
2140 2140 continue;
2141 2141
2142 2142 rec = (trap_trace_rec_t *)current;
2143 2143
2144 2144 if (rec->ttr_stamp == 0)
2145 2145 break;
2146 2146
2147 2147 printf(fmt1, i, (uintptr_t)rec, rec->ttr_stamp);
2148 2148
2149 2149 switch (rec->ttr_marker) {
2150 2150 case TT_SYSCALL:
2151 2151 case TT_SYSENTER:
2152 2152 case TT_SYSC:
2153 2153 case TT_SYSC64:
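/*
 * Pick the table used to decode ttr_sysnum: on amd64 assume a
 * 32-bit syscall (sysent32) and switch to the native sysent table
 * for a TT_SYSC64 record; on i386 only the native table exists.
 * Note that the nested switch is opened inside the #if arms below.
 */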
2154 2154 #if defined(__amd64)
2155 2155 sys = &sysent32[rec->ttr_sysnum];
2156 2156 switch (rec->ttr_marker) {
2157 2157 case TT_SYSC64:
2158 2158 sys = &sysent[rec->ttr_sysnum];
2159 2159 /*FALLTHROUGH*/
2160 2160 #elif defined(__i386)
2161 2161 sys = &sysent[rec->ttr_sysnum];
2162 2162 switch (rec->ttr_marker) {
2163 2163 case TT_SYSC64:
2164 2164 #endif
2165 2165 case TT_SYSC:
2166 2166 stype = "sysc"; /* syscall */
2167 2167 break;
2168 2168 case TT_SYSCALL:
2169 2169 stype = "lcal"; /* lcall */
2170 2170 break;
2171 2171 case TT_SYSENTER:
2172 2172 stype = "syse"; /* sysenter */
2173 2173 break;
2174 2174 default:
2175 2175 break;
2176 2176 }
2177 2177 printf(fmt2, "sysc", rec->ttr_sysnum);
2178 2178 if (sys != NULL) {
2179 2179 sym = kobj_getsymname(
2180 2180 (uintptr_t)sys->sy_callc,
2181 2181 &off);
2182 2182 if (sym != NULL)
2183 2183 printf(fmt3, sym);
2184 2184 else
2185 2185 printf("%p ", sys->sy_callc);
2186 2186 } else {
2187 2187 printf(fmt3, "unknown");
2188 2188 }
2189 2189 break;
2190 2190
2191 2191 case TT_INTERRUPT:
2192 2192 printf(fmt2, "intr", rec->ttr_vector);
2193 2193 if (get_intr_handler != NULL)
2194 2194 vec = (struct autovec *)
2195 2195 (*get_intr_handler)
2196 2196 (rec->ttr_cpuid, rec->ttr_vector);
2197 2197 else
2198 2198 vec =
2199 2199 autovect[rec->ttr_vector].avh_link;
2200 2200
2201 2201 if (vec != NULL) {
2202 2202 sym = kobj_getsymname(
2203 2203 (uintptr_t)vec->av_vector, &off);
2204 2204 if (sym != NULL)
2205 2205 printf(fmt3, sym);
2206 2206 else
2207 2207 printf("%p ", vec->av_vector);
2208 2208 } else {
2209 2209 printf(fmt3, "unknown ");
2210 2210 }
2211 2211 break;
2212 2212
2213 2213 case TT_TRAP:
2214 2214 case TT_EVENT:
2215 2215 type = rec->ttr_regs.r_trapno;
2216 2216 printf(fmt2, "trap", type);
2217 2217 if (type < TRAP_TYPES)
2218 2218 printf(" #%s ",
2219 2219 trap_type_mnemonic[type]);
2220 2220 else
2221 2221 switch (type) {
2222 2222 case T_AST:
2223 2223 printf(fmt3, "ast");
2224 2224 break;
2225 2225 default:
2226 2226 printf(fmt3, "");
2227 2227 break;
2228 2228 }
2229 2229 break;
2230 2230
2231 2231 default:
2232 2232 break;
2233 2233 }
2234 2234
2235 2235 sym = kobj_getsymname(rec->ttr_regs.r_pc, &off);
2236 2236 if (sym != NULL)
2237 2237 printf("%s+%lx\n", sym, off);
2238 2238 else
2239 2239 printf("%lx\n", rec->ttr_regs.r_pc);
2240 2240
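/*
 * ttrace_dump_nregs is a single global budget, so only the first
 * few records dumped (across all CPUs) also get their registers,
 * current thread, and saved stack printed.
 */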
2241 2241 if (ttrace_dump_nregs-- > 0) {
2242 2242 int s;
2243 2243
2244 2244 if (rec->ttr_marker == TT_INTERRUPT)
2245 2245 printf(
2246 2246 "\t\tipl %x spl %x pri %x\n",
2247 2247 rec->ttr_ipl,
2248 2248 rec->ttr_spl,
2249 2249 rec->ttr_pri);
2250 2250
2251 2251 dumpregs(&rec->ttr_regs);
2252 2252
2253 2253 printf("\t%3s: %p\n\n", " ct",
2254 2254 (void *)rec->ttr_curthread);
2255 2255
2256 2256 /*
2257 2257 * print out the pc stack that we recorded
2258 2258 * at trap time (if any)
2259 2259 */
2260 2260 for (s = 0; s < rec->ttr_sdepth; s++) {
2261 2261 uintptr_t fullpc;
2262 2262
2263 2263 if (s >= TTR_STACK_DEPTH) {
2264 2264 printf("ttr_sdepth corrupt\n");
2265 2265 break;
2266 2266 }
2267 2267
2268 2268 fullpc = (uintptr_t)rec->ttr_stack[s];
2269 2269
2270 2270 sym = kobj_getsymname(fullpc, &off);
2271 2271 if (sym != NULL)
2272 2272 printf("-> %s+0x%lx()\n",
2273 2273 sym, off);
2274 2274 else
2275 2275 printf("-> 0x%lx()\n", fullpc);
2276 2276 }
2277 2277 printf("\n");
2278 2278 }
2279 2279 current -= sizeof (trap_trace_rec_t);
2280 2280 }
2281 2281 }
2282 2282 }
2283 2283
2284 2284 #endif /* TRAPTRACE */
2285 2285
2286 2286 void
2287 2287 panic_showtrap(struct panic_trap_info *tip)
2288 2288 {
2289 2289 showregs(tip->trap_type, tip->trap_regs, tip->trap_addr);
2290 2290
2291 2291 #if defined(TRAPTRACE)
2292 2292 dump_ttrace();
2293 2293 #endif
2294 2294
2295 2295 #if !defined(__xpv)
2296 2296 if (tip->trap_type == T_DBLFLT)
2297 2297 dump_tss();
2298 2298 #endif
2299 2299 }
2300 2300
2301 2301 void
2302 2302 panic_savetrap(panic_data_t *pdp, struct panic_trap_info *tip)
2303 2303 {
2304 2304 panic_saveregs(pdp, tip->trap_regs);
2305 2305 }