OS-3825 lxbrand rsyslogd abort on centos6
OS-4047 lxbrand vsyscall while SIGSEGV? on next trap we're handler-free!
Reviewed by: Bryan Cantrill <bryan@joyent.com>
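
This webrev's substantive change is the brand page-fault hook added to trap(): on a user page fault, a branded process (here the lx brand, per OS-3825/OS-4047) gets a chance to handle the fault before the native pagefault() path runs, e.g. to emulate a reference to the Linux vsyscall page that has no real mapping. The sketch below is not part of the webrev; it only illustrates what a brand-side b_pagefault handler could look like, assuming the signature visible at the new call site (return 0 when the fault was handled completely, non-zero to fall through to pagefault()). The vsyscall constants and the handler body are illustrative placeholders, not the lx brand's actual implementation.

    /*
     * Illustrative sketch of a brand b_pagefault handler (not part of this
     * webrev).  A brand would hang a function like this off its brand_ops;
     * trap() reaches it via BROP(p)->b_pagefault() before calling pagefault().
     */
    #include <sys/types.h>
    #include <sys/proc.h>
    #include <sys/klwp.h>
    #include <vm/seg_enum.h>

    /* Linux vsyscall page; the handling below is hypothetical. */
    #define SAMPLE_VSYSCALL_ADDR    ((uintptr_t)0xffffffffff600000UL)
    #define SAMPLE_VSYSCALL_SIZE    ((uintptr_t)0x1000)

    /*ARGSUSED*/
    static int
    sample_brand_pagefault(proc_t *p, klwp_t *lwp, caddr_t addr,
        enum fault_type type, enum seg_rw rw)
    {
            uintptr_t va = (uintptr_t)addr;

            if (type != F_INVAL || rw != S_EXEC)
                    return (-1);    /* not ours; let pagefault() handle it */

            if (va >= SAMPLE_VSYSCALL_ADDR &&
                va < SAMPLE_VSYSCALL_ADDR + SAMPLE_VSYSCALL_SIZE) {
                    /*
                     * Execute fault in the unmapped vsyscall page: a real
                     * brand would emulate the requested vsyscall here (e.g.
                     * adjust the lwp's registers as if the call had
                     * returned) and report the fault as fully handled.
                     */
                    return (0);
            }

            return (-1);    /* fall through to the native pagefault() path */
    }

A NULL b_pagefault (checked at the new call site) simply means the brand does not interpose and the fault goes straight to pagefault().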
--- old/usr/src/uts/i86pc/os/trap.c
+++ new/usr/src/uts/i86pc/os/trap.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
28 28 /* All Rights Reserved */
29 29 /* */
30 30 /* Copyright (c) 1987, 1988 Microsoft Corporation */
31 31 /* All Rights Reserved */
32 32 /* */
33 33
34 34 /*
35 - * Copyright 2012 Joyent, Inc. All rights reserved.
35 + * Copyright 2015 Joyent, Inc.
36 36 */
37 37
38 38 #include <sys/types.h>
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/param.h>
41 41 #include <sys/signal.h>
42 42 #include <sys/systm.h>
43 43 #include <sys/user.h>
44 44 #include <sys/proc.h>
45 45 #include <sys/disp.h>
46 46 #include <sys/class.h>
47 47 #include <sys/core.h>
48 48 #include <sys/syscall.h>
49 49 #include <sys/cpuvar.h>
50 50 #include <sys/vm.h>
51 51 #include <sys/sysinfo.h>
52 52 #include <sys/fault.h>
53 53 #include <sys/stack.h>
54 54 #include <sys/psw.h>
55 55 #include <sys/regset.h>
56 56 #include <sys/fp.h>
57 57 #include <sys/trap.h>
58 58 #include <sys/kmem.h>
59 59 #include <sys/vtrace.h>
60 60 #include <sys/cmn_err.h>
61 61 #include <sys/prsystm.h>
62 62 #include <sys/mutex_impl.h>
63 63 #include <sys/machsystm.h>
64 64 #include <sys/archsystm.h>
65 65 #include <sys/sdt.h>
66 66 #include <sys/avintr.h>
67 67 #include <sys/kobj.h>
68 68
69 69 #include <vm/hat.h>
70 70
71 71 #include <vm/seg_kmem.h>
72 72 #include <vm/as.h>
73 73 #include <vm/seg.h>
74 74 #include <vm/hat_pte.h>
75 75 #include <vm/hat_i86.h>
76 76
77 77 #include <sys/procfs.h>
78 78
79 79 #include <sys/reboot.h>
80 80 #include <sys/debug.h>
81 81 #include <sys/debugreg.h>
82 82 #include <sys/modctl.h>
83 83 #include <sys/aio_impl.h>
84 84 #include <sys/tnf.h>
85 85 #include <sys/tnf_probe.h>
86 86 #include <sys/cred.h>
87 87 #include <sys/mman.h>
88 88 #include <sys/x86_archext.h>
89 89 #include <sys/copyops.h>
90 90 #include <c2/audit.h>
91 91 #include <sys/ftrace.h>
92 92 #include <sys/panic.h>
93 93 #include <sys/traptrace.h>
94 94 #include <sys/ontrap.h>
95 95 #include <sys/cpc_impl.h>
96 96 #include <sys/bootconf.h>
97 97 #include <sys/bootinfo.h>
98 98 #include <sys/promif.h>
99 99 #include <sys/mach_mmu.h>
100 100 #if defined(__xpv)
101 101 #include <sys/hypervisor.h>
102 102 #endif
103 103 #include <sys/contract/process_impl.h>
104 +#include <sys/brand.h>
104 105
105 106 #define USER 0x10000 /* user-mode flag added to trap type */
106 107
107 108 static const char *trap_type_mnemonic[] = {
108 109 "de", "db", "2", "bp",
109 110 "of", "br", "ud", "nm",
110 111 "df", "9", "ts", "np",
111 112 "ss", "gp", "pf", "15",
112 113 "mf", "ac", "mc", "xf"
113 114 };
114 115
115 116 static const char *trap_type[] = {
116 117 "Divide error", /* trap id 0 */
117 118 "Debug", /* trap id 1 */
118 119 "NMI interrupt", /* trap id 2 */
119 120 "Breakpoint", /* trap id 3 */
120 121 "Overflow", /* trap id 4 */
121 122 "BOUND range exceeded", /* trap id 5 */
122 123 "Invalid opcode", /* trap id 6 */
123 124 "Device not available", /* trap id 7 */
124 125 "Double fault", /* trap id 8 */
125 126 "Coprocessor segment overrun", /* trap id 9 */
126 127 "Invalid TSS", /* trap id 10 */
127 128 "Segment not present", /* trap id 11 */
128 129 "Stack segment fault", /* trap id 12 */
129 130 "General protection", /* trap id 13 */
130 131 "Page fault", /* trap id 14 */
131 132 "Reserved", /* trap id 15 */
132 133 "x87 floating point error", /* trap id 16 */
133 134 "Alignment check", /* trap id 17 */
134 135 "Machine check", /* trap id 18 */
135 136 "SIMD floating point exception", /* trap id 19 */
136 137 };
137 138
138 139 #define TRAP_TYPES (sizeof (trap_type) / sizeof (trap_type[0]))
139 140
140 141 #define SLOW_SCALL_SIZE 2
141 142 #define FAST_SCALL_SIZE 2
142 143
143 144 int tudebug = 0;
144 145 int tudebugbpt = 0;
145 146 int tudebugfpe = 0;
146 147 int tudebugsse = 0;
147 148
148 149 #if defined(TRAPDEBUG) || defined(lint)
149 150 int tdebug = 0;
150 151 int lodebug = 0;
151 152 int faultdebug = 0;
152 153 #else
153 154 #define tdebug 0
154 155 #define lodebug 0
155 156 #define faultdebug 0
156 157 #endif /* defined(TRAPDEBUG) || defined(lint) */
157 158
158 159 #if defined(TRAPTRACE)
159 160 /*
160 161 * trap trace record for cpu0 is allocated here.
161 162 * trap trace records for non-boot cpus are allocated in mp_startup_init().
162 163 */
163 164 static trap_trace_rec_t trap_tr0[TRAPTR_NENT];
164 165 trap_trace_ctl_t trap_trace_ctl[NCPU] = {
165 166 {
166 167 (uintptr_t)trap_tr0, /* next record */
167 168 (uintptr_t)trap_tr0, /* first record */
168 169 (uintptr_t)(trap_tr0 + TRAPTR_NENT), /* limit */
169 170 (uintptr_t)0 /* current */
170 171 },
171 172 };
172 173
173 174 /*
174 175 * default trap buffer size
175 176 */
176 177 size_t trap_trace_bufsize = TRAPTR_NENT * sizeof (trap_trace_rec_t);
177 178 int trap_trace_freeze = 0;
178 179 int trap_trace_off = 0;
179 180
180 181 /*
181 182 * A dummy TRAPTRACE entry to use after death.
182 183 */
183 184 trap_trace_rec_t trap_trace_postmort;
184 185
185 186 static void dump_ttrace(void);
186 187 #endif /* TRAPTRACE */
187 188 static void dumpregs(struct regs *);
188 189 static void showregs(uint_t, struct regs *, caddr_t);
189 190 static int kern_gpfault(struct regs *);
190 191
191 192 /*ARGSUSED*/
192 193 static int
193 194 die(uint_t type, struct regs *rp, caddr_t addr, processorid_t cpuid)
194 195 {
195 196 struct panic_trap_info ti;
196 197 const char *trap_name, *trap_mnemonic;
197 198
198 199 if (type < TRAP_TYPES) {
199 200 trap_name = trap_type[type];
200 201 trap_mnemonic = trap_type_mnemonic[type];
201 202 } else {
202 203 trap_name = "trap";
203 204 trap_mnemonic = "-";
204 205 }
205 206
206 207 #ifdef TRAPTRACE
207 208 TRAPTRACE_FREEZE;
208 209 #endif
209 210
210 211 ti.trap_regs = rp;
211 212 ti.trap_type = type & ~USER;
212 213 ti.trap_addr = addr;
213 214
214 215 curthread->t_panic_trap = &ti;
215 216
216 217 if (type == T_PGFLT && addr < (caddr_t)KERNELBASE) {
217 218 panic("BAD TRAP: type=%x (#%s %s) rp=%p addr=%p "
218 219 "occurred in module \"%s\" due to %s",
219 220 type, trap_mnemonic, trap_name, (void *)rp, (void *)addr,
220 221 mod_containing_pc((caddr_t)rp->r_pc),
221 222 addr < (caddr_t)PAGESIZE ?
222 223 "a NULL pointer dereference" :
223 224 "an illegal access to a user address");
224 225 } else
225 226 panic("BAD TRAP: type=%x (#%s %s) rp=%p addr=%p",
226 227 type, trap_mnemonic, trap_name, (void *)rp, (void *)addr);
227 228 return (0);
228 229 }
229 230
230 231 /*
231 232 * Rewrite the instruction at pc to be an int $T_SYSCALLINT instruction.
232 233 *
233 234 * int <vector> is two bytes: 0xCD <vector>
234 235 */
235 236
236 237 static int
237 238 rewrite_syscall(caddr_t pc)
238 239 {
239 240 uchar_t instr[SLOW_SCALL_SIZE] = { 0xCD, T_SYSCALLINT };
240 241
241 242 if (uwrite(curthread->t_procp, instr, SLOW_SCALL_SIZE,
242 243 (uintptr_t)pc) != 0)
243 244 return (1);
244 245
245 246 return (0);
246 247 }
247 248
248 249 /*
249 250 * Test to see if the instruction at pc is sysenter or syscall. The second
250 251 * argument should be the x86 feature flag corresponding to the expected
251 252 * instruction.
252 253 *
253 254 * sysenter is two bytes: 0x0F 0x34
254 255 * syscall is two bytes: 0x0F 0x05
255 256 * int $T_SYSCALLINT is two bytes: 0xCD 0x91
256 257 */
257 258
258 259 static int
259 260 instr_is_other_syscall(caddr_t pc, int which)
260 261 {
261 262 uchar_t instr[FAST_SCALL_SIZE];
262 263
263 264 ASSERT(which == X86FSET_SEP || which == X86FSET_ASYSC || which == 0xCD);
264 265
265 266 if (copyin_nowatch(pc, (caddr_t)instr, FAST_SCALL_SIZE) != 0)
266 267 return (0);
267 268
268 269 switch (which) {
269 270 case X86FSET_SEP:
270 271 if (instr[0] == 0x0F && instr[1] == 0x34)
271 272 return (1);
272 273 break;
273 274 case X86FSET_ASYSC:
274 275 if (instr[0] == 0x0F && instr[1] == 0x05)
275 276 return (1);
276 277 break;
277 278 case 0xCD:
278 279 if (instr[0] == 0xCD && instr[1] == T_SYSCALLINT)
279 280 return (1);
280 281 break;
281 282 }
282 283
283 284 return (0);
284 285 }
285 286
286 287 static const char *
287 288 syscall_insn_string(int syscall_insn)
288 289 {
289 290 switch (syscall_insn) {
290 291 case X86FSET_SEP:
291 292 return ("sysenter");
292 293 case X86FSET_ASYSC:
293 294 return ("syscall");
294 295 case 0xCD:
295 296 return ("int");
296 297 default:
297 298 return ("Unknown");
298 299 }
299 300 }
300 301
301 302 static int
302 303 ldt_rewrite_syscall(struct regs *rp, proc_t *p, int syscall_insn)
303 304 {
304 305 caddr_t linearpc;
305 306 int return_code = 0;
306 307
307 308 mutex_enter(&p->p_ldtlock); /* Must be held across linear_pc() */
308 309
309 310 if (linear_pc(rp, p, &linearpc) == 0) {
310 311
311 312 /*
312 313 * If another thread beat us here, it already changed
313 314 * this site to the slower (int) syscall instruction.
314 315 */
315 316 if (instr_is_other_syscall(linearpc, 0xCD)) {
316 317 return_code = 1;
317 318 } else if (instr_is_other_syscall(linearpc, syscall_insn)) {
318 319
319 320 if (rewrite_syscall(linearpc) == 0) {
320 321 return_code = 1;
321 322 }
322 323 #ifdef DEBUG
323 324 else
324 325 cmn_err(CE_WARN, "failed to rewrite %s "
325 326 "instruction in process %d",
326 327 syscall_insn_string(syscall_insn),
327 328 p->p_pid);
328 329 #endif /* DEBUG */
329 330 }
330 331 }
331 332
332 333 mutex_exit(&p->p_ldtlock); /* Must be held across linear_pc() */
333 334
334 335 return (return_code);
335 336 }
336 337
337 338 /*
338 339 * Test to see if the instruction at pc is a system call instruction.
339 340 *
340 341 * The bytes of an lcall instruction used for the syscall trap.
341 342 * static uchar_t lcall[7] = { 0x9a, 0, 0, 0, 0, 0x7, 0 };
342 343 * static uchar_t lcallalt[7] = { 0x9a, 0, 0, 0, 0, 0x27, 0 };
343 344 */
344 345
345 346 #define LCALLSIZE 7
346 347
347 348 static int
348 349 instr_is_lcall_syscall(caddr_t pc)
349 350 {
350 351 uchar_t instr[LCALLSIZE];
351 352
352 353 if (copyin_nowatch(pc, (caddr_t)instr, LCALLSIZE) == 0 &&
353 354 instr[0] == 0x9a &&
354 355 instr[1] == 0 &&
355 356 instr[2] == 0 &&
356 357 instr[3] == 0 &&
357 358 instr[4] == 0 &&
358 359 (instr[5] == 0x7 || instr[5] == 0x27) &&
359 360 instr[6] == 0)
360 361 return (1);
361 362
362 363 return (0);
363 364 }
364 365
365 366 #ifdef __amd64
366 367
367 368 /*
368 369 * In the first revisions of amd64 CPUs produced by AMD, the LAHF and
369 370 * SAHF instructions were not implemented in 64-bit mode. Later revisions
370 371 * did implement these instructions. An extension to the cpuid instruction
371 372 * was added to check for the capability of executing these instructions
372 373 * in 64-bit mode.
373 374 *
374 375 * Intel originally did not implement these instructions in EM64T either,
375 376 * but added them in later revisions.
376 377 *
377 378 * So, there are different chip revisions by both vendors out there that
378 379 * may or may not implement these instructions. The easy solution is to
379 380 * just always emulate these instructions on demand.
380 381 *
381 382 * SAHF == store %ah in the lower 8 bits of %rflags (opcode 0x9e)
382 383 * LAHF == load the lower 8 bits of %rflags into %ah (opcode 0x9f)
383 384 */
384 385
385 386 #define LSAHFSIZE 1
386 387
387 388 static int
388 389 instr_is_lsahf(caddr_t pc, uchar_t *instr)
389 390 {
390 391 if (copyin_nowatch(pc, (caddr_t)instr, LSAHFSIZE) == 0 &&
391 392 (*instr == 0x9e || *instr == 0x9f))
392 393 return (1);
393 394 return (0);
394 395 }
395 396
396 397 /*
397 398 * Emulate the LAHF and SAHF instructions. The reference manuals define
398 399 * these instructions to always load/store bit 1 as a 1, and bits 3 and 5
399 400 * as a 0. The other, defined, bits are copied (the PS_ICC bits and PS_P).
400 401 *
401 402 * Note that %ah is bits 8-15 of %rax.
402 403 */
403 404 static void
404 405 emulate_lsahf(struct regs *rp, uchar_t instr)
405 406 {
406 407 if (instr == 0x9e) {
407 408 /* sahf. Copy bits from %ah to flags. */
408 409 rp->r_ps = (rp->r_ps & ~0xff) |
409 410 ((rp->r_rax >> 8) & PSL_LSAHFMASK) | PS_MB1;
410 411 } else {
411 412 /* lahf. Copy bits from flags to %ah. */
412 413 rp->r_rax = (rp->r_rax & ~0xff00) |
413 414 (((rp->r_ps & PSL_LSAHFMASK) | PS_MB1) << 8);
414 415 }
415 416 rp->r_pc += LSAHFSIZE;
416 417 }
417 418 #endif /* __amd64 */
418 419
419 420 #ifdef OPTERON_ERRATUM_91
420 421
421 422 /*
422 423 * Test to see if the instruction at pc is a prefetch instruction.
423 424 *
424 425 * The first byte of prefetch instructions is always 0x0F.
425 426 * The second byte is 0x18 for regular prefetch or 0x0D for AMD 3dnow prefetch.
426 427 * The third byte (ModRM) contains the register field bits (bits 3-5).
427 428 * These bits must be between 0 and 3 inclusive for regular prefetch and
428 429 * 0 and 1 inclusive for AMD 3dnow prefetch.
429 430 *
430 431 * In 64-bit mode, there may be a one-byte REX prefix (0x40-0x4F).
431 432 */
432 433
433 434 static int
434 435 cmp_to_prefetch(uchar_t *p)
435 436 {
436 437 #ifdef _LP64
437 438 if ((p[0] & 0xF0) == 0x40) /* 64-bit REX prefix */
438 439 p++;
439 440 #endif
440 441 return ((p[0] == 0x0F && p[1] == 0x18 && ((p[2] >> 3) & 7) <= 3) ||
441 442 (p[0] == 0x0F && p[1] == 0x0D && ((p[2] >> 3) & 7) <= 1));
442 443 }
443 444
444 445 static int
445 446 instr_is_prefetch(caddr_t pc)
446 447 {
447 448 uchar_t instr[4]; /* optional REX prefix plus 3-byte opcode */
448 449
449 450 return (copyin_nowatch(pc, instr, sizeof (instr)) == 0 &&
450 451 cmp_to_prefetch(instr));
451 452 }
452 453
453 454 #endif /* OPTERON_ERRATUM_91 */
454 455
455 456 /*
456 457 * Called from the trap handler when a processor trap occurs.
457 458 *
458 459 * Note: All user-level traps that might call stop() must exit
459 460 * trap() by 'goto out' or by falling through.
460 461 * Note Also: trap() is usually called with interrupts enabled, (PS_IE == 1)
461 462 * however, there are paths that arrive here with PS_IE == 0 so special care
462 463 * must be taken in those cases.
463 464 */
464 465 void
465 466 trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
466 467 {
467 468 kthread_t *ct = curthread;
468 469 enum seg_rw rw;
469 470 unsigned type;
470 471 proc_t *p = ttoproc(ct);
471 472 klwp_t *lwp = ttolwp(ct);
472 473 uintptr_t lofault;
473 474 label_t *onfault;
474 475 faultcode_t pagefault(), res, errcode;
475 476 enum fault_type fault_type;
476 477 k_siginfo_t siginfo;
477 478 uint_t fault = 0;
478 479 int mstate;
479 480 int sicode = 0;
480 481 int watchcode;
481 482 int watchpage;
482 483 caddr_t vaddr;
483 484 int singlestep_twiddle;
484 485 size_t sz;
485 486 int ta;
486 487 #ifdef __amd64
487 488 uchar_t instr;
488 489 #endif
489 490
490 491 ASSERT_STACK_ALIGNED();
491 492
492 493 type = rp->r_trapno;
493 494 CPU_STATS_ADDQ(CPU, sys, trap, 1);
494 495 ASSERT(ct->t_schedflag & TS_DONT_SWAP);
495 496
496 497 if (type == T_PGFLT) {
497 498
498 499 errcode = rp->r_err;
499 500 if (errcode & PF_ERR_WRITE)
500 501 rw = S_WRITE;
501 502 else if ((caddr_t)rp->r_pc == addr ||
502 503 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC)))
503 504 rw = S_EXEC;
504 505 else
505 506 rw = S_READ;
506 507
507 508 #if defined(__i386)
508 509 /*
509 510 * Pentium Pro work-around
510 511 */
511 512 if ((errcode & PF_ERR_PROT) && pentiumpro_bug4046376) {
512 513 uint_t attr;
513 514 uint_t priv_violation;
514 515 uint_t access_violation;
515 516
516 517 if (hat_getattr(addr < (caddr_t)kernelbase ?
517 518 curproc->p_as->a_hat : kas.a_hat, addr, &attr)
518 519 == -1) {
519 520 errcode &= ~PF_ERR_PROT;
520 521 } else {
521 522 priv_violation = (errcode & PF_ERR_USER) &&
522 523 !(attr & PROT_USER);
523 524 access_violation = (errcode & PF_ERR_WRITE) &&
524 525 !(attr & PROT_WRITE);
525 526 if (!priv_violation && !access_violation)
526 527 goto cleanup;
527 528 }
528 529 }
529 530 #endif /* __i386 */
530 531
531 532 } else if (type == T_SGLSTP && lwp != NULL)
532 533 lwp->lwp_pcb.pcb_drstat = (uintptr_t)addr;
533 534
534 535 if (tdebug)
535 536 showregs(type, rp, addr);
536 537
537 538 if (USERMODE(rp->r_cs)) {
538 539 /*
539 540 * Set up the current cred to use during this trap. u_cred
540 541 * no longer exists. t_cred is used instead.
541 542 * The current process credential applies to the thread for
542 543 * the entire trap. If trapping from the kernel, this
543 544 * should already be set up.
544 545 */
545 546 if (ct->t_cred != p->p_cred) {
546 547 cred_t *oldcred = ct->t_cred;
547 548 /*
548 549 * DTrace accesses t_cred in probe context. t_cred
549 550 * must always be either NULL, or point to a valid,
550 551 * allocated cred structure.
551 552 */
552 553 ct->t_cred = crgetcred();
553 554 crfree(oldcred);
554 555 }
555 556 ASSERT(lwp != NULL);
556 557 type |= USER;
557 558 ASSERT(lwptoregs(lwp) == rp);
558 559 lwp->lwp_state = LWP_SYS;
559 560
560 561 switch (type) {
561 562 case T_PGFLT + USER:
562 563 if ((caddr_t)rp->r_pc == addr)
563 564 mstate = LMS_TFAULT;
564 565 else
565 566 mstate = LMS_DFAULT;
566 567 break;
567 568 default:
568 569 mstate = LMS_TRAP;
569 570 break;
570 571 }
571 572 /* Kernel probe */
572 573 TNF_PROBE_1(thread_state, "thread", /* CSTYLED */,
573 574 tnf_microstate, state, mstate);
574 575 mstate = new_mstate(ct, mstate);
575 576
576 577 bzero(&siginfo, sizeof (siginfo));
577 578 }
578 579
579 580 switch (type) {
580 581 case T_PGFLT + USER:
581 582 case T_SGLSTP:
582 583 case T_SGLSTP + USER:
583 584 case T_BPTFLT + USER:
584 585 break;
585 586
586 587 default:
587 588 FTRACE_2("trap(): type=0x%lx, regs=0x%lx",
588 589 (ulong_t)type, (ulong_t)rp);
589 590 break;
590 591 }
591 592
592 593 switch (type) {
593 594 case T_SIMDFPE:
594 595 /* Make sure we enable interrupts before die()ing */
595 596 sti(); /* The SIMD exception comes in via cmninttrap */
596 597 /*FALLTHROUGH*/
597 598 default:
598 599 if (type & USER) {
599 600 if (tudebug)
600 601 showregs(type, rp, (caddr_t)0);
601 602 printf("trap: Unknown trap type %d in user mode\n",
602 603 type & ~USER);
603 604 siginfo.si_signo = SIGILL;
604 605 siginfo.si_code = ILL_ILLTRP;
605 606 siginfo.si_addr = (caddr_t)rp->r_pc;
606 607 siginfo.si_trapno = type & ~USER;
607 608 fault = FLTILL;
608 609 break;
609 610 } else {
610 611 (void) die(type, rp, addr, cpuid);
611 612 /*NOTREACHED*/
612 613 }
613 614
614 615 case T_PGFLT: /* system page fault */
615 616 /*
616 617 * If we're under on_trap() protection (see <sys/ontrap.h>),
617 618 * set ot_trap and bounce back to the on_trap() call site
618 619 * via the installed trampoline.
619 620 */
620 621 if ((ct->t_ontrap != NULL) &&
621 622 (ct->t_ontrap->ot_prot & OT_DATA_ACCESS)) {
622 623 ct->t_ontrap->ot_trap |= OT_DATA_ACCESS;
623 624 rp->r_pc = ct->t_ontrap->ot_trampoline;
624 625 goto cleanup;
625 626 }
626 627
627 628 /*
628 629 * If we have an Instruction fault in kernel mode, then that
629 630 * means we've tried to execute a user page (SMEP) or both of
630 631 * PAE and NXE are enabled. In either case, given that it's a
631 632 * kernel fault, we should panic immediately and not try to make
632 633 * any more forward progress. This indicates a bug in the
633 634 * kernel, which if execution continued, could be exploited to
634 635 * wreak havoc on the system.
635 636 */
636 637 if (errcode & PF_ERR_EXEC) {
637 638 (void) die(type, rp, addr, cpuid);
638 639 }
639 640
640 641 /*
641 642 * We need to check if SMAP is in play. If SMAP is in play, then
642 643 * any access to a user page will show up as a protection
643 644 * violation. To see if SMAP is enabled we first check if it's a
644 645 * user address and whether we have the feature flag set. If we
645 646 * do and the interrupted registers do not allow for user
646 647 * accesses (PS_ACHK is not enabled), then we need to die
647 648 * immediately.
648 649 */
649 650 if (addr < (caddr_t)kernelbase &&
650 651 is_x86_feature(x86_featureset, X86FSET_SMAP) == B_TRUE &&
651 652 (rp->r_ps & PS_ACHK) == 0) {
652 653 (void) die(type, rp, addr, cpuid);
653 654 }
654 655
655 656 /*
656 657 * See if we can handle as pagefault. Save lofault and onfault
657 658 * across this. Here we assume that an address less than
658 659 * KERNELBASE is a user fault. We can do this as copy.s
659 660 * routines verify that the starting address is less than
660 661 * KERNELBASE before starting and because we know that we
661 662 * always have KERNELBASE mapped as invalid to serve as a
662 663 * "barrier".
663 664 */
664 665 lofault = ct->t_lofault;
665 666 onfault = ct->t_onfault;
666 667 ct->t_lofault = 0;
667 668
668 669 mstate = new_mstate(ct, LMS_KFAULT);
669 670
670 671 if (addr < (caddr_t)kernelbase) {
671 672 res = pagefault(addr,
672 673 (errcode & PF_ERR_PROT)? F_PROT: F_INVAL, rw, 0);
673 674 if (res == FC_NOMAP &&
674 675 addr < p->p_usrstack &&
675 676 grow(addr))
676 677 res = 0;
677 678 } else {
678 679 res = pagefault(addr,
679 680 (errcode & PF_ERR_PROT)? F_PROT: F_INVAL, rw, 1);
680 681 }
681 682 (void) new_mstate(ct, mstate);
682 683
683 684 /*
684 685 * Restore lofault and onfault. If we resolved the fault, exit.
685 686 * If we didn't and lofault wasn't set, die.
686 687 */
687 688 ct->t_lofault = lofault;
688 689 ct->t_onfault = onfault;
689 690 if (res == 0)
690 691 goto cleanup;
691 692
692 693 #if defined(OPTERON_ERRATUM_93) && defined(_LP64)
693 694 if (lofault == 0 && opteron_erratum_93) {
694 695 /*
695 696 * Workaround for Opteron Erratum 93. On return from
696 697 * a System Management Interrupt at a HLT instruction
697 698 * the %rip might be truncated to a 32 bit value.
698 699 * BIOS is supposed to fix this, but some don't.
699 700 * If this occurs we simply restore the high order bits.
700 701 * The HLT instruction is 1 byte of 0xf4.
701 702 */
702 703 uintptr_t rip = rp->r_pc;
703 704
704 705 if ((rip & 0xfffffffful) == rip) {
705 706 rip |= 0xfffffffful << 32;
706 707 if (hat_getpfnum(kas.a_hat, (caddr_t)rip) !=
707 708 PFN_INVALID &&
708 709 (*(uchar_t *)rip == 0xf4 ||
709 710 *(uchar_t *)(rip - 1) == 0xf4)) {
710 711 rp->r_pc = rip;
711 712 goto cleanup;
712 713 }
713 714 }
714 715 }
715 716 #endif /* OPTERON_ERRATUM_93 && _LP64 */
716 717
717 718 #ifdef OPTERON_ERRATUM_91
718 719 if (lofault == 0 && opteron_erratum_91) {
719 720 /*
720 721 * Workaround for Opteron Erratum 91. Prefetches may
721 722 * generate a page fault (they're not supposed to do
722 723 * that!). If this occurs we simply return back to the
723 724 * instruction.
724 725 */
725 726 caddr_t pc = (caddr_t)rp->r_pc;
726 727
727 728 /*
728 729 * If the faulting PC is not mapped, this is a
729 730 * legitimate kernel page fault that must result in a
730 731 * panic. If the faulting PC is mapped, it could contain
731 732 * a prefetch instruction. Check for that here.
732 733 */
733 734 if (hat_getpfnum(kas.a_hat, pc) != PFN_INVALID) {
734 735 if (cmp_to_prefetch((uchar_t *)pc)) {
735 736 #ifdef DEBUG
736 737 cmn_err(CE_WARN, "Opteron erratum 91 "
737 738 "occurred: kernel prefetch"
738 739 " at %p generated a page fault!",
739 740 (void *)rp->r_pc);
740 741 #endif /* DEBUG */
741 742 goto cleanup;
742 743 }
743 744 }
744 745 (void) die(type, rp, addr, cpuid);
745 746 }
746 747 #endif /* OPTERON_ERRATUM_91 */
747 748
748 749 if (lofault == 0)
749 750 (void) die(type, rp, addr, cpuid);
750 751
751 752 /*
752 753 * Cannot resolve fault. Return to lofault.
753 754 */
754 755 if (lodebug) {
755 756 showregs(type, rp, addr);
756 757 traceregs(rp);
757 758 }
758 759 if (FC_CODE(res) == FC_OBJERR)
759 760 res = FC_ERRNO(res);
760 761 else
761 762 res = EFAULT;
762 763 rp->r_r0 = res;
763 764 rp->r_pc = ct->t_lofault;
764 765 goto cleanup;
765 766
766 767 case T_PGFLT + USER: /* user page fault */
767 768 if (faultdebug) {
768 769 char *fault_str;
769 770
770 771 switch (rw) {
771 772 case S_READ:
772 773 fault_str = "read";
773 774 break;
774 775 case S_WRITE:
775 776 fault_str = "write";
776 777 break;
777 778 case S_EXEC:
778 779 fault_str = "exec";
779 780 break;
780 781 default:
781 782 fault_str = "";
782 783 break;
783 784 }
784 785 printf("user %s fault: addr=0x%lx errcode=0x%x\n",
785 786 fault_str, (uintptr_t)addr, errcode);
786 787 }
787 788
788 789 #if defined(OPTERON_ERRATUM_100) && defined(_LP64)
789 790 /*
790 791 * Workaround for AMD erratum 100
791 792 *
792 793 * A 32-bit process may receive a page fault on a non
793 794 * 32-bit address by mistake. The range of the faulting
794 795 * address will be
795 796 *
796 797 * 0xffffffff80000000 .. 0xffffffffffffffff or
797 798 * 0x0000000100000000 .. 0x000000017fffffff
798 799 *
799 800 * The fault is always due to an instruction fetch, however
800 801 * the value of r_pc should be correct (in 32 bit range),
801 802 * so we ignore the page fault on the bogus address.
802 803 */
803 804 if (p->p_model == DATAMODEL_ILP32 &&
804 805 (0xffffffff80000000 <= (uintptr_t)addr ||
805 806 (0x100000000 <= (uintptr_t)addr &&
806 807 (uintptr_t)addr <= 0x17fffffff))) {
807 808 if (!opteron_erratum_100)
808 809 panic("unexpected erratum #100");
809 810 if (rp->r_pc <= 0xffffffff)
810 811 goto out;
811 812 }
812 813 #endif /* OPTERON_ERRATUM_100 && _LP64 */
813 814
814 815 ASSERT(!(curthread->t_flag & T_WATCHPT));
815 816 watchpage = (pr_watch_active(p) && pr_is_watchpage(addr, rw));
816 817 #ifdef __i386
817 818 /*
818 819 * In 32-bit mode, the lcall (system call) instruction fetches
819 820 * one word from the stack, at the stack pointer, because of the
820 821 * way the call gate is constructed. This is a bogus
821 822 * read and should not be counted as a read watchpoint.
822 823 * We work around the problem here by testing to see if
823 824 * this situation applies and, if so, simply jumping to
824 825 * the code in locore.s that fields the system call trap.
825 826 * The registers on the stack are already set up properly
826 827 * due to the match between the call gate sequence and the
827 828 * trap gate sequence. We just have to adjust the pc.
828 829 */
829 830 if (watchpage && addr == (caddr_t)rp->r_sp &&
830 831 rw == S_READ && instr_is_lcall_syscall((caddr_t)rp->r_pc)) {
831 832 extern void watch_syscall(void);
832 833
833 834 rp->r_pc += LCALLSIZE;
834 835 watch_syscall(); /* never returns */
835 836 /* NOTREACHED */
836 837 }
837 838 #endif /* __i386 */
838 839 vaddr = addr;
839 840 if (!watchpage || (sz = instr_size(rp, &vaddr, rw)) <= 0)
840 841 fault_type = (errcode & PF_ERR_PROT)? F_PROT: F_INVAL;
841 842 else if ((watchcode = pr_is_watchpoint(&vaddr, &ta,
842 843 sz, NULL, rw)) != 0) {
843 844 if (ta) {
844 845 do_watch_step(vaddr, sz, rw,
845 846 watchcode, rp->r_pc);
846 847 fault_type = F_INVAL;
847 848 } else {
848 849 bzero(&siginfo, sizeof (siginfo));
849 850 siginfo.si_signo = SIGTRAP;
850 851 siginfo.si_code = watchcode;
851 852 siginfo.si_addr = vaddr;
852 853 siginfo.si_trapafter = 0;
853 854 siginfo.si_pc = (caddr_t)rp->r_pc;
854 855 fault = FLTWATCH;
855 856 break;
856 857 }
857 858 } else {
858 859 /* XXX pr_watch_emul() never succeeds (for now) */
859 860 if (rw != S_EXEC && pr_watch_emul(rp, vaddr, rw))
860 861 goto out;
861 862 do_watch_step(vaddr, sz, rw, 0, 0);
862 863 fault_type = F_INVAL;
863 864 }
864 865
866 + /*
867 + * Allow the brand to interpose on invalid memory accesses
868 + * prior to running the native pagefault handler. If this
869 + * brand hook returns zero, it was able to handle the fault
870 + * completely. Otherwise, drive on and call pagefault().
871 + */
872 + if (PROC_IS_BRANDED(p) && BROP(p)->b_pagefault != NULL &&
873 + BROP(p)->b_pagefault(p, lwp, addr, fault_type, rw) == 0) {
874 + goto out;
875 + }
876 +
865 877 res = pagefault(addr, fault_type, rw, 0);
866 878
867 879 /*
868 880 * If pagefault() succeeded, ok.
869 881 * Otherwise attempt to grow the stack.
870 882 */
871 883 if (res == 0 ||
872 884 (res == FC_NOMAP &&
873 885 addr < p->p_usrstack &&
874 886 grow(addr))) {
875 887 lwp->lwp_lastfault = FLTPAGE;
876 888 lwp->lwp_lastfaddr = addr;
877 889 if (prismember(&p->p_fltmask, FLTPAGE)) {
878 890 bzero(&siginfo, sizeof (siginfo));
879 891 siginfo.si_addr = addr;
880 892 (void) stop_on_fault(FLTPAGE, &siginfo);
881 893 }
882 894 goto out;
883 895 } else if (res == FC_PROT && addr < p->p_usrstack &&
884 896 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC))) {
885 897 report_stack_exec(p, addr);
886 898 }
887 899
888 900 #ifdef OPTERON_ERRATUM_91
889 901 /*
890 902 * Workaround for Opteron Erratum 91. Prefetches may generate a
891 903 * page fault (they're not supposed to do that!). If this
892 904 * occurs we simply return back to the instruction.
893 905 *
894 906 * We rely on copyin to properly fault in the page with r_pc.
895 907 */
896 908 if (opteron_erratum_91 &&
897 909 addr != (caddr_t)rp->r_pc &&
898 910 instr_is_prefetch((caddr_t)rp->r_pc)) {
899 911 #ifdef DEBUG
900 912 cmn_err(CE_WARN, "Opteron erratum 91 occurred: "
901 913 "prefetch at %p in pid %d generated a trap!",
902 914 (void *)rp->r_pc, p->p_pid);
903 915 #endif /* DEBUG */
904 916 goto out;
905 917 }
906 918 #endif /* OPTERON_ERRATUM_91 */
907 919
908 920 if (tudebug)
909 921 showregs(type, rp, addr);
910 922 /*
911 923 * In the case where both pagefault and grow fail,
912 924 * set the code to the value provided by pagefault.
913 925 * We map all errors returned from pagefault() to SIGSEGV.
914 926 */
915 927 bzero(&siginfo, sizeof (siginfo));
916 928 siginfo.si_addr = addr;
917 929 switch (FC_CODE(res)) {
918 930 case FC_HWERR:
919 931 case FC_NOSUPPORT:
920 932 siginfo.si_signo = SIGBUS;
921 933 siginfo.si_code = BUS_ADRERR;
922 934 fault = FLTACCESS;
923 935 break;
924 936 case FC_ALIGN:
925 937 siginfo.si_signo = SIGBUS;
926 938 siginfo.si_code = BUS_ADRALN;
927 939 fault = FLTACCESS;
928 940 break;
929 941 case FC_OBJERR:
930 942 if ((siginfo.si_errno = FC_ERRNO(res)) != EINTR) {
931 943 siginfo.si_signo = SIGBUS;
932 944 siginfo.si_code = BUS_OBJERR;
933 945 fault = FLTACCESS;
934 946 }
935 947 break;
936 948 default: /* FC_NOMAP or FC_PROT */
937 949 siginfo.si_signo = SIGSEGV;
938 950 siginfo.si_code =
939 951 (res == FC_NOMAP)? SEGV_MAPERR : SEGV_ACCERR;
940 952 fault = FLTBOUNDS;
941 953 break;
942 954 }
943 955 break;
944 956
945 957 case T_ILLINST + USER: /* invalid opcode fault */
946 958 /*
947 959 * If the syscall instruction is disabled due to LDT usage, a
948 960 * user program that attempts to execute it will trigger a #ud
949 961 * trap. Check for that case here. If this occurs on a CPU which
950 962 * doesn't even support syscall, the result of all of this will
951 963 * be to emulate that particular instruction.
952 964 */
953 965 if (p->p_ldt != NULL &&
954 966 ldt_rewrite_syscall(rp, p, X86FSET_ASYSC))
955 967 goto out;
956 968
957 969 #ifdef __amd64
958 970 /*
959 971 * Emulate the LAHF and SAHF instructions if needed.
960 972 * See the instr_is_lsahf function for details.
961 973 */
962 974 if (p->p_model == DATAMODEL_LP64 &&
963 975 instr_is_lsahf((caddr_t)rp->r_pc, &instr)) {
964 976 emulate_lsahf(rp, instr);
965 977 goto out;
966 978 }
967 979 #endif
968 980
969 981 /*FALLTHROUGH*/
970 982
971 983 if (tudebug)
972 984 showregs(type, rp, (caddr_t)0);
973 985 siginfo.si_signo = SIGILL;
974 986 siginfo.si_code = ILL_ILLOPC;
975 987 siginfo.si_addr = (caddr_t)rp->r_pc;
976 988 fault = FLTILL;
977 989 break;
978 990
979 991 case T_ZERODIV + USER: /* integer divide by zero */
980 992 if (tudebug && tudebugfpe)
981 993 showregs(type, rp, (caddr_t)0);
982 994 siginfo.si_signo = SIGFPE;
983 995 siginfo.si_code = FPE_INTDIV;
984 996 siginfo.si_addr = (caddr_t)rp->r_pc;
985 997 fault = FLTIZDIV;
986 998 break;
987 999
988 1000 case T_OVFLW + USER: /* integer overflow */
989 1001 if (tudebug && tudebugfpe)
990 1002 showregs(type, rp, (caddr_t)0);
991 1003 siginfo.si_signo = SIGFPE;
992 1004 siginfo.si_code = FPE_INTOVF;
993 1005 siginfo.si_addr = (caddr_t)rp->r_pc;
994 1006 fault = FLTIOVF;
995 1007 break;
996 1008
997 1009 case T_NOEXTFLT + USER: /* math coprocessor not available */
998 1010 if (tudebug && tudebugfpe)
999 1011 showregs(type, rp, addr);
1000 1012 if (fpnoextflt(rp)) {
1001 1013 siginfo.si_signo = SIGILL;
1002 1014 siginfo.si_code = ILL_ILLOPC;
1003 1015 siginfo.si_addr = (caddr_t)rp->r_pc;
1004 1016 fault = FLTILL;
1005 1017 }
1006 1018 break;
1007 1019
1008 1020 case T_EXTOVRFLT: /* extension overrun fault */
1009 1021 /* check if we took a kernel trap on behalf of user */
1010 1022 {
1011 1023 extern void ndptrap_frstor(void);
1012 1024 if (rp->r_pc != (uintptr_t)ndptrap_frstor) {
1013 1025 sti(); /* T_EXTOVRFLT comes in via cmninttrap */
1014 1026 (void) die(type, rp, addr, cpuid);
1015 1027 }
1016 1028 type |= USER;
1017 1029 }
1018 1030 /*FALLTHROUGH*/
1019 1031 case T_EXTOVRFLT + USER: /* extension overrun fault */
1020 1032 if (tudebug && tudebugfpe)
1021 1033 showregs(type, rp, addr);
1022 1034 if (fpextovrflt(rp)) {
1023 1035 siginfo.si_signo = SIGSEGV;
1024 1036 siginfo.si_code = SEGV_MAPERR;
1025 1037 siginfo.si_addr = (caddr_t)rp->r_pc;
1026 1038 fault = FLTBOUNDS;
1027 1039 }
1028 1040 break;
1029 1041
1030 1042 case T_EXTERRFLT: /* x87 floating point exception pending */
1031 1043 /* check if we took a kernel trap on behalf of user */
1032 1044 {
1033 1045 extern void ndptrap_frstor(void);
1034 1046 if (rp->r_pc != (uintptr_t)ndptrap_frstor) {
1035 1047 sti(); /* T_EXTERRFLT comes in via cmninttrap */
1036 1048 (void) die(type, rp, addr, cpuid);
1037 1049 }
1038 1050 type |= USER;
1039 1051 }
1040 1052 /*FALLTHROUGH*/
1041 1053
1042 1054 case T_EXTERRFLT + USER: /* x87 floating point exception pending */
1043 1055 if (tudebug && tudebugfpe)
1044 1056 showregs(type, rp, addr);
1045 1057 if (sicode = fpexterrflt(rp)) {
1046 1058 siginfo.si_signo = SIGFPE;
1047 1059 siginfo.si_code = sicode;
1048 1060 siginfo.si_addr = (caddr_t)rp->r_pc;
1049 1061 fault = FLTFPE;
1050 1062 }
1051 1063 break;
1052 1064
1053 1065 case T_SIMDFPE + USER: /* SSE and SSE2 exceptions */
1054 1066 if (tudebug && tudebugsse)
1055 1067 showregs(type, rp, addr);
1056 1068 if (!is_x86_feature(x86_featureset, X86FSET_SSE) &&
1057 1069 !is_x86_feature(x86_featureset, X86FSET_SSE2)) {
1058 1070 /*
1059 1071 * There are rumours that some user instructions
1060 1072 * on older CPUs can cause this trap to occur; in
1061 1073 * which case send a SIGILL instead of a SIGFPE.
1062 1074 */
1063 1075 siginfo.si_signo = SIGILL;
1064 1076 siginfo.si_code = ILL_ILLTRP;
1065 1077 siginfo.si_addr = (caddr_t)rp->r_pc;
1066 1078 siginfo.si_trapno = type & ~USER;
1067 1079 fault = FLTILL;
1068 1080 } else if ((sicode = fpsimderrflt(rp)) != 0) {
1069 1081 siginfo.si_signo = SIGFPE;
1070 1082 siginfo.si_code = sicode;
1071 1083 siginfo.si_addr = (caddr_t)rp->r_pc;
1072 1084 fault = FLTFPE;
1073 1085 }
1074 1086
1075 1087 sti(); /* The SIMD exception comes in via cmninttrap */
1076 1088 break;
1077 1089
1078 1090 case T_BPTFLT: /* breakpoint trap */
1079 1091 /*
1080 1092 * Kernel breakpoint traps should only happen when kmdb is
1081 1093 * active, and even then, it'll have interposed on the IDT, so
1082 1094 * control won't get here. If it does, we've hit a breakpoint
1083 1095 * without the debugger, which is very strange, and very
1084 1096 * fatal.
1085 1097 */
1086 1098 if (tudebug && tudebugbpt)
1087 1099 showregs(type, rp, (caddr_t)0);
1088 1100
1089 1101 (void) die(type, rp, addr, cpuid);
1090 1102 break;
1091 1103
1092 1104 case T_SGLSTP: /* single step/hw breakpoint exception */
1093 1105
1094 1106 /* Now evaluate how we got here */
1095 1107 if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
1096 1108 /*
1097 1109 * i386 single-steps even through lcalls which
1098 1110 * change the privilege level. So we take a trap at
1099 1111 * the first instruction in privileged mode.
1100 1112 *
1101 1113 * Set a flag to indicate that upon completion of
1102 1114 * the system call, deal with the single-step trap.
1103 1115 *
1104 1116 * The same thing happens for sysenter, too.
1105 1117 */
1106 1118 singlestep_twiddle = 0;
1107 1119 if (rp->r_pc == (uintptr_t)sys_sysenter ||
1108 1120 rp->r_pc == (uintptr_t)brand_sys_sysenter) {
1109 1121 singlestep_twiddle = 1;
1110 1122 #if defined(__amd64)
1111 1123 /*
1112 1124 * Since we are already on the kernel's
1113 1125 * %gs, on 64-bit systems the sysenter case
1114 1126 * needs to adjust the pc to avoid
1115 1127 * executing the swapgs instruction at the
1116 1128 * top of the handler.
1117 1129 */
1118 1130 if (rp->r_pc == (uintptr_t)sys_sysenter)
1119 1131 rp->r_pc = (uintptr_t)
1120 1132 _sys_sysenter_post_swapgs;
1121 1133 else
1122 1134 rp->r_pc = (uintptr_t)
1123 1135 _brand_sys_sysenter_post_swapgs;
1124 1136 #endif
1125 1137 }
1126 1138 #if defined(__i386)
1127 1139 else if (rp->r_pc == (uintptr_t)sys_call ||
1128 1140 rp->r_pc == (uintptr_t)brand_sys_call) {
1129 1141 singlestep_twiddle = 1;
1130 1142 }
1131 1143 #endif
1132 1144 else {
1133 1145 /* not on sysenter/syscall; uregs available */
1134 1146 if (tudebug && tudebugbpt)
1135 1147 showregs(type, rp, (caddr_t)0);
1136 1148 }
1137 1149 if (singlestep_twiddle) {
1138 1150 rp->r_ps &= ~PS_T; /* turn off trace */
1139 1151 lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
1140 1152 ct->t_post_sys = 1;
1141 1153 aston(curthread);
1142 1154 goto cleanup;
1143 1155 }
1144 1156 }
1145 1157 /* XXX - needs review on debugger interface? */
1146 1158 if (boothowto & RB_DEBUG)
1147 1159 debug_enter((char *)NULL);
1148 1160 else
1149 1161 (void) die(type, rp, addr, cpuid);
1150 1162 break;
1151 1163
1152 1164 case T_NMIFLT: /* NMI interrupt */
1153 1165 printf("Unexpected NMI in system mode\n");
1154 1166 goto cleanup;
1155 1167
1156 1168 case T_NMIFLT + USER: /* NMI interrupt */
1157 1169 printf("Unexpected NMI in user mode\n");
1158 1170 break;
1159 1171
1160 1172 case T_GPFLT: /* general protection violation */
1161 1173 /*
1162 1174 * Any #GP that occurs during an on_trap .. no_trap bracket
1163 1175 * with OT_DATA_ACCESS or OT_SEGMENT_ACCESS protection,
1164 1176 * or in an on_fault .. no_fault bracket, is forgiven
1165 1177 * and we trampoline. This protection is given regardless
1166 1178 * of whether we are 32/64 bit etc - if a distinction is
1167 1179 * required then define new on_trap protection types.
1168 1180 *
1169 1181 * On amd64, we can get a #gp from referencing addresses
1170 1182 * in the virtual address hole e.g. from a copyin or in
1171 1183 * update_sregs while updating user segment registers.
1172 1184 *
1173 1185 * On the 32-bit hypervisor we could also generate one in
1174 1186 * mfn_to_pfn by reaching around or into where the hypervisor
1175 1187 * lives which is protected by segmentation.
1176 1188 */
1177 1189
1178 1190 /*
1179 1191 * If we're under on_trap() protection (see <sys/ontrap.h>),
1180 1192 * set ot_trap and trampoline back to the on_trap() call site
1181 1193 * for OT_DATA_ACCESS or OT_SEGMENT_ACCESS.
1182 1194 */
1183 1195 if (ct->t_ontrap != NULL) {
1184 1196 int ttype = ct->t_ontrap->ot_prot &
1185 1197 (OT_DATA_ACCESS | OT_SEGMENT_ACCESS);
1186 1198
1187 1199 if (ttype != 0) {
1188 1200 ct->t_ontrap->ot_trap |= ttype;
1189 1201 if (tudebug)
1190 1202 showregs(type, rp, (caddr_t)0);
1191 1203 rp->r_pc = ct->t_ontrap->ot_trampoline;
1192 1204 goto cleanup;
1193 1205 }
1194 1206 }
1195 1207
1196 1208 /*
1197 1209 * If we're under lofault protection (copyin etc.),
1198 1210 * longjmp back to lofault with an EFAULT.
1199 1211 */
1200 1212 if (ct->t_lofault) {
1201 1213 /*
1202 1214 * Fault is not resolvable, so just return to lofault
1203 1215 */
1204 1216 if (lodebug) {
1205 1217 showregs(type, rp, addr);
1206 1218 traceregs(rp);
1207 1219 }
1208 1220 rp->r_r0 = EFAULT;
1209 1221 rp->r_pc = ct->t_lofault;
1210 1222 goto cleanup;
1211 1223 }
1212 1224
1213 1225 /*
1214 1226 * We fall through to the next case, which repeats
1215 1227 * the OT_SEGMENT_ACCESS check which we've already
1216 1228 * done, so we'll always fall through to the
1217 1229 * T_STKFLT case.
1218 1230 */
1219 1231 /*FALLTHROUGH*/
1220 1232 case T_SEGFLT: /* segment not present fault */
1221 1233 /*
1222 1234 * One example of this is #NP in update_sregs while
1223 1235 * attempting to update a user segment register
1224 1236 * that points to a descriptor that is marked not
1225 1237 * present.
1226 1238 */
1227 1239 if (ct->t_ontrap != NULL &&
1228 1240 ct->t_ontrap->ot_prot & OT_SEGMENT_ACCESS) {
1229 1241 ct->t_ontrap->ot_trap |= OT_SEGMENT_ACCESS;
1230 1242 if (tudebug)
1231 1243 showregs(type, rp, (caddr_t)0);
1232 1244 rp->r_pc = ct->t_ontrap->ot_trampoline;
1233 1245 goto cleanup;
1234 1246 }
1235 1247 /*FALLTHROUGH*/
1236 1248 case T_STKFLT: /* stack fault */
1237 1249 case T_TSSFLT: /* invalid TSS fault */
1238 1250 if (tudebug)
1239 1251 showregs(type, rp, (caddr_t)0);
1240 1252 if (kern_gpfault(rp))
1241 1253 (void) die(type, rp, addr, cpuid);
1242 1254 goto cleanup;
1243 1255
1244 1256 /*
1245 1257 * ONLY 32-bit PROCESSES can USE a PRIVATE LDT! 64-bit apps
1246 1258 * should have no need for them, so we put a stop to it here.
1247 1259 *
1248 1260 * So: not-present fault is ONLY valid for 32-bit processes with
1249 1261 * a private LDT trying to do a system call. Emulate it.
1250 1262 *
1251 1263 * #gp fault is ONLY valid for 32-bit processes also, which DO NOT
1252 1264 * have a private LDT, and are trying to do a system call. Emulate it.
1253 1265 */
1254 1266
1255 1267 case T_SEGFLT + USER: /* segment not present fault */
1256 1268 case T_GPFLT + USER: /* general protection violation */
1257 1269 #ifdef _SYSCALL32_IMPL
1258 1270 if (p->p_model != DATAMODEL_NATIVE) {
1259 1271 #endif /* _SYSCALL32_IMPL */
1260 1272 if (instr_is_lcall_syscall((caddr_t)rp->r_pc)) {
1261 1273 if (type == T_SEGFLT + USER)
1262 1274 ASSERT(p->p_ldt != NULL);
1263 1275
1264 1276 if ((p->p_ldt == NULL && type == T_GPFLT + USER) ||
1265 1277 type == T_SEGFLT + USER) {
1266 1278
1267 1279 /*
1268 1280 * The user attempted a system call via the obsolete
1269 1281 * call gate mechanism. Because the process doesn't have
1270 1282 * an LDT (i.e. the ldtr contains 0), a #gp results.
1271 1283 * Emulate the syscall here, just as we do above for a
1272 1284 * #np trap.
1273 1285 */
1274 1286
1275 1287 /*
1276 1288 * Since this is a not-present trap, rp->r_pc points to
1277 1289 * the trapping lcall instruction. We need to bump it
1278 1290 * to the next insn so the app can continue on.
1279 1291 */
1280 1292 rp->r_pc += LCALLSIZE;
1281 1293 lwp->lwp_regs = rp;
1282 1294
1283 1295 /*
1284 1296 * Normally the microstate of the LWP is forced back to
1285 1297 * LMS_USER by the syscall handlers. Emulate that
1286 1298 * behavior here.
1287 1299 */
1288 1300 mstate = LMS_USER;
1289 1301
1290 1302 dosyscall();
1291 1303 goto out;
1292 1304 }
1293 1305 }
1294 1306 #ifdef _SYSCALL32_IMPL
1295 1307 }
1296 1308 #endif /* _SYSCALL32_IMPL */
1297 1309 /*
1298 1310 * If the current process is using a private LDT and the
1299 1311 * trapping instruction is sysenter, the sysenter instruction
1300 1312 * has been disabled on the CPU because it destroys segment
1301 1313 * registers. If this is the case, rewrite the instruction to
1302 1314 * be a safe system call and retry it. If this occurs on a CPU
1303 1315 * which doesn't even support sysenter, the result of all of
1304 1316 * this will be to emulate that particular instruction.
1305 1317 */
1306 1318 if (p->p_ldt != NULL &&
1307 1319 ldt_rewrite_syscall(rp, p, X86FSET_SEP))
1308 1320 goto out;
1309 1321
1310 1322 /*FALLTHROUGH*/
1311 1323
1312 1324 case T_BOUNDFLT + USER: /* bound fault */
1313 1325 case T_STKFLT + USER: /* stack fault */
1314 1326 case T_TSSFLT + USER: /* invalid TSS fault */
1315 1327 if (tudebug)
1316 1328 showregs(type, rp, (caddr_t)0);
1317 1329 siginfo.si_signo = SIGSEGV;
1318 1330 siginfo.si_code = SEGV_MAPERR;
1319 1331 siginfo.si_addr = (caddr_t)rp->r_pc;
1320 1332 fault = FLTBOUNDS;
1321 1333 break;
1322 1334
1323 1335 case T_ALIGNMENT + USER: /* user alignment error (486) */
1324 1336 if (tudebug)
1325 1337 showregs(type, rp, (caddr_t)0);
1326 1338 bzero(&siginfo, sizeof (siginfo));
1327 1339 siginfo.si_signo = SIGBUS;
1328 1340 siginfo.si_code = BUS_ADRALN;
1329 1341 siginfo.si_addr = (caddr_t)rp->r_pc;
1330 1342 fault = FLTACCESS;
1331 1343 break;
1332 1344
1333 1345 case T_SGLSTP + USER: /* single step/hw breakpoint exception */
1334 1346 if (tudebug && tudebugbpt)
1335 1347 showregs(type, rp, (caddr_t)0);
1336 1348
1337 1349 /* Was it single-stepping? */
1338 1350 if (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP) {
1339 1351 pcb_t *pcb = &lwp->lwp_pcb;
1340 1352
1341 1353 rp->r_ps &= ~PS_T;
1342 1354 /*
1343 1355 * If both NORMAL_STEP and WATCH_STEP are in effect,
1344 1356 * give precedence to WATCH_STEP. If neither is set,
1345 1357 * user must have set the PS_T bit in %efl; treat this
1346 1358 * as NORMAL_STEP.
1347 1359 */
1348 1360 if ((fault = undo_watch_step(&siginfo)) == 0 &&
1349 1361 ((pcb->pcb_flags & NORMAL_STEP) ||
1350 1362 !(pcb->pcb_flags & WATCH_STEP))) {
1351 1363 siginfo.si_signo = SIGTRAP;
1352 1364 siginfo.si_code = TRAP_TRACE;
1353 1365 siginfo.si_addr = (caddr_t)rp->r_pc;
1354 1366 fault = FLTTRACE;
1355 1367 }
1356 1368 pcb->pcb_flags &= ~(NORMAL_STEP|WATCH_STEP);
1357 1369 }
1358 1370 break;
1359 1371
1360 1372 case T_BPTFLT + USER: /* breakpoint trap */
1361 1373 if (tudebug && tudebugbpt)
1362 1374 showregs(type, rp, (caddr_t)0);
1363 1375 /*
1364 1376 * int 3 (the breakpoint instruction) leaves the pc referring
1365 1377 * to the address one byte after the breakpointed address.
1366 1378 * If the P_PR_BPTADJ flag has been set via /proc, we adjust
1367 1379 * it back so it refers to the breakpointed address.
1368 1380 */
1369 1381 if (p->p_proc_flag & P_PR_BPTADJ)
1370 1382 rp->r_pc--;
1371 1383 siginfo.si_signo = SIGTRAP;
1372 1384 siginfo.si_code = TRAP_BRKPT;
1373 1385 siginfo.si_addr = (caddr_t)rp->r_pc;
1374 1386 fault = FLTBPT;
1375 1387 break;
1376 1388
1377 1389 case T_AST:
1378 1390 /*
1379 1391 * This occurs only after the cs register has been made to
1380 1392 * look like a kernel selector, either through debugging or
1381 1393 * possibly by functions like setcontext(). The thread is
1382 1394 * about to cause a general protection fault at common_iret()
1383 1395 * in locore. We let that happen immediately instead of
1384 1396 * doing the T_AST processing.
1385 1397 */
1386 1398 goto cleanup;
1387 1399
1388 1400 case T_AST + USER: /* profiling, resched, h/w error pseudo trap */
1389 1401 if (lwp->lwp_pcb.pcb_flags & ASYNC_HWERR) {
1390 1402 proc_t *p = ttoproc(curthread);
1391 1403 extern void print_msg_hwerr(ctid_t ct_id, proc_t *p);
1392 1404
1393 1405 lwp->lwp_pcb.pcb_flags &= ~ASYNC_HWERR;
1394 1406 print_msg_hwerr(p->p_ct_process->conp_contract.ct_id,
1395 1407 p);
1396 1408 contract_process_hwerr(p->p_ct_process, p);
1397 1409 siginfo.si_signo = SIGKILL;
1398 1410 siginfo.si_code = SI_NOINFO;
1399 1411 } else if (lwp->lwp_pcb.pcb_flags & CPC_OVERFLOW) {
1400 1412 lwp->lwp_pcb.pcb_flags &= ~CPC_OVERFLOW;
1401 1413 if (kcpc_overflow_ast()) {
1402 1414 /*
1403 1415 * Signal performance counter overflow
1404 1416 */
1405 1417 if (tudebug)
1406 1418 showregs(type, rp, (caddr_t)0);
1407 1419 bzero(&siginfo, sizeof (siginfo));
1408 1420 siginfo.si_signo = SIGEMT;
1409 1421 siginfo.si_code = EMT_CPCOVF;
1410 1422 siginfo.si_addr = (caddr_t)rp->r_pc;
1411 1423 fault = FLTCPCOVF;
1412 1424 }
1413 1425 }
1414 1426
1415 1427 break;
1416 1428 }
1417 1429
1418 1430 /*
1419 1431 * We can't get here from a system trap
1420 1432 */
1421 1433 ASSERT(type & USER);
1422 1434
1423 1435 if (fault) {
1424 1436 /* We took a fault so abort single step. */
1425 1437 lwp->lwp_pcb.pcb_flags &= ~(NORMAL_STEP|WATCH_STEP);
1426 1438 /*
1427 1439 * Remember the fault and fault address
1428 1440 * for real-time (SIGPROF) profiling.
1429 1441 */
1430 1442 lwp->lwp_lastfault = fault;
1431 1443 lwp->lwp_lastfaddr = siginfo.si_addr;
1432 1444
1433 1445 DTRACE_PROC2(fault, int, fault, ksiginfo_t *, &siginfo);
1434 1446
1435 1447 /*
1436 1448 * If a debugger has declared this fault to be an
1437 1449 * event of interest, stop the lwp. Otherwise just
1438 1450 * deliver the associated signal.
1439 1451 */
1440 1452 if (siginfo.si_signo != SIGKILL &&
1441 1453 prismember(&p->p_fltmask, fault) &&
1442 1454 stop_on_fault(fault, &siginfo) == 0)
1443 1455 siginfo.si_signo = 0;
1444 1456 }
1445 1457
1446 1458 if (siginfo.si_signo)
1447 1459 trapsig(&siginfo, (fault != FLTFPE && fault != FLTCPCOVF));
1448 1460
1449 1461 if (lwp->lwp_oweupc)
1450 1462 profil_tick(rp->r_pc);
1451 1463
1452 1464 if (ct->t_astflag | ct->t_sig_check) {
1453 1465 /*
1454 1466 * Turn off the AST flag before checking all the conditions that
1455 1467 * may have caused an AST. This flag is on whenever a signal or
1456 1468 * unusual condition should be handled after the next trap or
1457 1469 * syscall.
1458 1470 */
1459 1471 astoff(ct);
1460 1472 /*
1461 1473 * If a single-step trap occurred on a syscall (see above)
1462 1474 * recognize it now. Do this before checking for signals
1463 1475 * because deferred_singlestep_trap() may generate a SIGTRAP to
1464 1476 * the LWP or may otherwise mark the LWP to call issig(FORREAL).
1465 1477 */
1466 1478 if (lwp->lwp_pcb.pcb_flags & DEBUG_PENDING)
1467 1479 deferred_singlestep_trap((caddr_t)rp->r_pc);
1468 1480
1469 1481 ct->t_sig_check = 0;
1470 1482
1471 1483 /*
1472 1484 * As in other code paths that check against TP_CHANGEBIND,
1473 1485 * we perform the check first without p_lock held -- only
1474 1486 * acquiring p_lock in the unlikely event that it is indeed
1475 1487 * set. This is safe because we are doing this after the
1476 1488 * astoff(); if we are racing another thread setting
1477 1489 * TP_CHANGEBIND on us, we will pick it up on a subsequent
1478 1490 * lap through.
1479 1491 */
1480 1492 if (curthread->t_proc_flag & TP_CHANGEBIND) {
1481 1493 mutex_enter(&p->p_lock);
1482 1494 if (curthread->t_proc_flag & TP_CHANGEBIND) {
1483 1495 timer_lwpbind();
1484 1496 curthread->t_proc_flag &= ~TP_CHANGEBIND;
1485 1497 }
1486 1498 mutex_exit(&p->p_lock);
1487 1499 }
1488 1500
1489 1501 /*
1490 1502 * for kaio requests that are on the per-process poll queue,
1491 1503 * aiop->aio_pollq, their AIO_POLL bit is set, the kernel
1492 1504 * should copyout their result_t to user memory. by copying
1493 1505 * out the result_t, the user can poll on memory waiting
1494 1506 * for the kaio request to complete.
1495 1507 */
1496 1508 if (p->p_aio)
1497 1509 aio_cleanup(0);
1498 1510 /*
1499 1511 * If this LWP was asked to hold, call holdlwp(), which will
1500 1512 * stop. holdlwps() sets this up and calls pokelwps() which
1501 1513 * sets the AST flag.
1502 1514 *
1503 1515 * Also check TP_EXITLWP, since this is used by fresh new LWPs
1504 1516 * through lwp_rtt(). That flag is set if the lwp_create(2)
1505 1517 * syscall failed after creating the LWP.
1506 1518 */
1507 1519 if (ISHOLD(p))
1508 1520 holdlwp();
1509 1521
1510 1522 /*
1511 1523 * All code that sets signals and makes ISSIG evaluate true must
1512 1524 * set t_astflag afterwards.
1513 1525 */
1514 1526 if (ISSIG_PENDING(ct, lwp, p)) {
1515 1527 if (issig(FORREAL))
1516 1528 psig();
1517 1529 ct->t_sig_check = 1;
1518 1530 }
1519 1531
1520 1532 if (ct->t_rprof != NULL) {
1521 1533 realsigprof(0, 0, 0);
1522 1534 ct->t_sig_check = 1;
1523 1535 }
1524 1536
1525 1537 /*
1526 1538 * /proc can't enable/disable the trace bit itself
1527 1539 * because that could race with the call gate used by
1528 1540 * system calls via "lcall". If that happened, an
1529 1541 * invalid EFLAGS would result. prstep()/prnostep()
1530 1542 * therefore schedule an AST for the purpose.
1531 1543 */
1532 1544 if (lwp->lwp_pcb.pcb_flags & REQUEST_STEP) {
1533 1545 lwp->lwp_pcb.pcb_flags &= ~REQUEST_STEP;
1534 1546 rp->r_ps |= PS_T;
1535 1547 }
1536 1548 if (lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP) {
1537 1549 lwp->lwp_pcb.pcb_flags &= ~REQUEST_NOSTEP;
1538 1550 rp->r_ps &= ~PS_T;
1539 1551 }
1540 1552 }
1541 1553
1542 1554 out: /* We can't get here from a system trap */
1543 1555 ASSERT(type & USER);
1544 1556
1545 1557 if (ISHOLD(p))
1546 1558 holdlwp();
1547 1559
1548 1560 /*
1549 1561 * Set state to LWP_USER here so preempt won't give us a kernel
1550 1562 * priority if it occurs after this point. Call CL_TRAPRET() to
1551 1563 * restore the user-level priority.
1552 1564 *
1553 1565 * It is important that no locks (other than spinlocks) be entered
1554 1566 * after this point before returning to user mode (unless lwp_state
1555 1567 * is set back to LWP_SYS).
1556 1568 */
1557 1569 lwp->lwp_state = LWP_USER;
1558 1570
1559 1571 if (ct->t_trapret) {
1560 1572 ct->t_trapret = 0;
1561 1573 thread_lock(ct);
1562 1574 CL_TRAPRET(ct);
1563 1575 thread_unlock(ct);
1564 1576 }
1565 1577 if (CPU->cpu_runrun || curthread->t_schedflag & TS_ANYWAITQ)
1566 1578 preempt();
1567 1579 prunstop();
1568 1580 (void) new_mstate(ct, mstate);
1569 1581
1570 1582 /* Kernel probe */
1571 1583 TNF_PROBE_1(thread_state, "thread", /* CSTYLED */,
1572 1584 tnf_microstate, state, LMS_USER);
1573 1585
1574 1586 return;
1575 1587
1576 1588 cleanup: /* system traps end up here */
1577 1589 ASSERT(!(type & USER));
1578 1590 }
1579 1591
1580 1592 /*
1581 1593 * Patch non-zero to disable preemption of threads in the kernel.
1582 1594 */
1583 1595 int IGNORE_KERNEL_PREEMPTION = 0; /* XXX - delete this someday */
1584 1596
1585 1597 struct kpreempt_cnts { /* kernel preemption statistics */
1586 1598 int kpc_idle; /* executing idle thread */
1587 1599 int kpc_intr; /* executing interrupt thread */
1588 1600 int kpc_clock; /* executing clock thread */
1589 1601 int kpc_blocked; /* thread has blocked preemption (t_preempt) */
1590 1602 int kpc_notonproc; /* thread is surrendering processor */
1591 1603 int kpc_inswtch; /* thread has ratified scheduling decision */
1592 1604 int kpc_prilevel; /* processor interrupt level is too high */
1593 1605 int kpc_apreempt; /* asynchronous preemption */
1594 1606 int kpc_spreempt; /* synchronous preemption */
1595 1607 } kpreempt_cnts;
1596 1608
1597 1609 /*
1598 1610 * kernel preemption: forced rescheduling, preempt the running kernel thread.
1599 1611 * the argument is old PIL for an interrupt,
1600 1612 * or the distinguished value KPREEMPT_SYNC.
1601 1613 */
1602 1614 void
1603 1615 kpreempt(int asyncspl)
1604 1616 {
1605 1617 kthread_t *ct = curthread;
1606 1618
1607 1619 if (IGNORE_KERNEL_PREEMPTION) {
1608 1620 aston(CPU->cpu_dispthread);
1609 1621 return;
1610 1622 }
1611 1623
1612 1624 /*
1613 1625 * Check that conditions are right for kernel preemption
1614 1626 */
1615 1627 do {
1616 1628 if (ct->t_preempt) {
1617 1629 /*
1618 1630 * either a privileged thread (idle, panic, interrupt)
1619 1631 * or will check when t_preempt is lowered
1620 1632 * We need to specifically handle the case where
1621 1633 * the thread is in the middle of swtch (resume has
1622 1634 * been called) and has its t_preempt set
1623 1635 * [idle thread and a thread which is in kpreempt
1624 1636 * already] and then a high priority thread is
1625 1637 * available in the local dispatch queue.
1626 1638 * In this case the resumed thread needs to take a
1627 1639 * trap so that it can call kpreempt. We achieve
1628 1640 * this by using siron().
1629 1641 * How do we detect this condition:
1630 1642 * idle thread is running and is in the midst of
1631 1643 * resume: curthread->t_pri == -1 && CPU->dispthread
1632 1644 * != CPU->thread
1633 1645 * Need to ensure that this happens only at high pil
1634 1646 * resume is called at high pil
1635 1647 * Only in resume_from_idle is the pil changed.
1636 1648 */
1637 1649 if (ct->t_pri < 0) {
1638 1650 kpreempt_cnts.kpc_idle++;
1639 1651 if (CPU->cpu_dispthread != CPU->cpu_thread)
1640 1652 siron();
1641 1653 } else if (ct->t_flag & T_INTR_THREAD) {
1642 1654 kpreempt_cnts.kpc_intr++;
1643 1655 if (ct->t_pil == CLOCK_LEVEL)
1644 1656 kpreempt_cnts.kpc_clock++;
1645 1657 } else {
1646 1658 kpreempt_cnts.kpc_blocked++;
1647 1659 if (CPU->cpu_dispthread != CPU->cpu_thread)
1648 1660 siron();
1649 1661 }
1650 1662 aston(CPU->cpu_dispthread);
1651 1663 return;
1652 1664 }
1653 1665 if (ct->t_state != TS_ONPROC ||
1654 1666 ct->t_disp_queue != CPU->cpu_disp) {
1655 1667 /* this thread will be calling swtch() shortly */
1656 1668 kpreempt_cnts.kpc_notonproc++;
1657 1669 if (CPU->cpu_thread != CPU->cpu_dispthread) {
1658 1670 /* already in swtch(), force another */
1659 1671 kpreempt_cnts.kpc_inswtch++;
1660 1672 siron();
1661 1673 }
1662 1674 return;
1663 1675 }
1664 1676 if (getpil() >= DISP_LEVEL) {
1665 1677 /*
1666 1678 * We can't preempt this thread if it is at
1667 1679 * a PIL >= DISP_LEVEL since it may be holding
1668 1680 * a spin lock (like sched_lock).
1669 1681 */
1670 1682 siron(); /* check back later */
1671 1683 kpreempt_cnts.kpc_prilevel++;
1672 1684 return;
1673 1685 }
1674 1686 if (!interrupts_enabled()) {
1675 1687 /*
1676 1688 * Can't preempt while running with ints disabled
1677 1689 */
1678 1690 kpreempt_cnts.kpc_prilevel++;
1679 1691 return;
1680 1692 }
1681 1693 if (asyncspl != KPREEMPT_SYNC)
1682 1694 kpreempt_cnts.kpc_apreempt++;
1683 1695 else
1684 1696 kpreempt_cnts.kpc_spreempt++;
1685 1697
1686 1698 ct->t_preempt++;
1687 1699 preempt();
1688 1700 ct->t_preempt--;
1689 1701 } while (CPU->cpu_kprunrun);
1690 1702 }
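/*
 * Editorial sketch, not part of trap.c: the usual synchronous entry into
 * kpreempt() is the tail of kpreempt_enable() (sys/disp.h), paraphrased
 * below.  A thread that had blocked preemption drops t_preempt and, if a
 * forced reschedule is pending, performs the preemption itself.
 */
static void
kpreempt_enable_sketch(void)
{
	ASSERT(curthread->t_preempt > 0);
	curthread->t_preempt--;			/* re-allow preemption */
	if (curthread->t_preempt == 0 && CPU->cpu_kprunrun)
		kpreempt(KPREEMPT_SYNC);	/* honor the pending request */
}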
1691 1703
1692 1704 /*
1693 1705 * Print out debugging info.
1694 1706 */
1695 1707 static void
1696 1708 showregs(uint_t type, struct regs *rp, caddr_t addr)
1697 1709 {
1698 1710 int s;
1699 1711
1700 1712 s = spl7();
1701 1713 type &= ~USER;
1702 1714 if (PTOU(curproc)->u_comm[0])
1703 1715 printf("%s: ", PTOU(curproc)->u_comm);
1704 1716 if (type < TRAP_TYPES)
1705 1717 printf("#%s %s\n", trap_type_mnemonic[type], trap_type[type]);
1706 1718 else
1707 1719 switch (type) {
1708 1720 case T_SYSCALL:
1709 1721 printf("Syscall Trap:\n");
1710 1722 break;
1711 1723 case T_AST:
1712 1724 printf("AST\n");
1713 1725 break;
1714 1726 default:
1715 1727 printf("Bad Trap = %d\n", type);
1716 1728 break;
1717 1729 }
1718 1730 if (type == T_PGFLT) {
1719 1731 printf("Bad %s fault at addr=0x%lx\n",
1720 1732 USERMODE(rp->r_cs) ? "user": "kernel", (uintptr_t)addr);
1721 1733 } else if (addr) {
1722 1734 printf("addr=0x%lx\n", (uintptr_t)addr);
1723 1735 }
1724 1736
1725 1737 printf("pid=%d, pc=0x%lx, sp=0x%lx, eflags=0x%lx\n",
1726 1738 (ttoproc(curthread) && ttoproc(curthread)->p_pidp) ?
1727 1739 ttoproc(curthread)->p_pid : 0, rp->r_pc, rp->r_sp, rp->r_ps);
1728 1740
1729 1741 #if defined(__lint)
1730 1742 /*
1731 1743 * this clause can be deleted when lint bug 4870403 is fixed
1732 1744 * (lint thinks that bit 32 is illegal in a %b format string)
1733 1745 */
1734 1746 printf("cr0: %x cr4: %b\n",
1735 1747 (uint_t)getcr0(), (uint_t)getcr4(), FMT_CR4);
1736 1748 #else
1737 1749 printf("cr0: %b cr4: %b\n",
1738 1750 (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4);
1739 1751 #endif /* __lint */
1740 1752
1741 1753 printf("cr2: %lx ", getcr2());
1742 1754 #if !defined(__xpv)
1743 1755 printf("cr3: %lx ", getcr3());
1744 1756 #if defined(__amd64)
1745 1757 printf("cr8: %lx\n", getcr8());
1746 1758 #endif
1747 1759 #endif
1748 1760 printf("\n");
1749 1761
1750 1762 dumpregs(rp);
1751 1763 splx(s);
1752 1764 }
1753 1765
1754 1766 static void
1755 1767 dumpregs(struct regs *rp)
1756 1768 {
1757 1769 #if defined(__amd64)
1758 1770 const char fmt[] = "\t%3s: %16lx %3s: %16lx %3s: %16lx\n";
1759 1771
1760 1772 printf(fmt, "rdi", rp->r_rdi, "rsi", rp->r_rsi, "rdx", rp->r_rdx);
1761 1773 printf(fmt, "rcx", rp->r_rcx, " r8", rp->r_r8, " r9", rp->r_r9);
1762 1774 printf(fmt, "rax", rp->r_rax, "rbx", rp->r_rbx, "rbp", rp->r_rbp);
1763 1775 printf(fmt, "r10", rp->r_r10, "r11", rp->r_r11, "r12", rp->r_r12);
1764 1776 printf(fmt, "r13", rp->r_r13, "r14", rp->r_r14, "r15", rp->r_r15);
1765 1777
1766 1778 printf(fmt, "fsb", rdmsr(MSR_AMD_FSBASE), "gsb", rdmsr(MSR_AMD_GSBASE),
1767 1779 " ds", rp->r_ds);
1768 1780 printf(fmt, " es", rp->r_es, " fs", rp->r_fs, " gs", rp->r_gs);
1769 1781
1770 1782 printf(fmt, "trp", rp->r_trapno, "err", rp->r_err, "rip", rp->r_rip);
1771 1783 printf(fmt, " cs", rp->r_cs, "rfl", rp->r_rfl, "rsp", rp->r_rsp);
1772 1784
1773 1785 printf("\t%3s: %16lx\n", " ss", rp->r_ss);
1774 1786
1775 1787 #elif defined(__i386)
1776 1788 const char fmt[] = "\t%3s: %8lx %3s: %8lx %3s: %8lx %3s: %8lx\n";
1777 1789
1778 1790 printf(fmt, " gs", rp->r_gs, " fs", rp->r_fs,
1779 1791 " es", rp->r_es, " ds", rp->r_ds);
1780 1792 printf(fmt, "edi", rp->r_edi, "esi", rp->r_esi,
1781 1793 "ebp", rp->r_ebp, "esp", rp->r_esp);
1782 1794 printf(fmt, "ebx", rp->r_ebx, "edx", rp->r_edx,
1783 1795 "ecx", rp->r_ecx, "eax", rp->r_eax);
1784 1796 printf(fmt, "trp", rp->r_trapno, "err", rp->r_err,
1785 1797 "eip", rp->r_eip, " cs", rp->r_cs);
1786 1798 printf("\t%3s: %8lx %3s: %8lx %3s: %8lx\n",
1787 1799 "efl", rp->r_efl, "usp", rp->r_uesp, " ss", rp->r_ss);
1788 1800
1789 1801 #endif /* __i386 */
1790 1802 }
1791 1803
1792 1804 /*
1793 1805 * Test to see if the instruction is iret on i386 or iretq on amd64.
1794 1806 *
1795 1807 * On the hypervisor we can only test for nopop_sys_rtt_syscall. If true
1796 1808 * then we are in the context of the hypervisor's failsafe handler because it
1797 1809 * tried to iret and failed due to a bad selector. See xen_failsafe_callback.
1798 1810 */
1799 1811 static int
1800 1812 instr_is_iret(caddr_t pc)
1801 1813 {
1802 1814
1803 1815 #if defined(__xpv)
1804 1816 extern void nopop_sys_rtt_syscall(void);
1805 1817 return ((pc == (caddr_t)nopop_sys_rtt_syscall) ? 1 : 0);
1806 1818
1807 1819 #else
1808 1820
1809 1821 #if defined(__amd64)
1810 1822 static const uint8_t iret_insn[2] = { 0x48, 0xcf }; /* iretq */
1811 1823
1812 1824 #elif defined(__i386)
1813 1825 static const uint8_t iret_insn[1] = { 0xcf }; /* iret */
1814 1826 #endif /* __i386 */
1815 1827 return (bcmp(pc, iret_insn, sizeof (iret_insn)) == 0);
1816 1828
1817 1829 #endif /* __xpv */
1818 1830 }
1819 1831
1820 1832 #if defined(__i386)
1821 1833
1822 1834 /*
1823 1835 * Test to see if the instruction is part of __SEGREGS_POP
1824 1836 *
1825 1837 * Note carefully the appallingly awful dependency between
1826 1838 * the instruction sequence used in __SEGREGS_POP and these
1827 1839 * instructions encoded here.
1828 1840 */
1829 1841 static int
1830 1842 instr_is_segregs_pop(caddr_t pc)
1831 1843 {
1832 1844 static const uint8_t movw_0_esp_gs[4] = { 0x8e, 0x6c, 0x24, 0x0 };
1833 1845 static const uint8_t movw_4_esp_fs[4] = { 0x8e, 0x64, 0x24, 0x4 };
1834 1846 static const uint8_t movw_8_esp_es[4] = { 0x8e, 0x44, 0x24, 0x8 };
1835 1847 static const uint8_t movw_c_esp_ds[4] = { 0x8e, 0x5c, 0x24, 0xc };
1836 1848
1837 1849 if (bcmp(pc, movw_0_esp_gs, sizeof (movw_0_esp_gs)) == 0 ||
1838 1850 bcmp(pc, movw_4_esp_fs, sizeof (movw_4_esp_fs)) == 0 ||
1839 1851 bcmp(pc, movw_8_esp_es, sizeof (movw_8_esp_es)) == 0 ||
1840 1852 bcmp(pc, movw_c_esp_ds, sizeof (movw_c_esp_ds)) == 0)
1841 1853 return (1);
1842 1854
1843 1855 return (0);
1844 1856 }
1845 1857
1846 1858 #endif /* __i386 */
1847 1859
1848 1860 /*
1849 1861 * Test to see if the instruction is part of _sys_rtt.
1850 1862 *
1851 1863 * Again, on the hypervisor, if we try to IRET to user land with a bad code
1852 1864 * or stack selector we will get vectored through xen_failsafe_callback,
1853 1865 * in which case we assume we got here via _sys_rtt since we only allow
1854 1866 * IRET to user land to take place in _sys_rtt.
1855 1867 */
1856 1868 static int
1857 1869 instr_is_sys_rtt(caddr_t pc)
1858 1870 {
1859 1871 extern void _sys_rtt(), _sys_rtt_end();
1860 1872
1861 1873 if ((uintptr_t)pc < (uintptr_t)_sys_rtt ||
1862 1874 (uintptr_t)pc > (uintptr_t)_sys_rtt_end)
1863 1875 return (0);
1864 1876
1865 1877 return (1);
1866 1878 }
1867 1879
1868 1880 /*
1869 1881 * Handle #gp faults in kernel mode.
1870 1882 *
1871 1883 * One legitimate way this can happen is if we attempt to update segment
1872 1884 * registers to naughty values on the way out of the kernel.
1873 1885 *
1874 1886 * This can happen in a couple of ways: someone - either accidentally or
1875 1887 * on purpose - creates (setcontext(2), lwp_create(2)) or modifies
1876 1888 * (signal(2)) a ucontext that contains silly segment register values.
1877 1889 * Or someone - either accidentally or on purpose - modifies the prgregset_t
1878 1890 * of a subject process via /proc to contain silly segment register values.
1879 1891 *
1880 1892 * (The unfortunate part is that we can end up discovering the bad segment
1881 1893 * register value in the middle of an 'iret' after we've popped most of the
1882 1894 * stack. So it becomes quite difficult to associate an accurate ucontext
1883 1895 * with the lwp, because the act of taking the #gp trap overwrites most of
1884 1896 * what we were going to send the lwp.)
1885 1897 *
1886 1898 * OTOH if it turns out that's -not- the problem, and we're -not- an lwp
1887 1899 * trying to return to user mode and we get a #gp fault, then we need
1888 1900 * to die() -- which will happen if we return non-zero from this routine.
1889 1901 */
1890 1902 static int
1891 1903 kern_gpfault(struct regs *rp)
1892 1904 {
1893 1905 kthread_t *t = curthread;
1894 1906 proc_t *p = ttoproc(t);
1895 1907 klwp_t *lwp = ttolwp(t);
1896 1908 struct regs tmpregs, *trp = NULL;
1897 1909 caddr_t pc = (caddr_t)rp->r_pc;
1898 1910 int v;
1899 1911 uint32_t auditing = AU_AUDITING();
1900 1912
1901 1913 /*
1902 1914 * if we're not an lwp, or, in the case of running native, the
1903 1915 * pc range is outside _sys_rtt, then we should immediately
1904 1916 * be die()ing horribly.
1905 1917 */
1906 1918 if (lwp == NULL || !instr_is_sys_rtt(pc))
1907 1919 return (1);
1908 1920
1909 1921 /*
1910 1922 * So at least we're in the right part of the kernel.
1911 1923 *
1912 1924 * Disassemble the instruction at the faulting pc.
1913 1925 * Once we know what it is, we carefully reconstruct the stack
1914 1926 * based on the order in which the stack is deconstructed in
1915 1927 * _sys_rtt. Ew.
1916 1928 */
1917 1929 if (instr_is_iret(pc)) {
1918 1930 /*
1919 1931 * We took the #gp while trying to perform the IRET.
1920 1932 * This means that either %cs or %ss are bad.
1921 1933 * All we know for sure is that most of the general
1922 1934 * registers have been restored, including the
1923 1935 * segment registers, and all we have left on the
1924 1936 * topmost part of the lwp's stack are the
1925 1937 * registers that the iretq was unable to consume.
1926 1938 *
1927 1939 * All the rest of the state was crushed by the #gp
1928 1940 * which pushed -its- registers atop our old save area
1929 1941 * (because we had to decrement the stack pointer, sigh) so
1930 1942 * all that we can try and do is to reconstruct the
1931 1943 * crushed frame from the #gp trap frame itself.
1932 1944 */
1933 1945 trp = &tmpregs;
1934 1946 trp->r_ss = lwptoregs(lwp)->r_ss;
1935 1947 trp->r_sp = lwptoregs(lwp)->r_sp;
1936 1948 trp->r_ps = lwptoregs(lwp)->r_ps;
1937 1949 trp->r_cs = lwptoregs(lwp)->r_cs;
1938 1950 trp->r_pc = lwptoregs(lwp)->r_pc;
1939 1951 bcopy(rp, trp, offsetof(struct regs, r_pc));
1940 1952
1941 1953 /*
1942 1954 * Validate simple math
1943 1955 */
1944 1956 ASSERT(trp->r_pc == lwptoregs(lwp)->r_pc);
1945 1957 ASSERT(trp->r_err == rp->r_err);
1946 1958
1947 1959
1948 1960
1949 1961 }
1950 1962
1951 1963 #if defined(__amd64)
1952 1964 if (trp == NULL && lwp->lwp_pcb.pcb_rupdate != 0) {
1953 1965
1954 1966 /*
1955 1967 * This is the common case -- we're trying to load
1956 1968 * a bad segment register value in the only section
1957 1969 * of kernel code that ever loads segment registers.
1958 1970 *
1959 1971 * We don't need to do anything at this point because
1960 1972 * the pcb contains all the pending segment register
1961 1973 * state, and the regs are still intact because we
1962 1974 * didn't adjust the stack pointer yet. Given the fidelity
1963 1975 * of all this, we could conceivably send a signal
1964 1976 * to the lwp, rather than core-ing.
1965 1977 */
1966 1978 trp = lwptoregs(lwp);
1967 1979 ASSERT((caddr_t)trp == (caddr_t)rp->r_sp);
1968 1980 }
1969 1981
1970 1982 #elif defined(__i386)
1971 1983
1972 1984 if (trp == NULL && instr_is_segregs_pop(pc))
1973 1985 trp = lwptoregs(lwp);
1974 1986
1975 1987 #endif /* __i386 */
1976 1988
1977 1989 if (trp == NULL)
1978 1990 return (1);
1979 1991
1980 1992 /*
1981 1993 * If we get to here, we're reasonably confident that we've
1982 1994 * correctly decoded what happened on the way out of the kernel.
1983 1995 * Rewrite the lwp's registers so that we can create a core dump
1984 1996 * that (at least vaguely) represents the mcontext we were
1985 1997 * being asked to restore when things went so terribly wrong.
1986 1998 */
1987 1999
1988 2000 /*
1989 2001 * Make sure that we have a meaningful %trapno and %err.
1990 2002 */
1991 2003 trp->r_trapno = rp->r_trapno;
1992 2004 trp->r_err = rp->r_err;
1993 2005
1994 2006 if ((caddr_t)trp != (caddr_t)lwptoregs(lwp))
1995 2007 bcopy(trp, lwptoregs(lwp), sizeof (*trp));
1996 2008
1997 2009
1998 2010 mutex_enter(&p->p_lock);
1999 2011 lwp->lwp_cursig = SIGSEGV;
2000 2012 mutex_exit(&p->p_lock);
2001 2013
2002 2014 /*
2003 2015 * Terminate all LWPs but don't discard them. If another lwp beat
2004 2016 * us to the punch by calling exit(), evaporate now.
2005 2017 */
2006 2018 proc_is_exiting(p);
2007 2019 if (exitlwps(1) != 0) {
2008 2020 mutex_enter(&p->p_lock);
2009 2021 lwp_exit();
2010 2022 }
2011 2023
2012 2024 if (auditing) /* audit core dump */
2013 2025 audit_core_start(SIGSEGV);
2014 2026 v = core(SIGSEGV, B_FALSE);
2015 2027 if (auditing) /* audit core dump */
2016 2028 audit_core_finish(v ? CLD_KILLED : CLD_DUMPED);
2017 2029 exit(v ? CLD_KILLED : CLD_DUMPED, SIGSEGV);
2018 2030 return (0);
2019 2031 }
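/*
 * Editorial sketch, not part of trap.c: a user-level illustration of the
 * setcontext(2) path described in the block comment above kern_gpfault().
 * Restoring a ucontext whose %cs holds a junk selector makes _sys_rtt
 * fault on the IRET; kern_gpfault() then converts that kernel-mode #gp
 * into a SIGSEGV core dump for the process.  Assumes an amd64 build with
 * REG_CS from <sys/regset.h>.
 */
#include <ucontext.h>
#include <sys/regset.h>

int
main(void)
{
	ucontext_t uc;

	(void) getcontext(&uc);
	uc.uc_mcontext.gregs[REG_CS] = 0x7;	/* bogus selector */
	(void) setcontext(&uc);			/* process dies with SIGSEGV */
	return (0);
}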
2020 2032
2021 2033 /*
2022 2034 * dump_tss() - Display the TSS structure
2023 2035 */
2024 2036
2025 2037 #if !defined(__xpv)
2026 2038 #if defined(__amd64)
2027 2039
2028 2040 static void
2029 2041 dump_tss(void)
2030 2042 {
2031 2043 const char tss_fmt[] = "tss.%s:\t0x%p\n"; /* Format string */
2032 2044 tss_t *tss = CPU->cpu_tss;
2033 2045
2034 2046 printf(tss_fmt, "tss_rsp0", (void *)tss->tss_rsp0);
2035 2047 printf(tss_fmt, "tss_rsp1", (void *)tss->tss_rsp1);
2036 2048 printf(tss_fmt, "tss_rsp2", (void *)tss->tss_rsp2);
2037 2049
2038 2050 printf(tss_fmt, "tss_ist1", (void *)tss->tss_ist1);
2039 2051 printf(tss_fmt, "tss_ist2", (void *)tss->tss_ist2);
2040 2052 printf(tss_fmt, "tss_ist3", (void *)tss->tss_ist3);
2041 2053 printf(tss_fmt, "tss_ist4", (void *)tss->tss_ist4);
2042 2054 printf(tss_fmt, "tss_ist5", (void *)tss->tss_ist5);
2043 2055 printf(tss_fmt, "tss_ist6", (void *)tss->tss_ist6);
2044 2056 printf(tss_fmt, "tss_ist7", (void *)tss->tss_ist7);
2045 2057 }
2046 2058
2047 2059 #elif defined(__i386)
2048 2060
2049 2061 static void
2050 2062 dump_tss(void)
2051 2063 {
2052 2064 const char tss_fmt[] = "tss.%s:\t0x%p\n"; /* Format string */
2053 2065 tss_t *tss = CPU->cpu_tss;
2054 2066
2055 2067 printf(tss_fmt, "tss_link", (void *)(uintptr_t)tss->tss_link);
2056 2068 printf(tss_fmt, "tss_esp0", (void *)(uintptr_t)tss->tss_esp0);
2057 2069 printf(tss_fmt, "tss_ss0", (void *)(uintptr_t)tss->tss_ss0);
2058 2070 printf(tss_fmt, "tss_esp1", (void *)(uintptr_t)tss->tss_esp1);
2059 2071 printf(tss_fmt, "tss_ss1", (void *)(uintptr_t)tss->tss_ss1);
2060 2072 printf(tss_fmt, "tss_esp2", (void *)(uintptr_t)tss->tss_esp2);
2061 2073 printf(tss_fmt, "tss_ss2", (void *)(uintptr_t)tss->tss_ss2);
2062 2074 printf(tss_fmt, "tss_cr3", (void *)(uintptr_t)tss->tss_cr3);
2063 2075 printf(tss_fmt, "tss_eip", (void *)(uintptr_t)tss->tss_eip);
2064 2076 printf(tss_fmt, "tss_eflags", (void *)(uintptr_t)tss->tss_eflags);
2065 2077 printf(tss_fmt, "tss_eax", (void *)(uintptr_t)tss->tss_eax);
2066 2078 printf(tss_fmt, "tss_ebx", (void *)(uintptr_t)tss->tss_ebx);
2067 2079 printf(tss_fmt, "tss_ecx", (void *)(uintptr_t)tss->tss_ecx);
2068 2080 printf(tss_fmt, "tss_edx", (void *)(uintptr_t)tss->tss_edx);
2069 2081 printf(tss_fmt, "tss_esp", (void *)(uintptr_t)tss->tss_esp);
2070 2082 }
2071 2083
2072 2084 #endif /* __amd64 */
2073 2085 #endif /* !__xpv */
2074 2086
2075 2087 #if defined(TRAPTRACE)
2076 2088
2077 2089 int ttrace_nrec = 10; /* number of records to dump out */
2078 2090 int ttrace_dump_nregs = 0; /* dump out this many records with regs too */
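/*
 * Editorial note, not from the original source: both tunables are plain
 * kernel variables, so on a TRAPTRACE (DEBUG) kernel they can presumably
 * be raised before reproducing a problem, e.g. with mdb -kw:
 *	ttrace_nrec/W 0t50
 *	ttrace_dump_nregs/W 0t10
 * That usage is an assumption about typical practice, not documented here.
 */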
2079 2091
2080 2092 /*
2081 2093 * Dump out the last ttrace_nrec traptrace records on each CPU
2082 2094 */
2083 2095 static void
2084 2096 dump_ttrace(void)
2085 2097 {
2086 2098 trap_trace_ctl_t *ttc;
2087 2099 trap_trace_rec_t *rec;
2088 2100 uintptr_t current;
2089 2101 int i, j, k;
2090 2102 int n = NCPU;
2091 2103 #if defined(__amd64)
2092 2104 const char banner[] =
2093 2105 "\ncpu address timestamp "
2094 2106 "type vc handler pc\n";
2095 2107 const char fmt1[] = "%3d %016lx %12llx ";
2096 2108 #elif defined(__i386)
2097 2109 const char banner[] =
2098 2110 "\ncpu address timestamp type vc handler pc\n";
2099 2111 const char fmt1[] = "%3d %08lx %12llx ";
2100 2112 #endif
2101 2113 const char fmt2[] = "%4s %3x ";
2102 2114 const char fmt3[] = "%8s ";
2103 2115
2104 2116 if (ttrace_nrec == 0)
2105 2117 return;
2106 2118
2107 2119 printf(banner);
2108 2120
2109 2121 for (i = 0; i < n; i++) {
2110 2122 ttc = &trap_trace_ctl[i];
2111 2123 if (ttc->ttc_first == NULL)
2112 2124 continue;
2113 2125
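/*
 * Walk backward from the slot most recently filled in (ttc_next points at
 * the next slot to be written), wrapping from ttc_first back to the last
 * record before ttc_limit.  (Descriptive comment added editorially.)
 */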
2114 2126 current = ttc->ttc_next - sizeof (trap_trace_rec_t);
2115 2127 for (j = 0; j < ttrace_nrec; j++) {
2116 2128 struct sysent *sys;
2117 2129 struct autovec *vec;
2118 2130 extern struct av_head autovect[];
2119 2131 int type;
2120 2132 ulong_t off;
2121 2133 char *sym, *stype;
2122 2134
2123 2135 if (current < ttc->ttc_first)
2124 2136 current =
2125 2137 ttc->ttc_limit - sizeof (trap_trace_rec_t);
2126 2138
2127 2139 if (current == NULL)
2128 2140 continue;
2129 2141
2130 2142 rec = (trap_trace_rec_t *)current;
2131 2143
2132 2144 if (rec->ttr_stamp == 0)
2133 2145 break;
2134 2146
2135 2147 printf(fmt1, i, (uintptr_t)rec, rec->ttr_stamp);
2136 2148
2137 2149 switch (rec->ttr_marker) {
2138 2150 case TT_SYSCALL:
2139 2151 case TT_SYSENTER:
2140 2152 case TT_SYSC:
2141 2153 case TT_SYSC64:
2142 2154 #if defined(__amd64)
2143 2155 sys = &sysent32[rec->ttr_sysnum];
2144 2156 switch (rec->ttr_marker) {
2145 2157 case TT_SYSC64:
2146 2158 sys = &sysent[rec->ttr_sysnum];
2147 2159 /*FALLTHROUGH*/
2148 2160 #elif defined(__i386)
2149 2161 sys = &sysent[rec->ttr_sysnum];
2150 2162 switch (rec->ttr_marker) {
2151 2163 case TT_SYSC64:
2152 2164 #endif
2153 2165 case TT_SYSC:
2154 2166 stype = "sysc"; /* syscall */
2155 2167 break;
2156 2168 case TT_SYSCALL:
2157 2169 stype = "lcal"; /* lcall */
2158 2170 break;
2159 2171 case TT_SYSENTER:
2160 2172 stype = "syse"; /* sysenter */
2161 2173 break;
2162 2174 default:
2163 2175 break;
2164 2176 }
2165 2177 printf(fmt2, "sysc", rec->ttr_sysnum);
2166 2178 if (sys != NULL) {
2167 2179 sym = kobj_getsymname(
2168 2180 (uintptr_t)sys->sy_callc,
2169 2181 &off);
2170 2182 if (sym != NULL)
2171 2183 printf(fmt3, sym);
2172 2184 else
2173 2185 printf("%p ", sys->sy_callc);
2174 2186 } else {
2175 2187 printf(fmt3, "unknown");
2176 2188 }
2177 2189 break;
2178 2190
2179 2191 case TT_INTERRUPT:
2180 2192 printf(fmt2, "intr", rec->ttr_vector);
2181 2193 if (get_intr_handler != NULL)
2182 2194 vec = (struct autovec *)
2183 2195 (*get_intr_handler)
2184 2196 (rec->ttr_cpuid, rec->ttr_vector);
2185 2197 else
2186 2198 vec =
2187 2199 autovect[rec->ttr_vector].avh_link;
2188 2200
2189 2201 if (vec != NULL) {
2190 2202 sym = kobj_getsymname(
2191 2203 (uintptr_t)vec->av_vector, &off);
2192 2204 if (sym != NULL)
2193 2205 printf(fmt3, sym);
2194 2206 else
2195 2207 printf("%p ", vec->av_vector);
2196 2208 } else {
2197 2209 printf(fmt3, "unknown ");
2198 2210 }
2199 2211 break;
2200 2212
2201 2213 case TT_TRAP:
2202 2214 case TT_EVENT:
2203 2215 type = rec->ttr_regs.r_trapno;
2204 2216 printf(fmt2, "trap", type);
2205 2217 if (type < TRAP_TYPES)
2206 2218 printf(" #%s ",
2207 2219 trap_type_mnemonic[type]);
2208 2220 else
2209 2221 switch (type) {
2210 2222 case T_AST:
2211 2223 printf(fmt3, "ast");
2212 2224 break;
2213 2225 default:
2214 2226 printf(fmt3, "");
2215 2227 break;
2216 2228 }
2217 2229 break;
2218 2230
2219 2231 default:
2220 2232 break;
2221 2233 }
2222 2234
2223 2235 sym = kobj_getsymname(rec->ttr_regs.r_pc, &off);
2224 2236 if (sym != NULL)
2225 2237 printf("%s+%lx\n", sym, off);
2226 2238 else
2227 2239 printf("%lx\n", rec->ttr_regs.r_pc);
2228 2240
2229 2241 if (ttrace_dump_nregs-- > 0) {
2230 2242 int s;
2231 2243
2232 2244 if (rec->ttr_marker == TT_INTERRUPT)
2233 2245 printf(
2234 2246 "\t\tipl %x spl %x pri %x\n",
2235 2247 rec->ttr_ipl,
2236 2248 rec->ttr_spl,
2237 2249 rec->ttr_pri);
2238 2250
2239 2251 dumpregs(&rec->ttr_regs);
2240 2252
2241 2253 printf("\t%3s: %p\n\n", " ct",
2242 2254 (void *)rec->ttr_curthread);
2243 2255
2244 2256 /*
2245 2257 * print out the pc stack that we recorded
2246 2258 * at trap time (if any)
2247 2259 */
2248 2260 for (s = 0; s < rec->ttr_sdepth; s++) {
2249 2261 uintptr_t fullpc;
2250 2262
2251 2263 if (s >= TTR_STACK_DEPTH) {
2252 2264 printf("ttr_sdepth corrupt\n");
2253 2265 break;
2254 2266 }
2255 2267
2256 2268 fullpc = (uintptr_t)rec->ttr_stack[s];
2257 2269
2258 2270 sym = kobj_getsymname(fullpc, &off);
2259 2271 if (sym != NULL)
2260 2272 printf("-> %s+0x%lx()\n",
2261 2273 sym, off);
2262 2274 else
2263 2275 printf("-> 0x%lx()\n", fullpc);
2264 2276 }
2265 2277 printf("\n");
2266 2278 }
2267 2279 current -= sizeof (trap_trace_rec_t);
2268 2280 }
2269 2281 }
2270 2282 }
2271 2283
2272 2284 #endif /* TRAPTRACE */
2273 2285
2274 2286 void
2275 2287 panic_showtrap(struct panic_trap_info *tip)
2276 2288 {
2277 2289 showregs(tip->trap_type, tip->trap_regs, tip->trap_addr);
2278 2290
2279 2291 #if defined(TRAPTRACE)
2280 2292 dump_ttrace();
2281 2293 #endif
2282 2294
2283 2295 #if !defined(__xpv)
2284 2296 if (tip->trap_type == T_DBLFLT)
2285 2297 dump_tss();
2286 2298 #endif
2287 2299 }
2288 2300
2289 2301 void
2290 2302 panic_savetrap(panic_data_t *pdp, struct panic_trap_info *tip)
2291 2303 {
2292 2304 panic_saveregs(pdp, tip->trap_regs);
2293 2305 }
|
↓ open down ↓ |
1419 lines elided |
↑ open up ↑ |