Print this page
15254 %ymm registers not restored after signal handler
15367 x86 getfpregs() summons corrupting %xmm ghosts
15333 want x86 /proc xregs support (libc_db, libproc, mdb, etc.)
15336 want libc functions for extended ucontext_t
15334 want ps_lwphandle-specific reg routines
15328 FPU_CW_INIT mistreats reserved bit
15335 i86pc fpu_subr.c isn't really platform-specific
15332 setcontext(2) isn't actually noreturn
15331 need <sys/stdalign.h>
Change-Id: I7060aa86042dfb989f77fc3323c065ea2eafa9ad
Conflicts:
    usr/src/uts/common/fs/proc/prcontrol.c
    usr/src/uts/intel/os/archdep.c
    usr/src/uts/intel/sys/ucontext.h
    usr/src/uts/intel/syscall/getcontext.c

@@ -29,10 +29,14 @@
 
 /*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
 /*      All Rights Reserved   */
 
+/*
+ * Copyright 2023 Oxide Computer Company
+ */
+
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/sysmacros.h>
 #include <sys/signal.h>
 #include <sys/systm.h>

@@ -110,13 +114,14 @@
  *
  * old %rsp:
  *              <128 bytes of untouched stack space>
  *              <a siginfo_t [optional]>
  *              <a ucontext_t>
- *              <siginfo_t *>
- *              <signal number>
- * new %rsp:    <return address (deliberately invalid)>
+ *              <a ucontext_t's xsave state>
+ *              <siginfo_t *>                             ---+
+ *              <signal number>                              | sigframe
+ * new %rsp:    <return address (deliberately invalid)>   ---+
  *
  * The signal number and siginfo_t pointer are only pushed onto the stack in
  * order to allow stack backtraces.  The actual signal handling code expects the
  * arguments in registers.
  */

@@ -128,12 +133,14 @@
 };
 
 int
 sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
 {
-        volatile int minstacksz;
-        int newstack;
+        volatile size_t minstacksz;
+        boolean_t newstack;
+        size_t xsave_size;
+        int ret;
         label_t ljb;
         volatile caddr_t sp;
         caddr_t fp;
         volatile struct regs *rp;
         volatile greg_t upc;

@@ -169,18 +176,30 @@
          * above means that sp (and thus sigframe) will be 8-byte aligned,
          * but not 16-byte aligned. ucontext_t, however, contains %xmm regs
          * which must be 16-byte aligned. Because of this, for correct
          * alignment, sigframe must be a multiple of 8-bytes in length, but
          * not 16-bytes. This will place ucontext_t at a nice 16-byte boundary.
+         *
+         * When it comes to the xsave state, we don't currently guarantee any
+         * alignment of the resulting data, but we will ensure that the
+         * resulting sp does have proper alignment. This will ensure that the
+         * guarantee on the ucontext_t is not violated.
          */
 
-        /* LINTED: logical expression always true: op "||" */
-        ASSERT((sizeof (struct sigframe) % 16) == 8);
+        CTASSERT((sizeof (struct sigframe) % 16) == 8);
 
         minstacksz = sizeof (struct sigframe) + SA(sizeof (*uc));
         if (sip != NULL)
                 minstacksz += SA(sizeof (siginfo_t));
+
+        if (fpu_xsave_enabled()) {
+                xsave_size = SA(fpu_signal_size(lwp));
+                minstacksz += xsave_size;
+        } else {
+                xsave_size = 0;
+        }
+
         ASSERT((minstacksz & (STACK_ENTRY_ALIGN - 1ul)) == 0);
 
         /*
          * Figure out whether we will be handling this signal on
          * an alternate stack specified by the user.  Then allocate

@@ -293,20 +312,30 @@
                             sizeof (curthread->t_rprof->rp_state));
                 }
         } else
                 sip_addr = NULL;
 
+        no_fault();
+
         /*
-         * save the current context on the user stack directly after the
-         * sigframe. Since sigframe is 8-byte-but-not-16-byte aligned,
-         * and since sizeof (struct sigframe) is 24, this guarantees
-         * 16-byte alignment for ucontext_t and its %xmm registers.
+         * Save the current context on the user stack directly after the
+         * sigframe. Since sigframe is 8-byte-but-not-16-byte aligned, and since
+         * sizeof (struct sigframe) is 24, this guarantees 16-byte alignment for
+         * ucontext_t and its %xmm registers. The xsave state part of the
+         * ucontext_t may be in between these two. However, we have ensured that
+         * the size of the stack space is 16-byte aligned as the actual size may
+         * vary.
          */
-        uc = (ucontext_t *)(sp + sizeof (struct sigframe));
         tuc = kmem_alloc(sizeof (*tuc), KM_SLEEP);
-        no_fault();
-        savecontext(tuc, &lwp->lwp_sigoldmask);
+        if (xsave_size != 0) {
+                tuc->uc_xsave = (unsigned long)(sp + sizeof (struct sigframe));
+        }
+        uc = (ucontext_t *)(sp + sizeof (struct sigframe) + xsave_size);
+        ret = savecontext(tuc, &lwp->lwp_sigoldmask, SAVECTXT_F_EXTD |
+            SAVECTXT_F_ONFAULT);
+        if (ret != 0)
+                goto postfault;
         if (on_fault(&ljb))
                 goto badstack;
         copyout_noerr(tuc, uc, sizeof (*tuc));
         kmem_free(tuc, sizeof (*tuc));
         tuc = NULL;

@@ -376,10 +405,11 @@
          */
         return (1);
 
 badstack:
         no_fault();
+postfault:
         if (watched)
                 watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);
         if (tuc)
                 kmem_free(tuc, sizeof (*tuc));
 #ifdef DEBUG

@@ -397,10 +427,11 @@
  * An i386 SVR4/ABI signal frame looks like this on the stack:
  *
  * old %esp:
  *              <a siginfo32_t [optional]>
  *              <a ucontext32_t>
+ *              <a ucontext32_t's xsave state>
  *              <pointer to that ucontext32_t>
  *              <pointer to that siginfo32_t>
  *              <signo>
  * new %esp:    <return address (deliberately invalid)>
  */

@@ -412,12 +443,14 @@
 };
 
 int
 sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)())
 {
-        volatile int minstacksz;
-        int newstack;
+        volatile size_t minstacksz;
+        boolean_t newstack;
+        size_t xsave_size;
+        int ret;
         label_t ljb;
         volatile caddr_t sp;
         caddr_t fp;
         volatile struct regs *rp;
         volatile greg_t upc;

@@ -432,10 +465,17 @@
         upc = rp->r_pc;
 
         minstacksz = SA32(sizeof (struct sigframe32)) + SA32(sizeof (*uc));
         if (sip != NULL)
                 minstacksz += SA32(sizeof (siginfo32_t));
+
+        if (fpu_xsave_enabled()) {
+                xsave_size = SA32(fpu_signal_size(lwp));
+                minstacksz += xsave_size;
+        } else {
+                xsave_size = 0;
+        }
         ASSERT((minstacksz & (STACK_ALIGN32 - 1)) == 0);
 
         /*
          * Figure out whether we will be handling this signal on
          * an alternate stack specified by the user.  Then allocate

@@ -538,17 +578,24 @@
                             sip_addr->si_mstate,
                             sizeof (curthread->t_rprof->rp_state));
                 }
         } else
                 sip_addr = NULL;
+        no_fault();
 
         /* save the current context on the user stack */
+        tuc = kmem_alloc(sizeof (*tuc), KM_SLEEP);
         fp -= SA32(sizeof (*tuc));
         uc = (ucontext32_t *)fp;
-        tuc = kmem_alloc(sizeof (*tuc), KM_SLEEP);
-        no_fault();
-        savecontext32(tuc, &lwp->lwp_sigoldmask);
+        if (xsave_size != 0) {
+                fp -= xsave_size;
+                tuc->uc_xsave = (int32_t)(uintptr_t)fp;
+        }
+        ret = savecontext32(tuc, &lwp->lwp_sigoldmask, SAVECTXT_F_EXTD |
+            SAVECTXT_F_ONFAULT);
+        if (ret != 0)
+                goto postfault;
         if (on_fault(&ljb))
                 goto badstack;
         copyout_noerr(tuc, uc, sizeof (*tuc));
         kmem_free(tuc, sizeof (*tuc));
         tuc = NULL;

@@ -618,10 +665,11 @@
          */
         return (1);
 
 badstack:
         no_fault();
+postfault:
         if (watched)
                 watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);
         if (tuc)
                 kmem_free(tuc, sizeof (*tuc));
 #ifdef DEBUG