@@ -28,10 +28,14 @@
  */
 
 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
 /*        All Rights Reserved   */
 
+/*
+ * Copyright 2023 Oxide Computer Company
+ */
+
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/vmparam.h>
 #include <sys/systm.h>
 #include <sys/signal.h>
@@ -50,26 +54,87 @@
 #include <sys/debug.h>
 #include <sys/sysmacros.h>
 #include <sys/sdt.h>
 
 /*
+ * This is a wrapper around copyout_noerr() that turns a fault into an EFAULT
+ * return, giving it the same contract as copyout(). Because we're using
+ * copyout_noerr(), we need an on_fault()/no_fault() pair around it, and we
+ * want to bound that window to just the copy itself. The main reason is that
+ * we're being called back from the FPU code, which holds us under
+ * kpreempt_disable() and related; a larger on_fault()/no_fault() scope would
+ * both hide legitimate kernel errors by masquerading them as user issues and
+ * make it trickier to reason about the correct restoration of our state.
+ */
+static int
+savecontext_copyout(const void *kaddr, void *uaddr, size_t size)
+{
+        label_t ljb;
+        if (!on_fault(&ljb)) {
+                copyout_noerr(kaddr, uaddr, size);
+                no_fault();
+                return (0);
+        } else {
+                no_fault();
+                return (EFAULT);
+        }
+}
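
/*
 * Editor's sketch (not part of this change): the bounded on_fault()/no_fault()
 * pattern above generalizes to the copyin direction as well. The helper name
 * below is hypothetical; copyin_noerr(), on_fault(), and no_fault() are the
 * existing kernel interfaces.
 */
static int
bounded_copyin(const void *uaddr, void *kaddr, size_t size)
{
        label_t ljb;

        if (on_fault(&ljb)) {
                /* A fault occurred during the copy; report it as EFAULT. */
                no_fault();
                return (EFAULT);
        }
        copyin_noerr(uaddr, kaddr, size);
        no_fault();
        return (0);
}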
+
+/*
  * Save user context.
+ *
+ * Generally speaking, ucp is a pointer to kernel memory. In the traditional
+ * version of this (when flags is 0), we simply write and fill out all of the
+ * ucontext_t without any regard for what was there beforehand. However, when
+ * asked to save extended state and the corresponding user pointer in the
+ * ucontext_t is valid (currently only uc_xsave), we also copy that extended
+ * state out to the user pointer.
+ *
+ * We allow the copying to happen in two different ways, mostly because this
+ * is also used in the signal handling context, where we must be much more
+ * careful about how we copy out data.
  */
-void
-savecontext(ucontext_t *ucp, const k_sigset_t *mask)
+int
+savecontext(ucontext_t *ucp, const k_sigset_t *mask, savecontext_flags_t flags)
 {
         proc_t *p = ttoproc(curthread);
         klwp_t *lwp = ttolwp(curthread);
         struct regs *rp = lwptoregs(lwp);
+        boolean_t need_xsave = B_FALSE;
+        boolean_t fpu_en;
+        long user_xsave = 0;
+        int ret;
 
+        VERIFY0(flags & ~(SAVECTXT_F_EXTD | SAVECTXT_F_ONFAULT));
+
         /*
          * We unconditionally assign to every field through the end
          * of the gregs, but we need to bzero() everything -after- that
          * to avoid having any kernel stack garbage escape to userland.
+         *
+         * If we have been asked to save extended state, then we must make sure
+         * that we don't clobber that value. We must also determine whether the
+         * processor has xsave state. If it does not, then we simply preserve
+         * the pointer, but do not write anything out and do not set the flag.
          */
+        if ((flags & SAVECTXT_F_EXTD) != 0) {
+                user_xsave = ucp->uc_xsave;
+                if (fpu_xsave_enabled() && user_xsave != 0) {
+                        need_xsave = B_TRUE;
+                }
+        } else {
+                /*
+                 * The only other flag that we have right now modifies the
+                 * copyout behavior when we're copying out extended
+                 * information. If extended state was not requested, no flags
+                 * should be set at all.
+                 */
+                VERIFY0(flags);
+        }
         bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext_t) -
             offsetof(ucontext_t, uc_mcontext.fpregs));
+        ucp->uc_xsave = user_xsave;
 
         ucp->uc_flags = UC_ALL;
         ucp->uc_link = (struct ucontext *)lwp->lwp_oldcontext;
 
         /*
@@ -112,21 +177,22 @@
                          * honoured. (for eg: the lwp is stopped by
                          * stop_on_fault() called from trap(), after being
                          * awakened it might see a pending signal and call
                          * savecontext(), however on the way back to userland
                          * there is no place it can be detected). Hence in
-                         * anticipation of such occassions, set AST flag for
+                         * anticipation of such occasions, set AST flag for
                          * the thread which will make the thread take an
                          * excursion through trap() where it will be handled
                          * appropriately.
                          */
                         aston(curthread);
                 }
         }
 
         getgregs(lwp, ucp->uc_mcontext.gregs);
-        if (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN)
+        fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
+        if (fpu_en)
                 getfpregs(lwp, &ucp->uc_mcontext.fpregs);
         else
                 ucp->uc_flags &= ~UC_FPU;
 
         if (mask != NULL) {
@@ -137,17 +203,47 @@
         } else {
                 ucp->uc_flags &= ~UC_SIGMASK;
                 bzero(&ucp->uc_sigmask, sizeof (ucp->uc_sigmask));
         }
 
+        /*
+         * Determine if we need to get the rest of the xsave context out here.
+         * If the thread doesn't actually have the FPU enabled, then we don't
+         * need to do this. We also don't have to if it wasn't requested.
+         */
+        if (!need_xsave || !fpu_en) {
+                return (0);
+        }
+
+        ucp->uc_flags |= UC_XSAVE;
+
+        /*
+         * While you might be asking why and contemplating despair, just know
+         * that some things simply need to be done in the face of signals
+         * (half the reason this function exists). Basically, when in signal
+         * context we can't trigger watchpoints. This means we need to tell
+         * the FPU copy logic to use on_fault()/no_fault() and the non-error
+         * form of copyout (which at least still checks that it's a user
+         * address).
+         */
+        if ((flags & SAVECTXT_F_ONFAULT) != 0) {
+                ret = fpu_signal_copyout(lwp, ucp->uc_xsave,
+                    savecontext_copyout);
+        } else {
+                ret = fpu_signal_copyout(lwp, ucp->uc_xsave, copyout);
+        }
+
         if (PROC_IS_BRANDED(p) && BROP(p)->b_savecontext != NULL) {
                 /*
                  * Allow the brand the chance to modify the context we
                  * saved:
                  */
+                /* XXX KEBE SAYS FIX ME! */
                 BROP(p)->b_savecontext(ucp);
         }
+
+        return (ret);
 }
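
/*
 * Editor's sketch (not part of this change): a hypothetical caller in signal
 * delivery context, showing how the new int return value of savecontext() is
 * meant to be consumed. The function name is illustrative only; the flags and
 * the &curthread->t_hold argument mirror the interfaces shown above.
 */
static int
example_save_for_signal(ucontext_t *ucp)
{
        int err;

        /*
         * In signal context we must not trigger watchpoints, so ask for the
         * on_fault()-bounded copyout of the extended (xsave) state.
         */
        err = savecontext(ucp, &curthread->t_hold,
            SAVECTXT_F_EXTD | SAVECTXT_F_ONFAULT);
        if (err != 0) {
                /* The user's uc_xsave pointer could not be written. */
                return (err);
        }
        return (0);
}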
 
 /*
  * Restore user context.
  */
@@ -191,12 +287,22 @@
                 lwp->lwp_eosys = JUSTRETURN;
                 t->t_post_sys = 1;
                 aston(curthread);
         }
 
-        if (ucp->uc_flags & UC_FPU)
+        /*
+         * The logic that copies in the ucontext_t takes care of reconciling
+         * UC_FPU and UC_XSAVE, so at this point only one of them should be
+         * set, if any.
+         */
+        if (ucp->uc_flags & UC_XSAVE) {
+                ASSERT0(ucp->uc_flags & UC_FPU);
+                ASSERT3U((uintptr_t)ucp->uc_xsave, >=, _kernelbase);
+                fpu_set_xsave(lwp, (const void *)ucp->uc_xsave);
+        } else if (ucp->uc_flags & UC_FPU) {
                 setfpregs(lwp, &ucp->uc_mcontext.fpregs);
+        }
 
         if (ucp->uc_flags & UC_SIGMASK) {
                 /*
                  * We don't need to acquire p->p_lock here;
                  * we are manipulating thread-private data.
@@ -213,12 +319,14 @@
 getsetcontext(int flag, void *arg)
 {
         ucontext_t uc;
         ucontext_t *ucp;
         klwp_t *lwp = ttolwp(curthread);
+        void *fpu = NULL;
         stack_t dummy_stk;
         proc_t *p = lwptoproc(lwp);
+        int ret;
 
         /*
          * In future releases, when the ucontext structure grows,
          * getcontext should be modified to only return the fields
          * specified in the uc_flags.  That way, the structure can grow
@@ -230,27 +338,59 @@
         default:
                 return (set_errno(EINVAL));
 
         case GETCONTEXT:
                 schedctl_finish_sigblock(curthread);
-                savecontext(&uc, &curthread->t_hold);
+                ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
+                if (ret != 0)
+                        return (set_errno(ret));
                 if (uc.uc_flags & UC_SIGMASK)
                         SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
                 if (copyout(&uc, arg, sizeof (uc)))
                         return (set_errno(EFAULT));
                 return (0);
 
+        /*
+         * In the case of GETCONTEXT_EXTD, we've theoretically been given all
+         * the required pointers of the appropriate length by libc in the
+         * ucontext_t. We must first copy in the members that we care about to
+         * seed the known extensions. Right now that is just the uc_xsave
+         * member. As we set uc_flags ourselves, we only look at the members
+         * we need to care about.
+         *
+         * The main reason that we have a different entry point is that we
+         * don't want to assume that callers have always properly zeroed their
+         * ucontext_t ahead of calling into libc. In fact, it is often just
+         * declared on the stack, so we can't assume that at all.
+         * getcontext_extd, by contrast, does require it.
+         */
+        case GETCONTEXT_EXTD:
+                schedctl_finish_sigblock(curthread);
+                ucp = arg;
+                if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
+                    sizeof (uc.uc_xsave)) != 0) {
+                        return (set_errno(EFAULT));
+                }
+                ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
+                if (ret != 0)
+                        return (set_errno(ret));
+                if (uc.uc_flags & UC_SIGMASK)
+                        SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
+                if (copyout(&uc, arg, sizeof (uc)))
+                        return (set_errno(EFAULT));
+                return (0);
+
         case SETCONTEXT:
                 ucp = arg;
                 if (ucp == NULL)
                         exit(CLD_EXITED, 0);
                 /*
                  * Don't copyin filler or floating state unless we need it.
                  * The ucontext_t struct and fields are specified in the ABI.
                  */
-                if (copyin(ucp, &uc, sizeof (ucontext_t) -
-                    sizeof (uc.uc_filler) -
+                if (copyin(ucp, &uc, offsetof(ucontext_t, uc_brand_data) -
                     sizeof (uc.uc_mcontext.fpregs))) {
                         return (set_errno(EFAULT));
                 }
                 if (uc.uc_flags & UC_SIGMASK)
                         SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);
@@ -268,10 +408,25 @@
                 if (PROC_IS_BRANDED(p) && copyin(&ucp->uc_brand_data,
                     &uc.uc_brand_data, sizeof (uc.uc_brand_data)) != 0) {
                         return (set_errno(EFAULT));
                 }
 
+                uc.uc_xsave = 0;
+                if ((uc.uc_flags & UC_XSAVE) != 0) {
+                        if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
+                            sizeof (uc.uc_xsave)) != 0) {
+                                return (set_errno(EFAULT));
+                        }
+
+                        ret = fpu_signal_copyin(lwp, &uc);
+                        if (ret != 0) {
+                                return (set_errno(ret));
+                        }
+                }
+
                 restorecontext(&uc);
 
                 if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
                         (void) copyout(&uc.uc_stack, (stack_t *)lwp->lwp_ustack,
                             sizeof (uc.uc_stack));
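
/*
 * Editor's sketch (not part of this change): the intended userland flow for
 * GETCONTEXT_EXTD, assuming the accompanying libc interfaces ucontext_alloc(3C),
 * getcontext_extd(3C), and ucontext_free(3C). Treat the names and signatures
 * here as assumptions rather than a definitive reference.
 */
#include <ucontext.h>
#include <stdio.h>

int
main(void)
{
        ucontext_t *ucp;

        /* libc sizes the buffer and seeds uc_xsave before trapping in. */
        ucp = ucontext_alloc(0);
        if (ucp == NULL) {
                perror("ucontext_alloc");
                return (1);
        }
        if (getcontext_extd(ucp) != 0) {
                perror("getcontext_extd");
                ucontext_free(ucp);
                return (1);
        }
        /* A later setcontext(ucp) restores the full extended FPU state. */
        ucontext_free(ucp);
        return (0);
}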
@@ -293,19 +448,37 @@
 #ifdef _SYSCALL32_IMPL
 
 /*
  * Save user context for 32-bit processes.
  */
-void
-savecontext32(ucontext32_t *ucp, const k_sigset_t *mask)
+int
+savecontext32(ucontext32_t *ucp, const k_sigset_t *mask,
+    savecontext_flags_t flags)
 {
         proc_t *p = ttoproc(curthread);
         klwp_t *lwp = ttolwp(curthread);
         struct regs *rp = lwptoregs(lwp);
+        boolean_t need_xsave = B_FALSE;
+        boolean_t fpu_en;
+        int32_t user_xsave = 0;
+        uintptr_t uaddr;
+        int ret;
 
+        /*
+         * See savecontext for an explanation of this.
+         */
+        if ((flags & SAVECTXT_F_EXTD) != 0) {
+                user_xsave = ucp->uc_xsave;
+                if (fpu_xsave_enabled() && user_xsave != 0) {
+                        need_xsave = B_TRUE;
+                }
+        } else {
+                VERIFY0(flags);
+        }
         bzero(&ucp->uc_mcontext.fpregs, sizeof (ucontext32_t) -
             offsetof(ucontext32_t, uc_mcontext.fpregs));
+        ucp->uc_xsave = user_xsave;
 
         ucp->uc_flags = UC_ALL;
         ucp->uc_link = (caddr32_t)lwp->lwp_oldcontext;
 
         if (lwp->lwp_ustack == (uintptr_t)NULL ||
@@ -346,11 +519,12 @@
                         aston(curthread);
                 }
         }
 
         getgregs32(lwp, ucp->uc_mcontext.gregs);
-        if (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN)
+        fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
+        if (fpu_en)
                 getfpregs32(lwp, &ucp->uc_mcontext.fpregs);
         else
                 ucp->uc_flags &= ~UC_FPU;
 
         if (mask != NULL) {
@@ -361,17 +535,42 @@
         } else {
                 ucp->uc_flags &= ~UC_SIGMASK;
                 bzero(&ucp->uc_sigmask, sizeof (ucp->uc_sigmask));
         }
 
+        if (!need_xsave || !fpu_en) {
+                return (0);
+        }
+
+        ucp->uc_flags |= UC_XSAVE;
+
+        /*
+         * Because we do not want to change or break existing programs, the
+         * filler in the ucontext_t has always been declared as a long, which
+         * is signed. In this 32-bit version that makes uc_xsave an int32_t.
+         * We cannot cast it directly to a uintptr_t, otherwise we might get
+         * sign extension, so we first go through a uint32_t and then a
+         * uintptr_t. Otherwise, see savecontext().
+         */
+        uaddr = (uintptr_t)(uint32_t)ucp->uc_xsave;
+        if ((flags & SAVECTXT_F_ONFAULT) != 0) {
+                ret = fpu_signal_copyout(lwp, uaddr, savecontext_copyout);
+        } else {
+                ret = fpu_signal_copyout(lwp, uaddr, copyout);
+        }
+
         if (PROC_IS_BRANDED(p) && BROP(p)->b_savecontext32 != NULL) {
                 /*
                  * Allow the brand the chance to modify the context we
                  * saved:
                  */
+                /* XXX KEBE SAYS FIX ME */
                 BROP(p)->b_savecontext32(ucp);
         }
+
+        return (ret);
 }
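
/*
 * Editor's sketch (not part of this change): why savecontext32() widens
 * uc_xsave through a uint32_t before casting to a uintptr_t. With a signed
 * 32-bit value whose top bit is set, a direct cast sign-extends on a 64-bit
 * kernel; going through uint32_t zero-extends instead.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        int32_t uc_xsave = (int32_t)0xfe000000;  /* a high 32-bit user address */

        /* Direct cast: sign-extends to 0xfffffffffe000000 on LP64. */
        printf("%p\n", (void *)(uintptr_t)uc_xsave);
        /* Via uint32_t: zero-extends to 0xfe000000. */
        printf("%p\n", (void *)(uintptr_t)(uint32_t)uc_xsave);
        return (0);
}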
 
 int
 getsetcontext32(int flag, void *arg)
 {
@@ -380,30 +579,51 @@
         ucontext32_t *ucp;
         klwp_t *lwp = ttolwp(curthread);
         caddr32_t ustack32;
         stack32_t dummy_stk32;
         proc_t *p = lwptoproc(lwp);
+        int ret;
 
         switch (flag) {
         default:
                 return (set_errno(EINVAL));
 
         case GETCONTEXT:
                 schedctl_finish_sigblock(curthread);
-                savecontext32(&uc, &curthread->t_hold);
+                ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
+                if (ret != 0)
+                        return (set_errno(ret));
                 if (uc.uc_flags & UC_SIGMASK)
                         SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
                 if (copyout(&uc, arg, sizeof (uc)))
                         return (set_errno(EFAULT));
                 return (0);
 
+        /*
+         * See getsetcontext() for an explanation of what is going on here.
+         */
+        case GETCONTEXT_EXTD:
+                schedctl_finish_sigblock(curthread);
+                ucp = arg;
+                if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
+                    sizeof (uc.uc_xsave)) != 0) {
+                        return (set_errno(EFAULT));
+                }
+                ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
+                if (ret != 0)
+                        return (set_errno(ret));
+                if (uc.uc_flags & UC_SIGMASK)
+                        SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
+                if (copyout(&uc, arg, sizeof (uc)))
+                        return (set_errno(EFAULT));
+                return (0);
+
         case SETCONTEXT:
                 ucp = arg;
                 if (ucp == NULL)
                         exit(CLD_EXITED, 0);
-                if (copyin(ucp, &uc, sizeof (uc) -
-                    sizeof (uc.uc_filler) -
+                if (copyin(ucp, &uc, offsetof(ucontext32_t, uc_brand_data) -
                     sizeof (uc.uc_mcontext.fpregs))) {
                         return (set_errno(EFAULT));
                 }
                 if (uc.uc_flags & UC_SIGMASK)
                         SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);
@@ -420,11 +640,26 @@
                 if (PROC_IS_BRANDED(p) && copyin(&ucp->uc_brand_data,
                     &uc.uc_brand_data, sizeof (uc.uc_brand_data)) != 0) {
                         return (set_errno(EFAULT));
                 }
 
+                uc.uc_xsave = 0;
+                if ((uc.uc_flags & UC_XSAVE) != 0 &&
+                    copyin(&ucp->uc_xsave, &uc.uc_xsave,
+                    sizeof (uc.uc_xsave)) != 0) {
+                        return (set_errno(EFAULT));
+                }
+
                 ucontext_32ton(&uc, &ucnat);
+
+                if ((ucnat.uc_flags & UC_XSAVE) != 0) {
+                        ret = fpu_signal_copyin(lwp, &ucnat);
+                        if (ret != 0) {
+                                return (set_errno(ret));
+                        }
+                }
+
                 restorecontext(&ucnat);
 
                 if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
                         (void) copyout(&uc.uc_stack,
                             (stack32_t *)lwp->lwp_ustack, sizeof (uc.uc_stack));