Print this page
15254 %ymm registers not restored after signal handler
15367 x86 getfpregs() summons corrupting %xmm ghosts
15333 want x86 /proc xregs support (libc_db, libproc, mdb, etc.)
15336 want libc functions for extended ucontext_t
15334 want ps_lwphandle-specific reg routines
15328 FPU_CW_INIT mistreats reserved bit
15335 i86pc fpu_subr.c isn't really platform-specific
15332 setcontext(2) isn't actually noreturn
15331 need <sys/stdalign.h>
Change-Id: I7060aa86042dfb989f77fc3323c065ea2eafa9ad
Conflicts:
    usr/src/uts/common/fs/proc/prcontrol.c
    usr/src/uts/intel/os/archdep.c
    usr/src/uts/intel/sys/ucontext.h
    usr/src/uts/intel/syscall/getcontext.c

@@ -25,10 +25,14 @@
  */
 
 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
 /*        All Rights Reserved   */
 
+/*
+ * Copyright 2023 Oxide Computer Company
+ */
+
 #include <sys/types.h>
 #include <sys/t_lock.h>
 #include <sys/param.h>
 #include <sys/cred.h>
 #include <sys/debug.h>

@@ -45,10 +49,11 @@
 #include <sys/pcb.h>
 #include <sys/buf.h>
 #include <sys/signal.h>
 #include <sys/user.h>
 #include <sys/cpuvar.h>
+#include <sys/stdalign.h>
 
 #include <sys/fault.h>
 #include <sys/syscall.h>
 #include <sys/procfs.h>
 #include <sys/cmn_err.h>

@@ -234,47 +239,204 @@
         setfpregs32(lwp, pfp);
 }
 #endif  /* _SYSCALL32_IMPL */
 
 /*
- * Does the system support extra register state?
+ * This is a general function that the main part of /proc and the rest of the
+ * system uses to ask whether a given process actually has extended state. Right
+ * now, this question is not process-specific, but rather CPU specific. We look
+ * at whether xsave has been enabled to determine that. While strictly speaking
+ * one could make the argument that all amd64 CPUs support fxsave and we could
+ * emulate something that only supports that, we don't think that makes sense.
  */
-/* ARGSUSED */
 int
 prhasx(proc_t *p)
 {
-        return (0);
+        return (fpu_xsave_enabled());
 }
 
 /*
- * Get the size of the extra registers.
+ * Return the minimum size that we need to determine the full size of a
+ * prxregset_t.
  */
-/* ARGSUSED */
-int
+boolean_t
+prwriteminxreg(size_t *sizep)
+{
+        *sizep = sizeof (prxregset_hdr_t);
+        return (B_TRUE);
+}
+
+/*
+ * This routine services both ILP32 and LP64 callers. We cannot assume anything
+ * about the alignment of argp and must bcopy things to known structures that we
+ * care about. We are guaranteed we have prxregset_hdr_t bytes because we asked
+ * for them above.
+ */
+boolean_t
+prwritesizexreg(const void *argp, size_t *sizep)
+{
+        prxregset_hdr_t hdr;
+
+        /*
+         * While it's tempting to validate everything here, the only thing we
+         * care about is that we understand the type and the size meets our
+         * constraints:
+         *
+         *  o We actually have an item of type PR_TYPE_XSAVE, otherwise we
+         *    don't know what this is.
+         *  o The indicated size actually contains at least the
+         *    prxregset_hdr_t.
+         *  o The indicated size isn't larger than what the FPU tells us is
+         *    allowed.
+         *
+         * We do not check if the rest of the structure makes semantic sense at
+         * this point. We save all other validation for the normal set function
+         * as that's when we'll have the rest of our data.
+         */
+        bcopy(argp, &hdr, sizeof (hdr));
+        if (hdr.pr_type != PR_TYPE_XSAVE ||
+            hdr.pr_size > fpu_proc_xregs_max_size() ||
+            hdr.pr_size < sizeof (prxregset_hdr_t)) {
+                return (B_FALSE);
+        }
+
+        *sizep = hdr.pr_size - sizeof (prxregset_hdr_t);
+        return (B_TRUE);
+}
+
+/*
+ * Get the size of the extra registers. The ultimate size here depends on a
+ * combination of a few different things. Right now the xregs always have our
+ * header, the illumos-specific XCR information, the xsave information, and then
+ * otherwise this varies based on the items that the CPU supports.
+ *
+ * The ultimate size here is going to be:
+ *
+ *  o 1x prxregset_hdr_t
+ *  o n  prxregset_info_t structures
+ *  o The individual data for each one
+ */
+size_t
 prgetprxregsize(proc_t *p)
 {
-        return (0);
+        uint32_t size;
+
+        fpu_proc_xregs_info(p, NULL, &size, NULL);
+        return (size);
 }
 
 /*
  * Get extra registers.
  */
-/*ARGSUSED*/
 void
-prgetprxregs(klwp_t *lwp, caddr_t prx)
+prgetprxregs(klwp_t *lwp, prxregset_t *prx)
 {
-        /* no extra registers */
+        fpu_proc_xregs_get(lwp, prx);
 }
 
 /*
  * Set extra registers.
+ *
+ * We've been given a regset to set. Before we hand it off to the FPU, we have
+ * to go through and make sure that the different parts of this actually make
+ * sense. The kernel has guaranteed us through the functions above that we have
+ * the number of bytes that the header indicates are present. In particular we
+ * need to validate:
+ *
+ *   o The information in the header is reasonable: we have a known type, flags
+ *     and padding are zero, and there is at least one info structure.
+ *   o Each of the info structures has a valid type, size, and fits within the
+ *     data we were given.
+ *   o We do not validate or modify the actual data in the different pieces for
+ *     validity. That is considered something that the FPU does. Similarly if
+ *     something is read-only or not used, that is something that it checks.
+ *
+ * While we would like to return something other than EINVAL, the /proc APIs
+ * pretty much lead that to being the primary errno for all sorts of situations.
  */
-/*ARGSUSED*/
-void
-prsetprxregs(klwp_t *lwp, caddr_t prx)
+int
+prsetprxregs(klwp_t *lwp, prxregset_t *prx)
 {
-        /* no extra registers */
+        size_t infosz;
+        prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
+
+        if (hdr->pr_type != PR_TYPE_XSAVE || hdr->pr_flags != 0 ||
+            hdr->pr_pad[0] != 0 || hdr->pr_pad[1] != 0 || hdr->pr_pad[2] != 0 ||
+            hdr->pr_pad[3] != 0 || hdr->pr_ninfo == 0) {
+                return (EINVAL);
+        }
+
+        infosz = hdr->pr_ninfo * sizeof (prxregset_info_t) +
+            sizeof (prxregset_hdr_t);
+        if (infosz > hdr->pr_size) {
+                return (EINVAL);
+        }
+
+        for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
+                uint32_t exp_size;
+                size_t need_len, exp_align;
+                const prxregset_info_t *info = &hdr->pr_info[i];
+
+                switch (info->pri_type) {
+                case PRX_INFO_XCR:
+                        exp_size = sizeof (prxregset_xcr_t);
+                        exp_align = alignof (prxregset_xcr_t);
+                        break;
+                case PRX_INFO_XSAVE:
+                        exp_size = sizeof (prxregset_xsave_t);
+                        exp_align = alignof (prxregset_xsave_t);
+                        break;
+                case PRX_INFO_YMM:
+                        exp_size = sizeof (prxregset_ymm_t);
+                        exp_align = alignof (prxregset_ymm_t);
+                        break;
+                case PRX_INFO_OPMASK:
+                        exp_size = sizeof (prxregset_opmask_t);
+                        exp_align = alignof (prxregset_opmask_t);
+                        break;
+                case PRX_INFO_ZMM:
+                        exp_size = sizeof (prxregset_zmm_t);
+                        exp_align = alignof (prxregset_zmm_t);
+                        break;
+                case PRX_INFO_HI_ZMM:
+                        exp_size = sizeof (prxregset_hi_zmm_t);
+                        exp_align = alignof (prxregset_hi_zmm_t);
+                        break;
+                default:
+                        return (EINVAL);
+                }
+
+                if (info->pri_flags != 0 || info->pri_size != exp_size) {
+                        return (EINVAL);
+                }
+
+                if ((info->pri_offset % exp_align) != 0) {
+                        return (EINVAL);
+                }
+
+                /*
+                 * No bytes of this item's entry should overlap with the
+                 * information area. If users want to overlap the actual data
+                 * information for some odd reason, we don't check that and let
+                 * them do what they want. However, the total data for this
+                 * region must actually fit. Because exp_size and pri_offset are
+                 * uint32_t's, we can sum them without overflow worries in an
+                 * LP64 environment.
+                 *
+                 * While we try to guarantee alignment when writing this
+                 * structure out to userland, that is in no way a requirement
+                 * and users are allowed to start these structures wherever
+                 * they want. Hence that is not checked here.
+                 */
+                need_len = (size_t)exp_size + (size_t)info->pri_offset;
+                if (info->pri_offset < infosz ||
+                    need_len > (size_t)hdr->pr_size) {
+                        return (EINVAL);
+                }
+        }
+
+        return (fpu_proc_xregs_set(lwp, prx));
 }
 
 /*
  * Return the base (lower limit) of the process stack.
  */