Print this page
15254 %ymm registers not restored after signal handler
15367 x86 getfpregs() summons corrupting %xmm ghosts
15333 want x86 /proc xregs support (libc_db, libproc, mdb, etc.)
15336 want libc functions for extended ucontext_t
15334 want ps_lwphandle-specific reg routines
15328 FPU_CW_INIT mistreats reserved bit
15335 i86pc fpu_subr.c isn't really platform-specific
15332 setcontext(2) isn't actually noreturn
15331 need <sys/stdalign.h>
Change-Id: I7060aa86042dfb989f77fc3323c065ea2eafa9ad
Conflicts:
    usr/src/uts/common/fs/proc/prcontrol.c
    usr/src/uts/intel/os/archdep.c
    usr/src/uts/intel/sys/ucontext.h
    usr/src/uts/intel/syscall/getcontext.c

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/intel/fs/proc/prmachdep.c
          +++ new/usr/src/uts/intel/fs/proc/prmachdep.c
↓ open down ↓ 17 lines elided ↑ open up ↑
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  28      -/*        All Rights Reserved   */
       28 +/*        All Rights Reserved   */
  29   29  
       30 +/*
       31 + * Copyright 2023 Oxide Computer Company
       32 + */
       33 +
  30   34  #include <sys/types.h>
  31   35  #include <sys/t_lock.h>
  32   36  #include <sys/param.h>
  33   37  #include <sys/cred.h>
  34   38  #include <sys/debug.h>
  35   39  #include <sys/inline.h>
  36   40  #include <sys/kmem.h>
  37   41  #include <sys/proc.h>
  38   42  #include <sys/regset.h>
  39   43  #include <sys/privregs.h>
  40   44  #include <sys/sysmacros.h>
  41   45  #include <sys/systm.h>
  42   46  #include <sys/vfs.h>
  43   47  #include <sys/vnode.h>
  44   48  #include <sys/psw.h>
  45   49  #include <sys/pcb.h>
  46   50  #include <sys/buf.h>
  47   51  #include <sys/signal.h>
  48   52  #include <sys/user.h>
  49   53  #include <sys/cpuvar.h>
       54 +#include <sys/stdalign.h>
  50   55  
  51   56  #include <sys/fault.h>
  52   57  #include <sys/syscall.h>
  53   58  #include <sys/procfs.h>
  54   59  #include <sys/cmn_err.h>
  55   60  #include <sys/stack.h>
  56   61  #include <sys/debugreg.h>
  57   62  #include <sys/copyops.h>
  58   63  
  59   64  #include <sys/vmem.h>
↓ open down ↓ 169 lines elided ↑ open up ↑
 229  234  
 230  235  #if defined(_SYSCALL32_IMPL)
 231  236  void
 232  237  prsetprfpregs32(klwp_t *lwp, prfpregset32_t *pfp)
 233  238  {
 234  239          setfpregs32(lwp, pfp);
 235  240  }
 236  241  #endif  /* _SYSCALL32_IMPL */
 237  242  
 238  243  /*
 239      - * Does the system support extra register state?
      244 + * This is a general function that the main part of /proc and the rest of the
      245 + * system use to ask whether a given process actually has extended state. Right
      246 + * now, this question is not process-specific, but rather CPU-specific. We look
      247 + * at whether xsave has been enabled to determine that. While strictly speaking
      248 + * one could make the argument that all amd64 CPUs support fxsave and we could
      249 + * emulate something that only supports that, we don't think that makes sense.
 240  250   */
 241      -/* ARGSUSED */
 242  251  int
 243  252  prhasx(proc_t *p)
 244  253  {
 245      -        return (0);
      254 +        return (fpu_xsave_enabled());
 246  255  }
 247  256  
 248  257  /*
 249      - * Get the size of the extra registers.
      258 + * Return the minimum size that we need to determine the full size of a
      259 + * prxregset_t.
 250  260   */
 251      -/* ARGSUSED */
 252      -int
      261 +boolean_t
      262 +prwriteminxreg(size_t *sizep)
      263 +{
      264 +        *sizep = sizeof (prxregset_hdr_t);
      265 +        return (B_TRUE);
      266 +}
      267 +
      268 +/*
      269 + * This routine services both ILP32 and LP64 callers. We cannot assume anything
      270 + * about the alignment of argp and must bcopy things to known structures that we
      271 + * care about. We are guaranteed we have prxregset_hdr_t bytes because we asked
      272 + * for them above.
      273 + */
      274 +boolean_t
      275 +prwritesizexreg(const void *argp, size_t *sizep)
      276 +{
      277 +        prxregset_hdr_t hdr;
      278 +
      279 +        /*
      280 +         * While it's tempting to validate everything here, the only thing we
      281 +         * care about is that we understand the type and the size meets our
      282 +         * constraints:
      283 +         *
      284 +         *  o We actually have an item of type PR_TYPE_XSAVE, otherwise we
      285 +         *    don't know what this is.
      286 +         *  o The indicated size actually contains at least the
      287 +         *    prxregset_hdr_t.
      288 +         *  o The indicated size isn't larger than what the FPU tells us is
      289 +         *    allowed.
      290 +         *
      291 +         * We do not check if the rest of the structure makes semantic sense at
      292 +         * this point. We save all other validation for the normal set function
      293 +         * as that's when we'll have the rest of our data.
      294 +         */
      295 +        bcopy(argp, &hdr, sizeof (hdr));
      296 +        if (hdr.pr_type != PR_TYPE_XSAVE ||
      297 +            hdr.pr_size > fpu_proc_xregs_max_size() ||
      298 +            hdr.pr_size < sizeof (prxregset_hdr_t)) {
      299 +                return (B_FALSE);
      300 +        }
      301 +
      302 +        *sizep = hdr.pr_size - sizeof (prxregset_hdr_t);
      303 +        return (B_TRUE);
      304 +}
      305 +
      306 +/*
      307 + * Get the size of the extra registers. The ultimate size here depends on a
      308 + * combination of a few different things. Right now the xregs always have our
      309 + * header, the illumos-specific XCR information, the xsave information, and then
      310 + * otherwise this varies based on the items that the CPU supports.
      311 + *
      312 + * The ultimate size here is going to be:
      313 + *
      314 + *  o 1x prxregset_hdr_t
      315 + *  o n  prxregset_info_t structures
      316 + *  o The individual data for each one
      317 + */
      318 +size_t
 253  319  prgetprxregsize(proc_t *p)
 254  320  {
 255      -        return (0);
      321 +        uint32_t size;
      322 +
      323 +        fpu_proc_xregs_info(p, NULL, &size, NULL);
      324 +        return (size);
 256  325  }
 257  326  
 258  327  /*
 259  328   * Get extra registers.
 260  329   */
 261      -/*ARGSUSED*/
 262  330  void
 263      -prgetprxregs(klwp_t *lwp, caddr_t prx)
      331 +prgetprxregs(klwp_t *lwp, prxregset_t *prx)
 264  332  {
 265      -        /* no extra registers */
      333 +        fpu_proc_xregs_get(lwp, prx);
 266  334  }
 267  335  
 268  336  /*
 269  337   * Set extra registers.
      338 + *
      339 + * We've been given a regset to set. Before we hand it off to the FPU, we have
      340 + * to go through and make sure that the different parts of this actually make
      341 + * sense. The kernel has guaranteed us through the functions above that we have
      342 + * the number of bytes that the header indicates are present. In particular we
      343 + * need to validate:
      344 + *
      345 + *   o The information in the header is reasonable: we have a known type, flags
      346 + *     and padding are zero, and there is at least one info structure.
      347 + *   o Each of the info structures has a valid type, size, and fits within the
      348 + *     data we were given.
      349 + *   o We do not validate or modify the actual data in the different pieces for
      350 + *     validity. That is considered something that the FPU does. Similarly if
      351 + *     something is read-only or not used, that is something that it checks.
      352 + *
      353 + * While we would like to return something other than EINVAL, the /proc APIs
      354 + * pretty much lead that to being the primary errno for all sorts of situations.
 270  355   */
 271      -/*ARGSUSED*/
 272      -void
 273      -prsetprxregs(klwp_t *lwp, caddr_t prx)
      356 +int
      357 +prsetprxregs(klwp_t *lwp, prxregset_t *prx)
 274  358  {
 275      -        /* no extra registers */
      359 +        size_t infosz;
      360 +        prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
      361 +
      362 +        if (hdr->pr_type != PR_TYPE_XSAVE || hdr->pr_flags != 0 ||
      363 +            hdr->pr_pad[0] != 0 || hdr->pr_pad[1] != 0 || hdr->pr_pad[2] != 0 ||
      364 +            hdr->pr_pad[3] != 0 || hdr->pr_ninfo == 0) {
      365 +                return (EINVAL);
      366 +        }
      367 +
      368 +        infosz = hdr->pr_ninfo * sizeof (prxregset_info_t) +
      369 +            sizeof (prxregset_hdr_t);
      370 +        if (infosz > hdr->pr_size) {
      371 +                return (EINVAL);
      372 +        }
      373 +
      374 +        for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
      375 +                uint32_t exp_size;
      376 +                size_t need_len, exp_align;
      377 +                const prxregset_info_t *info = &hdr->pr_info[i];
      378 +
      379 +                switch (info->pri_type) {
      380 +                case PRX_INFO_XCR:
      381 +                        exp_size = sizeof (prxregset_xcr_t);
      382 +                        exp_align = alignof (prxregset_xcr_t);
      383 +                        break;
      384 +                case PRX_INFO_XSAVE:
      385 +                        exp_size = sizeof (prxregset_xsave_t);
      386 +                        exp_align = alignof (prxregset_xsave_t);
      387 +                        break;
      388 +                case PRX_INFO_YMM:
      389 +                        exp_size = sizeof (prxregset_ymm_t);
      390 +                        exp_align = alignof (prxregset_ymm_t);
      391 +                        break;
      392 +                case PRX_INFO_OPMASK:
      393 +                        exp_size = sizeof (prxregset_opmask_t);
      394 +                        exp_align = alignof (prxregset_opmask_t);
      395 +                        break;
      396 +                case PRX_INFO_ZMM:
      397 +                        exp_size = sizeof (prxregset_zmm_t);
      398 +                        exp_align = alignof (prxregset_zmm_t);
      399 +                        break;
      400 +                case PRX_INFO_HI_ZMM:
      401 +                        exp_size = sizeof (prxregset_hi_zmm_t);
      402 +                        exp_align = alignof (prxregset_hi_zmm_t);
      403 +                        break;
      404 +                default:
      405 +                        return (EINVAL);
      406 +                }
      407 +
      408 +                if (info->pri_flags != 0 || info->pri_size != exp_size) {
      409 +                        return (EINVAL);
      410 +                }
      411 +
      412 +                if ((info->pri_offset % exp_align) != 0) {
      413 +                        return (EINVAL);
      414 +                }
      415 +
      416 +                /*
      417 +                 * No bytes of this item's entry should overlap with the
      418 +                 * information area. If users want to overlap the actual data
      419 +                 * information for some odd reason, we don't check that and let
      420 +                 * them do what they want. However, the total data for this
      421 +                 * region must actually fit. Because exp_size and pri_offset are
      422 +                 * uint32_t's, we can sum them without overflow worries in an
      423 +                 * LP64 environment.
      424 +                 *
      425 +                 * While we try to guarantee alignment when writing this
      426 +                 * structure out to userland, that is in no way a requirement
      427 +                 * and users are allowed to start these structures wherever they
      428 +                 * want. Hence that is not checked here.
      429 +                 */
      430 +                need_len = (size_t)exp_size + (size_t)info->pri_offset;
      431 +                if (info->pri_offset < infosz ||
      432 +                    need_len > (size_t)hdr->pr_size) {
      433 +                        return (EINVAL);
      434 +                }
      435 +        }
      436 +
      437 +        return (fpu_proc_xregs_set(lwp, prx));
 276  438  }
 277  439  
 278  440  /*
 279  441   * Return the base (lower limit) of the process stack.
 280  442   */
 281  443  caddr_t
 282  444  prgetstackbase(proc_t *p)
 283  445  {
 284  446          return (p->p_usrstack - p->p_stksize);
 285  447  }
↓ open down ↓ 290 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX