Print this page
    
OS-4915 want FX high priority zone configuration option
OS-4925 ps pri shows misleading value for zone in RT scheduling class
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/disp/fx.c
          +++ new/usr/src/uts/common/disp/fx.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24      - * Copyright 2013, Joyent, Inc. All rights reserved.
       24 + * Copyright 2015, Joyent, Inc.
  25   25   */
  26   26  
  27   27  #include <sys/types.h>
  28   28  #include <sys/param.h>
  29   29  #include <sys/sysmacros.h>
  30   30  #include <sys/cred.h>
  31   31  #include <sys/proc.h>
  32   32  #include <sys/session.h>
  33   33  #include <sys/strsubr.h>
  34   34  #include <sys/user.h>
  35   35  #include <sys/priocntl.h>
  36   36  #include <sys/class.h>
  37   37  #include <sys/disp.h>
  38   38  #include <sys/procset.h>
  39   39  #include <sys/debug.h>
  40   40  #include <sys/kmem.h>
  41   41  #include <sys/errno.h>
  42   42  #include <sys/fx.h>
  43   43  #include <sys/fxpriocntl.h>
  44   44  #include <sys/cpuvar.h>
  45   45  #include <sys/systm.h>
  46   46  #include <sys/vtrace.h>
  47   47  #include <sys/schedctl.h>
  48   48  #include <sys/tnf_probe.h>
  49   49  #include <sys/sunddi.h>
  50   50  #include <sys/spl.h>
  51   51  #include <sys/modctl.h>
  52   52  #include <sys/policy.h>
  53   53  #include <sys/sdt.h>
  54   54  #include <sys/cpupart.h>
  55   55  #include <sys/cpucaps.h>
  56   56  
  57   57  static pri_t fx_init(id_t, int, classfuncs_t **);
  58   58  
           /*
            * Scheduling class switch entry for this module: the class name "FX"
            * and its initialization routine, fx_init().
            */
  59   59  static struct sclass csw = {
  60   60          "FX",
  61   61          fx_init,
  62   62          0
  63   63  };
  
    | 
      ↓ open down ↓ | 
    29 lines elided | 
    
      ↑ open up ↑ | 
  
  64   64  
           /* Scheduling class module description; ties mod_schedops to csw. */
  65   65  static struct modlsched modlsched = {
  66   66          &mod_schedops, "Fixed priority sched class", &csw
  67   67  };
  68   68  
           /* Module linkage passed to mod_install() and mod_info() below. */
  69   69  static struct modlinkage modlinkage = {
  70   70          MODREV_1, (void *)&modlsched, NULL
  71   71  };
  72   72  
  73   73  
  74      -/*
  75      - * control flags (kparms->fx_cflags).
  76      - */
  77      -#define FX_DOUPRILIM    0x01    /* change user priority limit */
  78      -#define FX_DOUPRI       0x02    /* change user priority */
  79      -#define FX_DOTQ         0x04    /* change FX time quantum */
  80      -
  81      -
  82      -#define FXMAXUPRI 60            /* maximum user priority setting */
  83      -
  84   74  #define FX_MAX_UNPRIV_PRI       0       /* maximum unprivileged priority */
  85   75  
  86   76  /*
  87   77   * The fxproc_t structures that have a registered callback vector
  88   78   * are also kept in an array of circular doubly linked lists. A hash of
  89   79   * the thread id (from ddi_get_kt_did()) selects the list on which each
  90   80   * such fxproc structure is placed. Each list has a dummy "head" which
  91   81   * is never removed, so a list is never empty.
  92   82   */
  93   83  
  94   84  #define FX_CB_LISTS 16          /* number of lists, must be power of 2 */
  95   85  #define FX_CB_LIST_HASH(ktid)   ((uint_t)ktid & (FX_CB_LISTS - 1))
  96   86  
  97   87  /*
            * Insert fxproc into callback list: fxpp is linked in immediately
            * after the dummy head of the list selected by FX_CB_LIST_HASH(),
            * with that list's lock held.
            */
  98   88  #define FX_CB_LIST_INSERT(fxpp)                                         \
  99   89  {                                                                       \
 100   90          int index = FX_CB_LIST_HASH(fxpp->fx_ktid);                     \
 101   91          kmutex_t *lockp = &fx_cb_list_lock[index];                      \
 102   92          fxproc_t *headp = &fx_cb_plisthead[index];                      \
 103   93          mutex_enter(lockp);                                             \
 104   94          fxpp->fx_cb_next = headp->fx_cb_next;                           \
 105   95          fxpp->fx_cb_prev = headp;                                       \
 106   96          headp->fx_cb_next->fx_cb_prev = fxpp;                           \
 107   97          headp->fx_cb_next = fxpp;                                       \
 108   98          mutex_exit(lockp);                                              \
 109   99  }
 110  100  
 111  101  /*
 112  102   * Remove fxpp from its callback list, with that list's lock held.
            * The neighbours are linked to each other; fxpp's own link fields
            * are left unchanged.
 113  103   */
 114  104  #define FX_CB_LIST_DELETE(fxpp)                                         \
 115  105  {                                                                       \
 116  106          int index = FX_CB_LIST_HASH(fxpp->fx_ktid);                     \
 117  107          kmutex_t *lockp = &fx_cb_list_lock[index];                      \
 118  108          mutex_enter(lockp);                                             \
 119  109          fxpp->fx_cb_prev->fx_cb_next = fxpp->fx_cb_next;                \
 120  110          fxpp->fx_cb_next->fx_cb_prev = fxpp->fx_cb_prev;                \
 121  111          mutex_exit(lockp);                                              \
 122  112  }
 123  113  
           /* True when fxpp has a callback vector registered (fx_callback set). */
 124  114  #define FX_HAS_CB(fxpp) (fxpp->fx_callback != NULL)
 125  115  
 126  116  /* clamp pri, in place, to the range 0 through fx_maxumdpri */
 127  117  
 128  118  #define FX_ADJUST_PRI(pri)                                              \
 129  119  {                                                                       \
 130  120          if (pri < 0)                                                    \
 131  121                  pri = 0;                                                \
 132  122          else if (pri > fx_maxumdpri)                                    \
 133  123                  pri = fx_maxumdpri;                                     \
 134  124  }
 135  125  
           /*
            * Adjust q in place: values above INT_MAX become INT_MAX, and
            * values less than or equal to zero become FX_TQINF.
            */
 136  126  #define FX_ADJUST_QUANTUM(q)                                            \
 137  127  {                                                                       \
 138  128          if (q > INT_MAX)                                                \
 139  129                  q = INT_MAX;                                            \
 140  130          else if (q <= 0)                                                \
 141  131                  q = FX_TQINF;                                           \
 142  132  }
 143  133  
           /*
            * True when pri is non-negative or FX_CB_NOCHANGE, and quantum is
            * non-negative or one of FX_NOCHANGE, FX_TQDEF or FX_TQINF.
            */
 144  134  #define FX_ISVALID(pri, quantum) \
 145  135          (((pri >= 0) || (pri == FX_CB_NOCHANGE)) &&                     \
 146  136              ((quantum >= 0) || (quantum == FX_NOCHANGE) ||              \
 147  137                  (quantum == FX_TQDEF) || (quantum == FX_TQINF)))
 148  138  
 149  139  
 150  140  static id_t     fx_cid;         /* fixed priority class ID */
 151  141  static fxdpent_t *fx_dptbl;     /* fixed priority disp parameter table */
 152  142  
           /* max user priority; reported by fx_getclinfo() and fx_getclpri() */
 153  143  static pri_t    fx_maxupri = FXMAXUPRI;
 154  144  static pri_t    fx_maxumdpri;   /* max user mode fixed priority */
 155  145  
 156  146  static pri_t    fx_maxglobpri;  /* maximum global priority used by fx class */
 157  147  static kmutex_t fx_dptblock;    /* protects fixed priority dispatch table */
 158  148  
 159  149  
 160  150  static kmutex_t fx_cb_list_lock[FX_CB_LISTS];   /* protects list of fxprocs */
 161  151                                                  /* that have callbacks */
 162  152  static fxproc_t fx_cb_plisthead[FX_CB_LISTS];   /* dummy fxproc at head of */
 163  153                                                  /* list of fxprocs with */
 164  154                                                  /* callbacks */
 165  155  
           /*
            * Forward declarations of the class and thread operations that are
            * installed in fx_classfuncs.
            */
 166  156  static int      fx_admin(caddr_t, cred_t *);
 167  157  static int      fx_getclinfo(void *);
 168  158  static int      fx_parmsin(void *);
 169  159  static int      fx_parmsout(void *, pc_vaparms_t *);
 170  160  static int      fx_vaparmsin(void *, pc_vaparms_t *);
 171  161  static int      fx_vaparmsout(void *, pc_vaparms_t *);
 172  162  static int      fx_getclpri(pcpri_t *);
 173  163  static int      fx_alloc(void **, int);
 174  164  static void     fx_free(void *);
 175  165  static int      fx_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
 176  166  static void     fx_exitclass(void *);
 177  167  static int      fx_canexit(kthread_t *, cred_t *);
 178  168  static int      fx_fork(kthread_t *, kthread_t *, void *);
 179  169  static void     fx_forkret(kthread_t *, kthread_t *);
 180  170  static void     fx_parmsget(kthread_t *, void *);
 181  171  static int      fx_parmsset(kthread_t *, void *, id_t, cred_t *);
 182  172  static void     fx_stop(kthread_t *, int, int);
 183  173  static void     fx_exit(kthread_t *);
 184  174  static pri_t    fx_swapin(kthread_t *, int);
 185  175  static pri_t    fx_swapout(kthread_t *, int);
 186  176  static void     fx_trapret(kthread_t *);
 187  177  static void     fx_preempt(kthread_t *);
 188  178  static void     fx_setrun(kthread_t *);
 189  179  static void     fx_sleep(kthread_t *);
 190  180  static void     fx_tick(kthread_t *);
 191  181  static void     fx_wakeup(kthread_t *);
 192  182  static int      fx_donice(kthread_t *, cred_t *, int, int *);
 193  183  static int      fx_doprio(kthread_t *, cred_t *, int, int *);
 194  184  static pri_t    fx_globpri(kthread_t *);
 195  185  static void     fx_yield(kthread_t *);
 196  186  static void     fx_nullsys();
 197  187  
           /* Supplies the dispatch parameter table; called from fx_init(). */
 198  188  extern fxdpent_t *fx_getdptbl(void);
 199  189  
           /* Internal helpers. */
 200  190  static void     fx_change_priority(kthread_t *, fxproc_t *);
 201  191  static fxproc_t *fx_list_lookup(kt_did_t);
 202  192  static void fx_list_release(fxproc_t *);
 203  193  
 204  194  
           /*
            * Operations vector for the FX class. fx_init() hands a pointer to
            * this structure back to its caller. Slots the class does not
            * implement are filled with fx_nullsys.
            */
 205  195  static struct classfuncs fx_classfuncs = {
 206  196          /* class functions */
 207  197          fx_admin,
 208  198          fx_getclinfo,
 209  199          fx_parmsin,
 210  200          fx_parmsout,
 211  201          fx_vaparmsin,
 212  202          fx_vaparmsout,
 213  203          fx_getclpri,
 214  204          fx_alloc,
 215  205          fx_free,
 216  206  
 217  207          /* thread functions */
 218  208          fx_enterclass,
 219  209          fx_exitclass,
 220  210          fx_canexit,
 221  211          fx_fork,
 222  212          fx_forkret,
 223  213          fx_parmsget,
 224  214          fx_parmsset,
 225  215          fx_stop,
 226  216          fx_exit,
 227  217          fx_nullsys,     /* active */
 228  218          fx_nullsys,     /* inactive */
 229  219          fx_swapin,
 230  220          fx_swapout,
 231  221          fx_trapret,
 232  222          fx_preempt,
 233  223          fx_setrun,
 234  224          fx_sleep,
 235  225          fx_tick,
 236  226          fx_wakeup,
 237  227          fx_donice,
 238  228          fx_globpri,
 239  229          fx_nullsys,     /* set_process_group */
 240  230          fx_yield,
 241  231          fx_doprio,
 242  232  };
 243  233  
 244  234  
           /* Module load entry point: install the module described by modlinkage. */
 245  235  int
 246  236  _init()
 247  237  {
 248  238          return (mod_install(&modlinkage));
 249  239  }
 250  240  
           /*
            * Module unload entry point. Always fails with EBUSY; presumably this
            * keeps the scheduling class from ever being unloaded.
            */
 251  241  int
 252  242  _fini()
 253  243  {
 254  244          return (EBUSY);
 255  245  }
 256  246  
           /* Module information entry point: report on modlinkage via mod_info(). */
 257  247  int
 258  248  _info(struct modinfo *modinfop)
 259  249  {
 260  250          return (mod_info(&modlinkage, modinfop));
 261  251  }
 262  252  
 263  253  /*
 264  254   * Fixed priority class initialization. Called by dispinit() at boot time.
 265  255   * We can ignore the clparmsz argument since we know that the smallest
 266  256   * possible parameter buffer is big enough for us.
            *
            * cid is recorded in fx_cid as our class ID. On return *clfuncspp
            * points at fx_classfuncs, and the return value is the highest global
            * priority used by the class (fx_maxglobpri).
 267  257   */
 268  258  /* ARGSUSED */
 269  259  static pri_t
 270  260  fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 271  261  {
 272  262          int i;
 273  263          extern pri_t fx_getmaxumdpri(void);
 274  264  
                   /*
                    * Fetch the dispatch table and its highest index; the global
                    * priority of that last entry is the class maximum.
                    */
 275  265          fx_dptbl = fx_getdptbl();
 276  266          fx_maxumdpri = fx_getmaxumdpri();
 277  267          fx_maxglobpri = fx_dptbl[fx_maxumdpri].fx_globpri;
 278  268  
 279  269          fx_cid = cid;           /* Record our class ID */
 280  270  
 281  271          /*
 282  272           * Initialize the hash table for fxprocs with callbacks
                    * so that every dummy head points at itself (empty list).
 283  273           */
 284  274          for (i = 0; i < FX_CB_LISTS; i++) {
 285  275                  fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev =
 286  276                      &fx_cb_plisthead[i];
 287  277          }
 288  278  
 289  279          /*
 290  280           * We're required to return a pointer to our classfuncs
 291  281           * structure and the highest global priority value we use.
 292  282           */
 293  283          *clfuncspp = &fx_classfuncs;
 294  284          return (fx_maxglobpri);
 295  285  }
 296  286  
 297  287  /*
 298  288   * Get or reset the fx_dptbl values per the user's request.
            *
            * uaddr is the user address of an fxadmin_t (an fxadmin32_t for
            * callers that are not DATAMODEL_NATIVE). The fx_cmd member selects:
            *   FX_GETDPSIZE - return the number of table entries in fx_ndpents.
            *   FX_GETDPTBL  - copy out as many entries as fit in the caller's
            *                  buffer and return that count in fx_ndpents.
            *   FX_SETDPTBL  - replace the time quantum of every entry; requires
            *                  secpolicy_dispadm() to succeed for reqpcredp.
            *
            * Returns 0 on success, EFAULT if a copyin/copyout fails, EPERM if
            * the caller lacks privilege for FX_SETDPTBL, and EINVAL for an
            * unknown command, a table of the wrong size or an invalid quantum.
 299  289   */
 300  290  static int
 301  291  fx_admin(caddr_t uaddr, cred_t *reqpcredp)
 302  292  {
 303  293          fxadmin_t       fxadmin;
 304  294          fxdpent_t       *tmpdpp;
 305  295          int             userdpsz;
 306  296          int             i;
 307  297          size_t          fxdpsz;
 308  298  
 309  299          if (get_udatamodel() == DATAMODEL_NATIVE) {
 310  300                  if (copyin(uaddr, &fxadmin, sizeof (fxadmin_t)))
 311  301                          return (EFAULT);
 312  302          }
 313  303  #ifdef _SYSCALL32_IMPL
 314  304          else {
 315  305                  /* get fxadmin struct from ILP32 caller */
 316  306                  fxadmin32_t fxadmin32;
 317  307                  if (copyin(uaddr, &fxadmin32, sizeof (fxadmin32_t)))
 318  308                          return (EFAULT);
 319  309                  fxadmin.fx_dpents =
 320  310                      (struct fxdpent *)(uintptr_t)fxadmin32.fx_dpents;
 321  311                  fxadmin.fx_ndpents = fxadmin32.fx_ndpents;
 322  312                  fxadmin.fx_cmd = fxadmin32.fx_cmd;
 323  313          }
 324  314  #endif /* _SYSCALL32_IMPL */
 325  315  
                   /* size in bytes of the complete dispatch table */
 326  316          fxdpsz = (fx_maxumdpri + 1) * sizeof (fxdpent_t);
 327  317  
 328  318          switch (fxadmin.fx_cmd) {
 329  319          case FX_GETDPSIZE:
 330  320                  fxadmin.fx_ndpents = fx_maxumdpri + 1;
 331  321  
 332  322                  if (get_udatamodel() == DATAMODEL_NATIVE) {
 333  323                          if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t)))
 334  324                                  return (EFAULT);
 335  325                  }
 336  326  #ifdef _SYSCALL32_IMPL
 337  327                  else {
 338  328                          /* return fxadmin struct to ILP32 caller */
 339  329                          fxadmin32_t fxadmin32;
 340  330                          fxadmin32.fx_dpents =
 341  331                              (caddr32_t)(uintptr_t)fxadmin.fx_dpents;
 342  332                          fxadmin32.fx_ndpents = fxadmin.fx_ndpents;
 343  333                          fxadmin32.fx_cmd = fxadmin.fx_cmd;
 344  334                          if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t)))
 345  335                                  return (EFAULT);
 346  336                  }
 347  337  #endif /* _SYSCALL32_IMPL */
 348  338                  break;
 349  339  
 350  340          case FX_GETDPTBL:
                           /*
                            * Copy out no more than the caller asked for and no more
                            * than the table holds.
                            * NOTE(review): fx_ndpents comes straight from the user
                            * and is not range checked here; if its type is signed,
                            * a negative value presumably converts to a very large
                            * size so that MIN() picks fxdpsz - confirm.
                            * NOTE(review): fx_dptblock is not held for this read.
                            */
 351  341                  userdpsz = MIN(fxadmin.fx_ndpents * sizeof (fxdpent_t),
 352  342                      fxdpsz);
 353  343                  if (copyout(fx_dptbl, fxadmin.fx_dpents, userdpsz))
 354  344                          return (EFAULT);
 355  345  
                           /* report how many entries were actually copied out */
 356  346                  fxadmin.fx_ndpents = userdpsz / sizeof (fxdpent_t);
 357  347  
 358  348                  if (get_udatamodel() == DATAMODEL_NATIVE) {
 359  349                          if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t)))
 360  350                                  return (EFAULT);
 361  351                  }
 362  352  #ifdef _SYSCALL32_IMPL
 363  353                  else {
 364  354                          /* return fxadmin struct to ILP32 callers */
 365  355                          fxadmin32_t fxadmin32;
 366  356                          fxadmin32.fx_dpents =
 367  357                              (caddr32_t)(uintptr_t)fxadmin.fx_dpents;
 368  358                          fxadmin32.fx_ndpents = fxadmin.fx_ndpents;
 369  359                          fxadmin32.fx_cmd = fxadmin.fx_cmd;
 370  360                          if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t)))
 371  361                                  return (EFAULT);
 372  362                  }
 373  363  #endif /* _SYSCALL32_IMPL */
 374  364                  break;
 375  365  
 376  366          case FX_SETDPTBL:
 377  367                  /*
 378  368                   * We require that the requesting process has sufficient
 379  369                   * privileges. We also require that the table supplied by
 380  370                   * the user exactly match the current fx_dptbl in size.
 381  371                   */
 382  372                  if (secpolicy_dispadm(reqpcredp) != 0) {
 383  373                          return (EPERM);
 384  374                  }
 385  375                  if (fxadmin.fx_ndpents * sizeof (fxdpent_t) != fxdpsz) {
 386  376                          return (EINVAL);
 387  377                  }
 388  378  
 389  379                  /*
 390  380                   * We read the user supplied table into a temporary buffer
 391  381                   * where it is validated before being copied over the
 392  382                   * fx_dptbl.
 393  383                   */
 394  384                  tmpdpp = kmem_alloc(fxdpsz, KM_SLEEP);
 395  385                  if (copyin(fxadmin.fx_dpents, tmpdpp, fxdpsz)) {
 396  386                          kmem_free(tmpdpp, fxdpsz);
 397  387                          return (EFAULT);
 398  388                  }
 399  389                  for (i = 0; i < fxadmin.fx_ndpents; i++) {
 400  390  
 401  391                          /*
 402  392                           * Validate the user supplied values. All we are doing
 403  393                           * here is verifying that the values are within their
 404  394                           * allowable ranges and will not panic the system. We
 405  395                           * make no attempt to ensure that the resulting
 406  396                           * configuration makes sense or results in reasonable
 407  397                           * performance.
 408  398                           */
 409  399                          if (tmpdpp[i].fx_quantum <= 0 &&
 410  400                              tmpdpp[i].fx_quantum != FX_TQINF) {
 411  401                                  kmem_free(tmpdpp, fxdpsz);
 412  402                                  return (EINVAL);
 413  403                          }
 414  404                  }
 415  405  
 416  406                  /*
 417  407                   * Copy the user supplied values over the current fx_dptbl
 418  408                   * values. The fx_globpri member is read-only so we don't
 419  409                   * overwrite it.
 420  410                   */
 421  411                  mutex_enter(&fx_dptblock);
 422  412                  for (i = 0; i < fxadmin.fx_ndpents; i++) {
 423  413                          fx_dptbl[i].fx_quantum = tmpdpp[i].fx_quantum;
 424  414                  }
 425  415                  mutex_exit(&fx_dptblock);
 426  416                  kmem_free(tmpdpp, fxdpsz);
 427  417                  break;
 428  418  
 429  419          default:
 430  420                  return (EINVAL);
 431  421          }
 432  422          return (0);
 433  423  }
 434  424  
 435  425  /*
 436  426   * Initialize the fixed priority class specific thread structure
 437  427   * supplied in bufp with the parameters supplied, attach it to the
 438  428   * thread, and move the thread to the specified priority.
            *
            * parmsp points to an fxkparms_t, or is NULL to request the default
            * values (priority and priority limit 0, the table quantum for that
            * priority, nice NZERO). bufp must be a non-NULL fxproc_t.
            *
            * Returns 0 on success. Returns EPERM, without touching the thread,
            * when secpolicy_setpriority() rejects reqpcredp for a priority or
            * priority limit above FX_MAX_UNPRIV_PRI, or for an explicit
            * (non-default) time quantum.
 439  429   */
 440  430  static int
 441  431  fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
 442  432      void *bufp)
 443  433  {
 444  434          fxkparms_t      *fxkparmsp = (fxkparms_t *)parmsp;
 445  435          fxproc_t        *fxpp;
 446  436          pri_t           reqfxupri;
 447  437          pri_t           reqfxuprilim;
 448  438  
 449  439          fxpp = (fxproc_t *)bufp;
 450  440          ASSERT(fxpp != NULL);
 451  441  
 452  442          /*
 453  443           * Initialize the fxproc structure.
 454  444           */
 455  445          fxpp->fx_flags = 0;
 456  446          fxpp->fx_callback = NULL;
 457  447          fxpp->fx_cookie = NULL;
 458  448  
 459  449          if (fxkparmsp == NULL) {
 460  450                  /*
 461  451                   * Use default values.
 462  452                   */
 463  453                  fxpp->fx_pri = fxpp->fx_uprilim = 0;
 464  454                  fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
 465  455                  fxpp->fx_nice =  NZERO;
 466  456          } else {
 467  457                  /*
 468  458                   * Use supplied values.
 469  459                   */
 470  460  
                           /* The priority limit defaults to 0 when not supplied. */
 471  461                  if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) {
 472  462                          reqfxuprilim = 0;
 473  463                  } else {
 474  464                          if (fxkparmsp->fx_uprilim > FX_MAX_UNPRIV_PRI &&
 475  465                              secpolicy_setpriority(reqpcredp) != 0)
 476  466                                  return (EPERM);
 477  467                          reqfxuprilim = fxkparmsp->fx_uprilim;
 478  468                          FX_ADJUST_PRI(reqfxuprilim);
 479  469                  }
 480  470  
                           /* The priority defaults to the limit when not supplied. */
 481  471                  if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) {
 482  472                          reqfxupri = reqfxuprilim;
 483  473                  } else {
 484  474                          if (fxkparmsp->fx_upri > FX_MAX_UNPRIV_PRI &&
 485  475                              secpolicy_setpriority(reqpcredp) != 0)
 486  476                                  return (EPERM);
 487  477                          /*
 488  478                           * Set the user priority to the requested value
 489  479                           * or the upri limit, whichever is lower.
 490  480                           */
 491  481                          reqfxupri = fxkparmsp->fx_upri;
 492  482                          FX_ADJUST_PRI(reqfxupri);
 493  483  
 494  484                          if (reqfxupri > reqfxuprilim)
 495  485                                  reqfxupri = reqfxuprilim;
 496  486                  }
 497  487  
 498  488  
 499  489                  fxpp->fx_uprilim = reqfxuprilim;
 500  490                  fxpp->fx_pri = reqfxupri;
 501  491  
                           /*
                            * Derive a nice value from the priority: priority 0 gives
                            * NZERO and a priority equal to fx_maxupri gives 0.
                            */
 502  492                  fxpp->fx_nice = NZERO - (NZERO * reqfxupri) / fx_maxupri;
 503  493  
 504  494                  if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) ||
 505  495                      (fxkparmsp->fx_tqntm == FX_TQDEF)) {
 506  496                          fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
 507  497                  } else {
                                   /* An explicit quantum always needs privilege. */
 508  498                          if (secpolicy_setpriority(reqpcredp) != 0)
 509  499                                  return (EPERM);
 510  500  
 511  501                          if (fxkparmsp->fx_tqntm == FX_TQINF)
 512  502                                  fxpp->fx_pquantum = FX_TQINF;
 513  503                          else {
 514  504                                  fxpp->fx_pquantum = fxkparmsp->fx_tqntm;
 515  505                          }
 516  506                  }
 517  507  
 518  508          }
 519  509  
 520  510          fxpp->fx_timeleft = fxpp->fx_pquantum;
 521  511          cpucaps_sc_init(&fxpp->fx_caps);
 522  512          fxpp->fx_tp = t;
 523  513  
 524  514          thread_lock(t);                 /* get dispatcher lock on thread */
 525  515          t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
 526  516          t->t_cid = cid;
 527  517          t->t_cldata = (void *)fxpp;
 528  518          t->t_schedflag &= ~TS_RUNQMATCH;
 529  519          fx_change_priority(t, fxpp);
 530  520          thread_unlock(t);
 531  521  
 532  522          return (0);
 533  523  }
 534  524  
 535  525  /*
 536  526   * The thread is exiting.
            *
            * Charge the thread for any outstanding CPU time and, if it has a
            * registered callback, invoke the exit callback, clear the callback
            * state and remove the fxproc from its callback list.
 537  527   */
 538  528  static void
 539  529  fx_exit(kthread_t *t)
 540  530  {
 541  531          fxproc_t *fxpp;
 542  532  
 543  533          thread_lock(t);
 544  534          fxpp = (fxproc_t *)(t->t_cldata);
 545  535  
 546  536          /*
 547  537           * A thread could be exiting in between clock ticks, so we need to
 548  538           * calculate how much CPU time it used since it was charged last time.
 549  539           *
 550  540           * CPU caps are not enforced on exiting processes - it is usually
 551  541           * desirable to exit as soon as possible to free resources.
 552  542           */
 553  543          (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ONLY);
 554  544  
 555  545          if (FX_HAS_CB(fxpp)) {
 556  546                  FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie);
 557  547                  fxpp->fx_callback = NULL;
 558  548                  fxpp->fx_cookie = NULL;
                           /*
                            * The thread lock is dropped before FX_CB_LIST_DELETE,
                            * which takes the callback list mutex.
                            */
 559  549                  thread_unlock(t);
 560  550                  FX_CB_LIST_DELETE(fxpp);
 561  551                  return;
 562  552          }
 563  553  
 564  554          thread_unlock(t);
 565  555  }
 566  556  
 567  557  /*
 568  558   * Exiting the class. Free fxproc structure of thread.
            *
            * procp is the thread's fxproc_t. If a callback is registered, the
            * exit callback is invoked and the fxproc is removed from its
            * callback list before the structure is freed.
 569  559   */
 570  560  static void
 571  561  fx_exitclass(void *procp)
 572  562  {
 573  563          fxproc_t *fxpp = (fxproc_t *)procp;
 574  564  
 575  565          thread_lock(fxpp->fx_tp);
 576  566          if (FX_HAS_CB(fxpp)) {
 577  567  
 578  568                  FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie);
 579  569  
 580  570                  fxpp->fx_callback = NULL;
 581  571                  fxpp->fx_cookie = NULL;
                           /* drop the thread lock before taking the list mutex */
 582  572                  thread_unlock(fxpp->fx_tp);
 583  573                  FX_CB_LIST_DELETE(fxpp);
 584  574          } else
 585  575                  thread_unlock(fxpp->fx_tp);
 586  576  
 587  577          kmem_free(fxpp, sizeof (fxproc_t));
 588  578  }
 589  579  
           /*
            * Report whether thread t may leave the FX class. Both arguments are
            * unused; the answer is always 0 (permitted).
            */
 590  580  /* ARGSUSED */
 591  581  static int
 592  582  fx_canexit(kthread_t *t, cred_t *cred)
 593  583  {
 594  584          /*
 595  585           * A thread can always leave the FX class
 596  586           */
 597  587          return (0);
 598  588  }
 599  589  
 600  590  /*
 601  591   * Initialize fixed-priority class specific proc structure for a child.
 602  592   * callbacks are not inherited upon fork.
            *
            * t is the parent thread, ct the child thread and bufp the child's
            * (non-NULL) fxproc_t. The caller must hold the parent process's
            * p_lock. The child inherits the parent's quantum, priority,
            * priority limit, nice value and flags (with FXBACKQ cleared).
            * Always returns 0.
 603  593   */
 604  594  static int
 605  595  fx_fork(kthread_t *t, kthread_t *ct, void *bufp)
 606  596  {
 607  597          fxproc_t        *pfxpp;         /* ptr to parent's fxproc structure */
 608  598          fxproc_t        *cfxpp;         /* ptr to child's fxproc structure */
 609  599  
 610  600          ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
 611  601  
 612  602          cfxpp = (fxproc_t *)bufp;
 613  603          ASSERT(cfxpp != NULL);
 614  604          thread_lock(t);
 615  605          pfxpp = (fxproc_t *)t->t_cldata;
 616  606          /*
 617  607           * Initialize child's fxproc structure.
 618  608           */
 619  609          cfxpp->fx_timeleft = cfxpp->fx_pquantum = pfxpp->fx_pquantum;
 620  610          cfxpp->fx_pri = pfxpp->fx_pri;
 621  611          cfxpp->fx_uprilim = pfxpp->fx_uprilim;
 622  612          cfxpp->fx_nice = pfxpp->fx_nice;
 623  613          cfxpp->fx_callback = NULL;
 624  614          cfxpp->fx_cookie = NULL;
 625  615          cfxpp->fx_flags = pfxpp->fx_flags & ~(FXBACKQ);
 626  616          cpucaps_sc_init(&cfxpp->fx_caps);
 627  617  
 628  618          cfxpp->fx_tp = ct;
 629  619          ct->t_cldata = (void *)cfxpp;
 630  620          thread_unlock(t);
 631  621  
 632  622          /*
 633  623           * No list linkage is done here: the child starts with no callback.
 634  624           */
 635  625          return (0);
 636  626  }
 637  627  
 638  628  
 639  629  /*
 640  630   * Child is placed at back of dispatcher queue and parent gives
 641  631   * up processor so that the child runs first after the fork.
 642  632   * This allows the child immediately execing to break the multiple
 643  633   * use of copy on write pages with no disk home. The parent will
 644  634   * get to steal them back rather than uselessly copying them.
            *
            * t is the parent and must be the current thread; ct is the child.
            * Called with pidlock held; pidlock is dropped here. The function
            * ends by calling swtch().
 645  635   */
 646  636  static void
 647  637  fx_forkret(kthread_t *t, kthread_t *ct)
 648  638  {
 649  639          proc_t  *pp = ttoproc(t);
 650  640          proc_t  *cp = ttoproc(ct);
 651  641          fxproc_t *fxpp;
 652  642  
 653  643          ASSERT(t == curthread);
 654  644          ASSERT(MUTEX_HELD(&pidlock));
 655  645  
 656  646          /*
 657  647           * Grab the child's p_lock before dropping pidlock to ensure
 658  648           * the process does not disappear before we set it running.
 659  649           */
 660  650          mutex_enter(&cp->p_lock);
 661  651          continuelwps(cp);
 662  652          mutex_exit(&cp->p_lock);
 663  653  
 664  654          mutex_enter(&pp->p_lock);
 665  655          mutex_exit(&pidlock);
 666  656          continuelwps(pp);
 667  657  
                   /*
                    * Reset the parent's priority from the dispatch table and put
                    * it back on a run queue via fx_setrun().
                    */
 668  658          thread_lock(t);
 669  659          fxpp = (fxproc_t *)(t->t_cldata);
 670  660          t->t_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
 671  661          ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri);
 672  662          THREAD_TRANSITION(t);
 673  663          fx_setrun(t);
 674  664          thread_unlock(t);
 675  665          /*
 676  666           * Safe to drop p_lock now since it is safe to change
 677  667           * the scheduling class after this point.
 678  668           */
 679  669          mutex_exit(&pp->p_lock);
 680  670  
 681  671          swtch();
 682  672  }
 683  673  
 684  674  
 685  675  /*
 686  676   * Get information about the fixed-priority class into the buffer
 687  677   * pointed to by fxinfop. The maximum configured user priority
 688  678   * is the only information we supply.
            *
            * infop is treated as an fxinfo_t. Always returns 0.
 689  679   */
 690  680  static int
 691  681  fx_getclinfo(void *infop)
 692  682  {
 693  683          fxinfo_t *fxinfop = (fxinfo_t *)infop;
 694  684          fxinfop->fx_maxupri = fx_maxupri;
 695  685          return (0);
 696  686  }
 697  687  
 698  688  
 699  689  
 700  690  /*
 701  691   * Return the user mode scheduling priority range: the minimum is 0
            * and the maximum is fx_maxupri. Always returns 0.
 702  692   */
 703  693  static int
 704  694  fx_getclpri(pcpri_t *pcprip)
 705  695  {
 706  696          pcprip->pc_clpmax = fx_maxupri;
 707  697          pcprip->pc_clpmin = 0;
 708  698          return (0);
 709  699  }
 710  700  
 711  701  
           /*
            * Do-nothing operation. It fills the fx_classfuncs slots marked
            * active, inactive and set_process_group.
            */
 712  702  static void
 713  703  fx_nullsys()
 714  704  {}
 715  705  
 716  706  
 717  707  /*
 718  708   * Get the fixed-priority parameters of the thread pointed to by
 719  709   * t into the buffer pointed to by parmsp.
 720  710   */
 721  711  static void
 722  712  fx_parmsget(kthread_t *t, void *parmsp)
 723  713  {
 724  714          fxproc_t *fxpp = (fxproc_t *)t->t_cldata;
 725  715          fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp;
 726  716  
 727  717          fxkparmsp->fx_upri = fxpp->fx_pri;
 728  718          fxkparmsp->fx_uprilim = fxpp->fx_uprilim;
 729  719          fxkparmsp->fx_tqntm = fxpp->fx_pquantum;
 730  720  }
 731  721  
 732  722  
 733  723  
 734  724  /*
 735  725   * Check the validity of the fixed-priority parameters in the buffer
 736  726   * pointed to by fxparmsp.
 737  727   */
 738  728  static int
 739  729  fx_parmsin(void *parmsp)
 740  730  {
 741  731          fxparms_t       *fxparmsp = (fxparms_t *)parmsp;
 742  732          uint_t          cflags;
 743  733          longlong_t      ticks;
 744  734          /*
 745  735           * Check validity of parameters.
 746  736           */
 747  737  
 748  738          if ((fxparmsp->fx_uprilim > fx_maxupri ||
 749  739              fxparmsp->fx_uprilim < 0) &&
 750  740              fxparmsp->fx_uprilim != FX_NOCHANGE)
 751  741                  return (EINVAL);
 752  742  
 753  743          if ((fxparmsp->fx_upri > fx_maxupri ||
 754  744              fxparmsp->fx_upri < 0) &&
 755  745              fxparmsp->fx_upri != FX_NOCHANGE)
 756  746                  return (EINVAL);
 757  747  
 758  748          if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) ||
 759  749              fxparmsp->fx_tqnsecs >= NANOSEC)
 760  750                  return (EINVAL);
 761  751  
 762  752          cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? FX_DOUPRI : 0);
 763  753  
 764  754          if (fxparmsp->fx_uprilim != FX_NOCHANGE) {
 765  755                  cflags |= FX_DOUPRILIM;
 766  756          }
 767  757  
 768  758          if (fxparmsp->fx_tqnsecs != FX_NOCHANGE)
 769  759                  cflags |= FX_DOTQ;
 770  760  
 771  761          /*
 772  762           * convert the buffer to kernel format.
 773  763           */
 774  764  
 775  765          if (fxparmsp->fx_tqnsecs >= 0) {
 776  766                  if ((ticks = SEC_TO_TICK((longlong_t)fxparmsp->fx_tqsecs) +
 777  767                      NSEC_TO_TICK_ROUNDUP(fxparmsp->fx_tqnsecs)) > INT_MAX)
 778  768                          return (ERANGE);
 779  769  
 780  770                  ((fxkparms_t *)fxparmsp)->fx_tqntm = (int)ticks;
 781  771          } else {
 782  772                  if ((fxparmsp->fx_tqnsecs != FX_NOCHANGE) &&
 783  773                      (fxparmsp->fx_tqnsecs != FX_TQINF) &&
 784  774                      (fxparmsp->fx_tqnsecs != FX_TQDEF))
 785  775                          return (EINVAL);
 786  776                  ((fxkparms_t *)fxparmsp)->fx_tqntm = fxparmsp->fx_tqnsecs;
 787  777          }
 788  778  
 789  779          ((fxkparms_t *)fxparmsp)->fx_cflags = cflags;
 790  780  
 791  781          return (0);
 792  782  }
 793  783  
 794  784  
 795  785  /*
 796  786   * Check the validity of the fixed-priority parameters in the pc_vaparms_t
 797  787   * structure vaparmsp and put them in the buffer pointed to by fxprmsp.
 798  788   * pc_vaparms_t contains (key, value) pairs of parameters.
 799  789   */
 800  790  static int
 801  791  fx_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp)
 802  792  {
 803  793          uint_t          secs = 0;
 804  794          uint_t          cnt;
 805  795          int             nsecs = 0;
 806  796          int             priflag, secflag, nsecflag, limflag;
 807  797          longlong_t      ticks;
 808  798          fxkparms_t      *fxprmsp = (fxkparms_t *)prmsp;
 809  799          pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 810  800  
 811  801  
 812  802          /*
 813  803           * First check the validity of parameters and convert them
 814  804           * from the user supplied format to the internal format.
 815  805           */
 816  806          priflag = secflag = nsecflag = limflag = 0;
 817  807  
 818  808          fxprmsp->fx_cflags = 0;
 819  809  
 820  810          if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 821  811                  return (EINVAL);
 822  812  
 823  813          for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 824  814  
 825  815                  switch (vpp->pc_key) {
 826  816                  case FX_KY_UPRILIM:
 827  817                          if (limflag++)
 828  818                                  return (EINVAL);
 829  819                          fxprmsp->fx_cflags |= FX_DOUPRILIM;
 830  820                          fxprmsp->fx_uprilim = (pri_t)vpp->pc_parm;
 831  821                          if (fxprmsp->fx_uprilim > fx_maxupri ||
 832  822                              fxprmsp->fx_uprilim < 0)
 833  823                                  return (EINVAL);
 834  824                          break;
 835  825  
 836  826                  case FX_KY_UPRI:
 837  827                          if (priflag++)
 838  828                                  return (EINVAL);
 839  829                          fxprmsp->fx_cflags |= FX_DOUPRI;
 840  830                          fxprmsp->fx_upri = (pri_t)vpp->pc_parm;
 841  831                          if (fxprmsp->fx_upri > fx_maxupri ||
 842  832                              fxprmsp->fx_upri < 0)
 843  833                                  return (EINVAL);
 844  834                          break;
 845  835  
 846  836                  case FX_KY_TQSECS:
 847  837                          if (secflag++)
 848  838                                  return (EINVAL);
 849  839                          fxprmsp->fx_cflags |= FX_DOTQ;
 850  840                          secs = (uint_t)vpp->pc_parm;
 851  841                          break;
 852  842  
 853  843                  case FX_KY_TQNSECS:
 854  844                          if (nsecflag++)
 855  845                                  return (EINVAL);
 856  846                          fxprmsp->fx_cflags |= FX_DOTQ;
 857  847                          nsecs = (int)vpp->pc_parm;
 858  848                          break;
 859  849  
 860  850                  default:
 861  851                          return (EINVAL);
 862  852                  }
 863  853          }
 864  854  
 865  855          if (vaparmsp->pc_vaparmscnt == 0) {
 866  856                  /*
 867  857                   * Use default parameters.
 868  858                   */
 869  859                  fxprmsp->fx_upri = 0;
 870  860                  fxprmsp->fx_uprilim = 0;
 871  861                  fxprmsp->fx_tqntm = FX_TQDEF;
 872  862                  fxprmsp->fx_cflags = FX_DOUPRI | FX_DOUPRILIM | FX_DOTQ;
 873  863          } else if ((fxprmsp->fx_cflags & FX_DOTQ) != 0) {
 874  864                  if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC)
 875  865                          return (EINVAL);
 876  866  
 877  867                  if (nsecs >= 0) {
 878  868                          if ((ticks = SEC_TO_TICK((longlong_t)secs) +
 879  869                              NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX)
 880  870                                  return (ERANGE);
 881  871  
 882  872                          fxprmsp->fx_tqntm = (int)ticks;
 883  873                  } else {
 884  874                          if (nsecs != FX_TQINF && nsecs != FX_TQDEF)
 885  875                                  return (EINVAL);
 886  876                          fxprmsp->fx_tqntm = nsecs;
 887  877                  }
 888  878          }
 889  879  
 890  880          return (0);
 891  881  }
 892  882  
 893  883  
 894  884  /*
 895  885   * Convert the quantum to user format; nothing to do if vaparmsp is set.
 896  886   */
 897  887  /* ARGSUSED */
 898  888  static int
 899  889  fx_parmsout(void *parmsp, pc_vaparms_t *vaparmsp)
 900  890  {
 901  891          register fxkparms_t     *fxkprmsp = (fxkparms_t *)parmsp;
 902  892  
 903  893          if (vaparmsp != NULL)
 904  894                  return (0);
 905  895  
 906  896          if (fxkprmsp->fx_tqntm < 0) {
 907  897                  /*
 908  898                   * Quantum field set to special value (e.g. FX_TQINF)
 909  899                   */
 910  900                  ((fxparms_t *)fxkprmsp)->fx_tqnsecs = fxkprmsp->fx_tqntm;
 911  901                  ((fxparms_t *)fxkprmsp)->fx_tqsecs = 0;
 912  902  
 913  903          } else {
 914  904                  /* Convert quantum from ticks to seconds-nanoseconds */
 915  905  
 916  906                  timestruc_t ts;
 917  907                  TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts);
 918  908                  ((fxparms_t *)fxkprmsp)->fx_tqsecs = ts.tv_sec;
 919  909                  ((fxparms_t *)fxkprmsp)->fx_tqnsecs = ts.tv_nsec;
 920  910          }
 921  911  
 922  912          return (0);
 923  913  }
 924  914  
 925  915  
 926  916  /*
 927  917   * Copy all selected fixed-priority class parameters to the user.
 928  918   * The parameters are specified by a key.
 929  919   */
 930  920  static int
 931  921  fx_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
 932  922  {
 933  923          fxkparms_t      *fxkprmsp = (fxkparms_t *)prmsp;
 934  924          timestruc_t     ts;
 935  925          uint_t          cnt;
 936  926          uint_t          secs;
 937  927          int             nsecs;
 938  928          int             priflag, secflag, nsecflag, limflag;
 939  929          pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 940  930  
 941  931          ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
 942  932  
 943  933          priflag = secflag = nsecflag = limflag = 0;
 944  934  
 945  935          if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 946  936                  return (EINVAL);
 947  937  
 948  938          if (fxkprmsp->fx_tqntm < 0) {
 949  939                  /*
 950  940                   * Quantum field set to special value (e.g. FX_TQINF).
 951  941                   */
 952  942                  secs = 0;
 953  943                  nsecs = fxkprmsp->fx_tqntm;
 954  944          } else {
 955  945                  /*
 956  946                   * Convert quantum from ticks to seconds-nanoseconds.
 957  947                   */
 958  948                  TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts);
 959  949                  secs = ts.tv_sec;
 960  950                  nsecs = ts.tv_nsec;
 961  951          }
 962  952  
 963  953  
 964  954          for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 965  955  
 966  956                  switch (vpp->pc_key) {
 967  957                  case FX_KY_UPRILIM:
 968  958                          if (limflag++)
 969  959                                  return (EINVAL);
 970  960                          if (copyout(&fxkprmsp->fx_uprilim,
 971  961                              (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
 972  962                                  return (EFAULT);
 973  963                          break;
 974  964  
 975  965                  case FX_KY_UPRI:
 976  966                          if (priflag++)
 977  967                                  return (EINVAL);
 978  968                          if (copyout(&fxkprmsp->fx_upri,
 979  969                              (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
 980  970                                  return (EFAULT);
 981  971                          break;
 982  972  
 983  973                  case FX_KY_TQSECS:
 984  974                          if (secflag++)
 985  975                                  return (EINVAL);
 986  976                          if (copyout(&secs,
 987  977                              (void *)(uintptr_t)vpp->pc_parm, sizeof (uint_t)))
 988  978                                  return (EFAULT);
 989  979                          break;
 990  980  
 991  981                  case FX_KY_TQNSECS:
 992  982                          if (nsecflag++)
 993  983                                  return (EINVAL);
 994  984                          if (copyout(&nsecs,
 995  985                              (void *)(uintptr_t)vpp->pc_parm, sizeof (int)))
 996  986                                  return (EFAULT);
 997  987                          break;
 998  988  
 999  989                  default:
1000  990                          return (EINVAL);
1001  991                  }
1002  992          }
1003  993  
1004  994          return (0);
1005  995  }
1006  996  
1007  997  /*
1008  998   * Set the scheduling parameters of the thread pointed to by tx
1009  999   * to those specified in the buffer pointed to by parmsp.
1010 1000   */
1011 1001  /* ARGSUSED */
1012 1002  static int
1013 1003  fx_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
1014 1004  {
1015 1005          char            nice;
1016 1006          pri_t           reqfxuprilim;
1017 1007          pri_t           reqfxupri;
1018 1008          fxkparms_t      *fxkparmsp = (fxkparms_t *)parmsp;
1019 1009          fxproc_t        *fxpp;
1020 1010  
1021 1011  
1022 1012          ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
1023 1013  
1024 1014          thread_lock(tx);
1025 1015          fxpp = (fxproc_t *)tx->t_cldata;
1026 1016  
1027 1017          if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0)
1028 1018                  reqfxuprilim = fxpp->fx_uprilim;
1029 1019          else
1030 1020                  reqfxuprilim = fxkparmsp->fx_uprilim;
1031 1021  
1032 1022          /*
1033 1023           * Basic permissions enforced by generic kernel code
1034 1024           * for all classes require that a thread attempting
1035 1025           * to change the scheduling parameters of a target
1036 1026           * thread be privileged or have a real or effective
1037 1027           * UID matching that of the target thread. We are not
1038 1028           * called unless these basic permission checks have
1039 1029           * already passed. The fixed priority class requires in
1040 1030           * addition that the calling thread be privileged if it
1041 1031           * is attempting to raise the pri above its current
1042 1032           * value. This may have been checked previously but if our
1043 1033           * caller passed us a non-NULL credential pointer we assume
1044 1034           * it hasn't and we check it here.
1045 1035           */
1046 1036  
1047 1037          if ((reqpcredp != NULL) &&
1048 1038              (reqfxuprilim > fxpp->fx_uprilim ||
1049 1039              ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)) &&
1050 1040              secpolicy_raisepriority(reqpcredp) != 0) {
1051 1041                  thread_unlock(tx);
1052 1042                  return (EPERM);
1053 1043          }
1054 1044  
1055 1045          FX_ADJUST_PRI(reqfxuprilim);
1056 1046  
1057 1047          if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0)
1058 1048                  reqfxupri = fxpp->fx_pri;
1059 1049          else
1060 1050                  reqfxupri = fxkparmsp->fx_upri;
1061 1051  
1062 1052  
1063 1053          /*
1064 1054           * Make sure the user priority doesn't exceed the upri limit.
1065 1055           */
1066 1056          if (reqfxupri > reqfxuprilim)
1067 1057                  reqfxupri = reqfxuprilim;
1068 1058  
1069 1059          /*
1070 1060           * Set fx_nice to the nice value corresponding to the user
1071 1061           * priority we are setting.  Note that setting the nice field
1072 1062           * of the parameter struct won't affect upri or nice.
1073 1063           */
1074 1064  
1075 1065          nice = NZERO - (reqfxupri * NZERO) / fx_maxupri;
1076 1066  
1077 1067          if (nice > NZERO)
1078 1068                  nice = NZERO;
1079 1069  
1080 1070          fxpp->fx_uprilim = reqfxuprilim;
1081 1071          fxpp->fx_pri = reqfxupri;
1082 1072  
1083 1073          if (fxkparmsp->fx_tqntm == FX_TQINF)
1084 1074                  fxpp->fx_pquantum = FX_TQINF;
1085 1075          else if (fxkparmsp->fx_tqntm == FX_TQDEF)
1086 1076                  fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
1087 1077          else if ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)
1088 1078                  fxpp->fx_pquantum = fxkparmsp->fx_tqntm;
1089 1079  
1090 1080          fxpp->fx_nice = nice;
1091 1081  
1092 1082          fx_change_priority(tx, fxpp);
1093 1083          thread_unlock(tx);
1094 1084          return (0);
1095 1085  }
1096 1086  
1097 1087  
1098 1088  /*
1099 1089   * Return the global scheduling priority that would be assigned
1100 1090   * to a thread entering the fixed-priority class with the fx_upri.
1101 1091   */
1102 1092  static pri_t
1103 1093  fx_globpri(kthread_t *t)
1104 1094  {
1105 1095          fxproc_t *fxpp;
1106 1096  
1107 1097          ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
1108 1098  
1109 1099          fxpp = (fxproc_t *)t->t_cldata;
1110 1100          return (fx_dptbl[fxpp->fx_pri].fx_globpri);
1111 1101  
1112 1102  }
1113 1103  
1114 1104  /*
1115 1105   * Arrange for thread to be placed in appropriate location
1116 1106   * on dispatcher queue.
1117 1107   *
1118 1108   * This is called with the current thread in TS_ONPROC and locked.
1119 1109   */
1120 1110  static void
1121 1111  fx_preempt(kthread_t *t)
1122 1112  {
1123 1113          fxproc_t        *fxpp = (fxproc_t *)(t->t_cldata);
1124 1114  
1125 1115          ASSERT(t == curthread);
1126 1116          ASSERT(THREAD_LOCK_HELD(curthread));
1127 1117  
1128 1118          (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);
1129 1119  
1130 1120          /*
1131 1121           * Check to see if we're doing "preemption control" here.  If
1132 1122           * we are, and if the user has requested that this thread not
1133 1123           * be preempted, and if preemptions haven't been put off for
1134 1124           * too long, let the preemption happen here but try to make
1135 1125           * sure the thread is rescheduled as soon as possible.  We do
1136 1126           * this by putting it on the front of the highest priority run
1137 1127           * queue in the FX class.  If the preemption has been put off
1138 1128           * for too long, clear the "nopreempt" bit and let the thread
1139 1129           * be preempted.
1140 1130           */
1141 1131          if (t->t_schedctl && schedctl_get_nopreempt(t)) {
1142 1132                  if (fxpp->fx_pquantum == FX_TQINF ||
1143 1133                      fxpp->fx_timeleft > -SC_MAX_TICKS) {
1144 1134                          DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t);
1145 1135                          schedctl_set_yield(t, 1);
1146 1136                          setfrontdq(t);
1147 1137                          return;
1148 1138                  } else {
1149 1139                          schedctl_set_nopreempt(t, 0);
1150 1140                          DTRACE_SCHED1(schedctl__preempt, kthread_t *, t);
1151 1141                          TNF_PROBE_2(schedctl_preempt, "schedctl FX fx_preempt",
1152 1142                              /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid,
1153 1143                              tnf_lwpid, lwpid, t->t_tid);
1154 1144                          /*
1155 1145                           * Fall through and be preempted below.
1156 1146                           */
1157 1147                  }
1158 1148          }
1159 1149  
1160 1150          if (FX_HAS_CB(fxpp)) {
1161 1151                  clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
1162 1152                  pri_t   newpri = fxpp->fx_pri;
1163 1153                  FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie,
1164 1154                      &new_quantum, &newpri);
1165 1155                  FX_ADJUST_QUANTUM(new_quantum);
1166 1156                  if ((int)new_quantum != fxpp->fx_pquantum) {
1167 1157                          fxpp->fx_pquantum = (int)new_quantum;
1168 1158                          fxpp->fx_timeleft = fxpp->fx_pquantum;
1169 1159                  }
1170 1160                  FX_ADJUST_PRI(newpri);
1171 1161                  fxpp->fx_pri = newpri;
1172 1162                  THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
1173 1163          }
1174 1164  
1175 1165          /*
1176 1166           * This thread may be placed on wait queue by CPU Caps. In this case we
1177 1167           * do not need to do anything until it is removed from the wait queue.
1178 1168           */
1179 1169          if (CPUCAPS_ENFORCE(t)) {
1180 1170                  return;
1181 1171          }
1182 1172  
1183 1173          if ((fxpp->fx_flags & (FXBACKQ)) == FXBACKQ) {
1184 1174                  fxpp->fx_timeleft = fxpp->fx_pquantum;
1185 1175                  fxpp->fx_flags &= ~FXBACKQ;
1186 1176                  setbackdq(t);
1187 1177          } else {
1188 1178                  setfrontdq(t);
1189 1179          }
1190 1180  }
1191 1181  
1192 1182  static void
1193 1183  fx_setrun(kthread_t *t)
1194 1184  {
1195 1185          fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);
1196 1186  
1197 1187          ASSERT(THREAD_LOCK_HELD(t));    /* t should be in transition */
1198 1188          fxpp->fx_flags &= ~FXBACKQ;
1199 1189  
1200 1190          if (t->t_disp_time != ddi_get_lbolt())
1201 1191                  setbackdq(t);
1202 1192          else
1203 1193                  setfrontdq(t);
1204 1194  }
1205 1195  
1206 1196  
1207 1197  /*
1208 1198   * Prepare thread for sleep. We charge the CPU time used so far, notify
1209 1199   * any registered callback, and time stamp the thread for the swapper.
1210 1200   */
1211 1201  static void
1212 1202  fx_sleep(kthread_t *t)
1213 1203  {
1214 1204          fxproc_t        *fxpp = (fxproc_t *)(t->t_cldata);
1215 1205  
1216 1206          ASSERT(t == curthread);
1217 1207          ASSERT(THREAD_LOCK_HELD(t));
1218 1208  
1219 1209          /*
1220 1210           * Account for time spent on CPU before going to sleep.
1221 1211           */
1222 1212          (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);
1223 1213  
1224 1214          if (FX_HAS_CB(fxpp)) {
1225 1215                  FX_CB_SLEEP(FX_CALLB(fxpp), fxpp->fx_cookie);
1226 1216          }
1227 1217          t->t_stime = ddi_get_lbolt();           /* time stamp for the swapper */
1228 1218  }
1229 1219  
1230 1220  
1231 1221  /*
1232 1222   * Return Values:
1233 1223   *
1234 1224   *      -1 if the thread is loaded or is not eligible to be swapped in.
1235 1225   *
1236 1226   * FX and RT threads are designed so that they don't swapout; however,
1237 1227   * it is possible that while the thread is swapped out and in another class, it
1238 1228   * can be changed to FX or RT.  Since these threads should be swapped in
1239 1229   * as soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin
1240 1230   * returns SHRT_MAX - 1, so that it gives deference to any swapped out
1241 1231   * RT threads.
1242 1232   */
1243 1233  /* ARGSUSED */
1244 1234  static pri_t
1245 1235  fx_swapin(kthread_t *t, int flags)
1246 1236  {
1247 1237          pri_t   tpri = -1;
1248 1238  
1249 1239          ASSERT(THREAD_LOCK_HELD(t));
1250 1240  
1251 1241          if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
1252 1242                  tpri = (pri_t)SHRT_MAX - 1;
1253 1243          }
1254 1244  
1255 1245          return (tpri);
1256 1246  }
1257 1247  
1258 1248  /*
1259 1249   * Return Values
1260 1250   *      -1 if the thread isn't loaded or is not eligible to be swapped out.
1261 1251   */
1262 1252  /* ARGSUSED */
1263 1253  static pri_t
1264 1254  fx_swapout(kthread_t *t, int flags)
1265 1255  {
1266 1256          ASSERT(THREAD_LOCK_HELD(t));
1267 1257  
1268 1258          return (-1);
1269 1259  
1270 1260  }
1271 1261  
1272 1262  /* ARGSUSED */
1273 1263  static void
1274 1264  fx_stop(kthread_t *t, int why, int what)
1275 1265  {
1276 1266          fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);
1277 1267  
1278 1268          ASSERT(THREAD_LOCK_HELD(t));
1279 1269  
1280 1270          if (FX_HAS_CB(fxpp)) {
1281 1271                  FX_CB_STOP(FX_CALLB(fxpp), fxpp->fx_cookie);
1282 1272          }
1283 1273  }
1284 1274  
1285 1275  /*
1286 1276   * Check for time slice expiration.  If time slice has expired
1287 1277   * set runrun to cause preemption.
1288 1278   */
1289 1279  static void
1290 1280  fx_tick(kthread_t *t)
1291 1281  {
1292 1282          boolean_t call_cpu_surrender = B_FALSE;
1293 1283          fxproc_t *fxpp;
1294 1284  
1295 1285          ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
1296 1286  
1297 1287          thread_lock(t);
1298 1288  
1299 1289          fxpp = (fxproc_t *)(t->t_cldata);
1300 1290  
1301 1291          if (FX_HAS_CB(fxpp)) {
1302 1292                  clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
1303 1293                  pri_t   newpri = fxpp->fx_pri;
1304 1294                  FX_CB_TICK(FX_CALLB(fxpp), fxpp->fx_cookie,
1305 1295                      &new_quantum, &newpri);
1306 1296                  FX_ADJUST_QUANTUM(new_quantum);
1307 1297                  if ((int)new_quantum != fxpp->fx_pquantum) {
1308 1298                          fxpp->fx_pquantum = (int)new_quantum;
1309 1299                          fxpp->fx_timeleft = fxpp->fx_pquantum;
1310 1300                  }
1311 1301                  FX_ADJUST_PRI(newpri);
1312 1302                  if (newpri != fxpp->fx_pri) {
1313 1303                          fxpp->fx_pri = newpri;
1314 1304                          fx_change_priority(t, fxpp);
1315 1305                  }
1316 1306          }
1317 1307  
1318 1308          /*
1319 1309           * Keep track of thread's project CPU usage.  Note that projects
1320 1310           * get charged even when threads are running in the kernel.
1321 1311           */
1322 1312          call_cpu_surrender =  CPUCAPS_CHARGE(t, &fxpp->fx_caps,
1323 1313              CPUCAPS_CHARGE_ENFORCE);
1324 1314  
1325 1315          if ((fxpp->fx_pquantum != FX_TQINF) &&
1326 1316              (--fxpp->fx_timeleft <= 0)) {
1327 1317                  pri_t   new_pri;
1328 1318  
1329 1319                  /*
1330 1320                   * If we're doing preemption control and trying to
1331 1321                   * avoid preempting this thread, just note that
1332 1322                   * the thread should yield soon and let it keep
1333 1323                   * running (unless it's been a while).
1334 1324                   */
1335 1325                  if (t->t_schedctl && schedctl_get_nopreempt(t)) {
1336 1326                          if (fxpp->fx_timeleft > -SC_MAX_TICKS) {
1337 1327                                  DTRACE_SCHED1(schedctl__nopreempt,
1338 1328                                      kthread_t *, t);
1339 1329                                  schedctl_set_yield(t, 1);
1340 1330                                  thread_unlock_nopreempt(t);
1341 1331                                  return;
1342 1332                          }
1343 1333                          TNF_PROBE_2(schedctl_failsafe,
1344 1334                              "schedctl FX fx_tick", /* CSTYLED */,
1345 1335                              tnf_pid, pid, ttoproc(t)->p_pid,
1346 1336                              tnf_lwpid, lwpid, t->t_tid);
1347 1337                  }
1348 1338                  new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
1349 1339                  ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri);
1350 1340                  /*
1351 1341                   * When the priority of a thread is changed,
1352 1342                   * it may be necessary to adjust its position
1353 1343                   * on a sleep queue or dispatch queue. Even
1354 1344                   * when the priority is not changed, we need
1355 1345                   * to preserve round robin on dispatch queue.
1356 1346                   * The function thread_change_pri accomplishes
1357 1347                   * this.
1358 1348                   */
1359 1349                  if (thread_change_pri(t, new_pri, 0)) {
1360 1350                          fxpp->fx_timeleft = fxpp->fx_pquantum;
1361 1351                  } else {
1362 1352                          call_cpu_surrender = B_TRUE;
1363 1353                  }
1364 1354          } else if (t->t_state == TS_ONPROC &&
1365 1355              t->t_pri < t->t_disp_queue->disp_maxrunpri) {
1366 1356                  call_cpu_surrender = B_TRUE;
1367 1357          }
1368 1358  
1369 1359          if (call_cpu_surrender) {
1370 1360                  fxpp->fx_flags |= FXBACKQ;
1371 1361                  cpu_surrender(t);
1372 1362          }
1373 1363          thread_unlock_nopreempt(t);     /* clock thread can't be preempted */
1374 1364  }
1375 1365  
1376 1366  
1377 1367  static void
1378 1368  fx_trapret(kthread_t *t)
1379 1369  {
1380 1370          cpu_t           *cp = CPU;
1381 1371  
1382 1372          ASSERT(THREAD_LOCK_HELD(t));
1383 1373          ASSERT(t == curthread);
1384 1374          ASSERT(cp->cpu_dispthread == t);
1385 1375          ASSERT(t->t_state == TS_ONPROC);
1386 1376  }
1387 1377  
1388 1378  
1389 1379  /*
1390 1380   * Processes waking up go to the back of their queue.
1391 1381   */
1392 1382  static void
1393 1383  fx_wakeup(kthread_t *t)
1394 1384  {
1395 1385          fxproc_t        *fxpp = (fxproc_t *)(t->t_cldata);
1396 1386  
1397 1387          ASSERT(THREAD_LOCK_HELD(t));
1398 1388  
1399 1389          t->t_stime = ddi_get_lbolt();           /* time stamp for the swapper */
1400 1390          if (FX_HAS_CB(fxpp)) {
1401 1391                  clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
1402 1392                  pri_t   newpri = fxpp->fx_pri;
1403 1393                  FX_CB_WAKEUP(FX_CALLB(fxpp), fxpp->fx_cookie,
1404 1394                      &new_quantum, &newpri);
1405 1395                  FX_ADJUST_QUANTUM(new_quantum);
1406 1396                  if ((int)new_quantum != fxpp->fx_pquantum) {
1407 1397                          fxpp->fx_pquantum = (int)new_quantum;
1408 1398                          fxpp->fx_timeleft = fxpp->fx_pquantum;
1409 1399                  }
1410 1400  
1411 1401                  FX_ADJUST_PRI(newpri);
1412 1402                  if (newpri != fxpp->fx_pri) {
1413 1403                          fxpp->fx_pri = newpri;
1414 1404                          THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
1415 1405                  }
1416 1406          }
1417 1407  
1418 1408          fxpp->fx_flags &= ~FXBACKQ;
1419 1409  
1420 1410          if (t->t_disp_time != ddi_get_lbolt())
1421 1411                  setbackdq(t);
1422 1412          else
1423 1413                  setfrontdq(t);
1424 1414  }
1425 1415  
1426 1416  
1427 1417  /*
1428 1418   * When a thread yields, put it on the back of the run queue.
1429 1419   */
1430 1420  static void
1431 1421  fx_yield(kthread_t *t)
1432 1422  {
1433 1423          fxproc_t        *fxpp = (fxproc_t *)(t->t_cldata);
1434 1424  
1435 1425          ASSERT(t == curthread);
1436 1426          ASSERT(THREAD_LOCK_HELD(t));
1437 1427  
1438 1428          /*
1439 1429           * Collect CPU usage spent before yielding CPU.
1440 1430           */
1441 1431          (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);
1442 1432  
1443 1433          if (FX_HAS_CB(fxpp))  {
1444 1434                  clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
1445 1435                  pri_t   newpri = fxpp->fx_pri;
1446 1436                  FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie,
1447 1437                      &new_quantum, &newpri);
1448 1438                  FX_ADJUST_QUANTUM(new_quantum);
1449 1439                  if ((int)new_quantum != fxpp->fx_pquantum) {
1450 1440                          fxpp->fx_pquantum = (int)new_quantum;
1451 1441                          fxpp->fx_timeleft = fxpp->fx_pquantum;
1452 1442                  }
1453 1443                  FX_ADJUST_PRI(newpri);
1454 1444                  fxpp->fx_pri = newpri;
1455 1445                  THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
1456 1446          }
1457 1447  
1458 1448          /*
1459 1449           * Clear the preemption control "yield" bit since the user is
1460 1450           * doing a yield.
1461 1451           */
1462 1452          if (t->t_schedctl)
1463 1453                  schedctl_set_yield(t, 0);
1464 1454  
1465 1455          if (fxpp->fx_timeleft <= 0) {
1466 1456                  /*
1467 1457                   * Time slice was artificially extended to avoid
1468 1458                   * preemption, so pretend we're preempting it now.
1469 1459                   */
1470 1460                  DTRACE_SCHED1(schedctl__yield, int, -fxpp->fx_timeleft);
1471 1461                  fxpp->fx_timeleft = fxpp->fx_pquantum;
1472 1462                  THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
1473 1463                  ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri);
1474 1464          }
1475 1465  
1476 1466          fxpp->fx_flags &= ~FXBACKQ;
1477 1467          setbackdq(t);
1478 1468  }
1479 1469  
1480 1470  /*
1481 1471   * Increment the nice value of the specified thread by incr and
1482 1472   * return the new value in *retvalp.
1483 1473   */
1484 1474  static int
1485 1475  fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1486 1476  {
1487 1477          int             newnice;
1488 1478          fxproc_t        *fxpp = (fxproc_t *)(t->t_cldata);
1489 1479          fxkparms_t      fxkparms;
1490 1480  
1491 1481          ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
1492 1482  
1493 1483          /* If there's no change to priority, just return current setting */
1494 1484          if (incr == 0) {
1495 1485                  if (retvalp) {
1496 1486                          *retvalp = fxpp->fx_nice - NZERO;
1497 1487                  }
1498 1488                  return (0);
1499 1489          }
1500 1490  
1501 1491          if ((incr < 0 || incr > 2 * NZERO) &&
1502 1492              secpolicy_raisepriority(cr) != 0)
1503 1493                  return (EPERM);
1504 1494  
1505 1495          /*
1506 1496           * Specifying a nice increment greater than the upper limit of
1507 1497           * 2 * NZERO - 1 will result in the thread's nice value being
1508 1498           * set to the upper limit.  We check for this before computing
1509 1499           * the new value because otherwise we could get overflow
1510 1500           * if a privileged user specified some ridiculous increment.
1511 1501           */
1512 1502          if (incr > 2 * NZERO - 1)
1513 1503                  incr = 2 * NZERO - 1;
1514 1504  
1515 1505          newnice = fxpp->fx_nice + incr;
1516 1506          if (newnice > NZERO)
1517 1507                  newnice = NZERO;
1518 1508          else if (newnice < 0)
1519 1509                  newnice = 0;
1520 1510  
1521 1511          fxkparms.fx_uprilim = fxkparms.fx_upri =
1522 1512              -((newnice - NZERO) * fx_maxupri) / NZERO;
1523 1513  
1524 1514          fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI;
1525 1515  
1526 1516          fxkparms.fx_tqntm = FX_TQDEF;
1527 1517  
1528 1518          /*
1529 1519           * Reset the uprilim and upri values of the thread. Adjust
1530 1520           * time quantum accordingly.
1531 1521           */
1532 1522  
1533 1523          (void) fx_parmsset(t, (void *)&fxkparms, (id_t)0, (cred_t *)NULL);
1534 1524  
1535 1525          /*
1536 1526           * Although fx_parmsset already reset fx_nice it may
1537 1527           * not have been set to precisely the value calculated above
1538 1528           * because fx_parmsset determines the nice value from the
1539 1529           * user priority and we may have truncated during the integer
1540 1530           * conversion from nice value to user priority and back.
1541 1531           * We reset fx_nice to the value we calculated above.
1542 1532           */
1543 1533          fxpp->fx_nice = (char)newnice;
1544 1534  
1545 1535          if (retvalp)
1546 1536                  *retvalp = newnice - NZERO;
1547 1537  
1548 1538          return (0);
1549 1539  }
1550 1540  
1551 1541  /*
1552 1542   * Increment the priority of the specified thread by incr and
1553 1543   * return the new value in *retvalp.
1554 1544   */
1555 1545  static int
1556 1546  fx_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1557 1547  {
1558 1548          int             newpri;
1559 1549          fxproc_t        *fxpp = (fxproc_t *)(t->t_cldata);
1560 1550          fxkparms_t      fxkparms;
1561 1551  
1562 1552          ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
1563 1553  
1564 1554          /* If there's no change to priority, just return current setting */
1565 1555          if (incr == 0) {
1566 1556                  *retvalp = fxpp->fx_pri;
1567 1557                  return (0);
1568 1558          }
1569 1559  
1570 1560          newpri = fxpp->fx_pri + incr;
1571 1561          if (newpri > fx_maxupri || newpri < 0)
1572 1562                  return (EINVAL);
1573 1563  
1574 1564          *retvalp = newpri;
1575 1565          fxkparms.fx_uprilim = fxkparms.fx_upri = newpri;
1576 1566          fxkparms.fx_tqntm = FX_NOCHANGE;
1577 1567          fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI;
1578 1568  
1579 1569          /*
1580 1570           * Reset the uprilim and upri values of the thread.
1581 1571           */
1582 1572          return (fx_parmsset(t, (void *)&fxkparms, (id_t)0, cr));
1583 1573  }
1584 1574  
1585 1575  static void
1586 1576  fx_change_priority(kthread_t *t, fxproc_t *fxpp)
1587 1577  {
1588 1578          pri_t   new_pri;
1589 1579  
1590 1580          ASSERT(THREAD_LOCK_HELD(t));
1591 1581          new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
1592 1582          ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri);
1593 1583          t->t_cpri = fxpp->fx_pri;
1594 1584          if (t == curthread || t->t_state == TS_ONPROC) {
1595 1585                  /* curthread is always onproc */
1596 1586                  cpu_t   *cp = t->t_disp_queue->disp_cpu;
1597 1587                  THREAD_CHANGE_PRI(t, new_pri);
1598 1588                  if (t == cp->cpu_dispthread)
1599 1589                          cp->cpu_dispatch_pri = DISP_PRIO(t);
1600 1590                  if (DISP_MUST_SURRENDER(t)) {
1601 1591                          fxpp->fx_flags |= FXBACKQ;
1602 1592                          cpu_surrender(t);
1603 1593                  } else {
1604 1594                          fxpp->fx_timeleft = fxpp->fx_pquantum;
1605 1595                  }
1606 1596          } else {
1607 1597                  /*
1608 1598                   * When the priority of a thread is changed,
1609 1599                   * it may be necessary to adjust its position
1610 1600                   * on a sleep queue or dispatch queue.
1611 1601                   * The function thread_change_pri accomplishes
1612 1602                   * this.
1613 1603                   */
1614 1604                  if (thread_change_pri(t, new_pri, 0)) {
1615 1605                          /*
1616 1606                           * The thread was on a run queue. Reset
1617 1607                           * its CPU timeleft from the quantum
1618 1608                           * associated with the new priority.
1619 1609                           */
1620 1610                          fxpp->fx_timeleft = fxpp->fx_pquantum;
1621 1611                  } else {
1622 1612                          fxpp->fx_flags |= FXBACKQ;
1623 1613                  }
1624 1614          }
1625 1615  }
1626 1616  
1627 1617  static int
1628 1618  fx_alloc(void **p, int flag)
1629 1619  {
1630 1620          void *bufp;
1631 1621  
1632 1622          bufp = kmem_alloc(sizeof (fxproc_t), flag);
1633 1623          if (bufp == NULL) {
1634 1624                  return (ENOMEM);
1635 1625          } else {
1636 1626                  *p = bufp;
1637 1627                  return (0);
1638 1628          }
1639 1629  }
1640 1630  
1641 1631  static void
1642 1632  fx_free(void *bufp)
1643 1633  {
1644 1634          if (bufp)
1645 1635                  kmem_free(bufp, sizeof (fxproc_t));
1646 1636  }
1647 1637  
1648 1638  /*
1649 1639   * Release the callback list mutex after successful lookup
1650 1640   */
1651 1641  void
1652 1642  fx_list_release(fxproc_t *fxpp)
1653 1643  {
1654 1644          int index = FX_CB_LIST_HASH(fxpp->fx_ktid);
1655 1645          kmutex_t *lockp = &fx_cb_list_lock[index];
1656 1646          mutex_exit(lockp);
1657 1647  }
1658 1648  
1659 1649  fxproc_t *
1660 1650  fx_list_lookup(kt_did_t ktid)
1661 1651  {
1662 1652          int index = FX_CB_LIST_HASH(ktid);
1663 1653          kmutex_t *lockp = &fx_cb_list_lock[index];
1664 1654          fxproc_t *fxpp;
1665 1655  
1666 1656          mutex_enter(lockp);
1667 1657  
1668 1658          for (fxpp = fx_cb_plisthead[index].fx_cb_next;
1669 1659              fxpp != &fx_cb_plisthead[index]; fxpp = fxpp->fx_cb_next) {
1670 1660                  if (fxpp->fx_tp->t_cid == fx_cid && fxpp->fx_ktid == ktid &&
1671 1661                      fxpp->fx_callback != NULL) {
1672 1662                          /*
1673 1663                           * The caller is responsible for calling
1674 1664                           * fx_list_release to drop the lock upon
1675 1665                           * successful lookup
1676 1666                           */
1677 1667                          return (fxpp);
1678 1668                  }
1679 1669          }
1680 1670          mutex_exit(lockp);
1681 1671          return ((fxproc_t *)NULL);
1682 1672  }
1683 1673  
1684 1674  
1685 1675  /*
1686 1676   * register a callback set of routines for current thread
1687 1677   * thread should already be in FX class
1688 1678   */
1689 1679  int
1690 1680  fx_register_callbacks(fx_callbacks_t *fx_callback, fx_cookie_t cookie,
1691 1681          pri_t pri, clock_t quantum)
1692 1682  {
1693 1683  
1694 1684          fxproc_t        *fxpp;
1695 1685  
1696 1686          if (fx_callback == NULL)
1697 1687                  return (EINVAL);
1698 1688  
1699 1689          if (secpolicy_dispadm(CRED()) != 0)
1700 1690                  return (EPERM);
1701 1691  
1702 1692          if (FX_CB_VERSION(fx_callback) != FX_CALLB_REV)
1703 1693                  return (EINVAL);
1704 1694  
1705 1695          if (!FX_ISVALID(pri, quantum))
1706 1696                  return (EINVAL);
1707 1697  
1708 1698          thread_lock(curthread);         /* get dispatcher lock on thread */
1709 1699  
1710 1700          if (curthread->t_cid != fx_cid) {
1711 1701                  thread_unlock(curthread);
1712 1702                  return (EINVAL);
1713 1703          }
1714 1704  
1715 1705          fxpp = (fxproc_t *)(curthread->t_cldata);
1716 1706          ASSERT(fxpp != NULL);
1717 1707          if (FX_HAS_CB(fxpp)) {
1718 1708                  thread_unlock(curthread);
1719 1709                  return (EINVAL);
1720 1710          }
1721 1711  
1722 1712          fxpp->fx_callback = fx_callback;
1723 1713          fxpp->fx_cookie = cookie;
1724 1714  
1725 1715          if (pri != FX_CB_NOCHANGE) {
1726 1716                  fxpp->fx_pri = pri;
1727 1717                  FX_ADJUST_PRI(fxpp->fx_pri);
1728 1718                  if (quantum == FX_TQDEF) {
1729 1719                          fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
1730 1720                  } else if (quantum == FX_TQINF) {
1731 1721                          fxpp->fx_pquantum = FX_TQINF;
1732 1722                  } else if (quantum != FX_NOCHANGE) {
1733 1723                          FX_ADJUST_QUANTUM(quantum);
1734 1724                          fxpp->fx_pquantum = quantum;
1735 1725                  }
1736 1726          } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) {
1737 1727                  if (quantum == FX_TQINF)
1738 1728                          fxpp->fx_pquantum = FX_TQINF;
1739 1729                  else {
1740 1730                          FX_ADJUST_QUANTUM(quantum);
1741 1731                          fxpp->fx_pquantum = quantum;
1742 1732                  }
1743 1733          }
1744 1734  
1745 1735          fxpp->fx_ktid = ddi_get_kt_did();
1746 1736  
1747 1737          fx_change_priority(curthread, fxpp);
1748 1738  
1749 1739          thread_unlock(curthread);
1750 1740  
1751 1741          /*
1752 1742           * Link new structure into fxproc list.
1753 1743           */
1754 1744          FX_CB_LIST_INSERT(fxpp);
1755 1745          return (0);
1756 1746  }
1757 1747  
1758 1748  /* unregister a callback set of routines for current thread */
1759 1749  int
1760 1750  fx_unregister_callbacks()
1761 1751  {
1762 1752          fxproc_t        *fxpp;
1763 1753  
1764 1754          if ((fxpp = fx_list_lookup(ddi_get_kt_did())) == NULL) {
1765 1755                  /*
1766 1756                   * did not have a registered callback;
1767 1757                   */
1768 1758                  return (EINVAL);
1769 1759          }
1770 1760  
1771 1761          thread_lock(fxpp->fx_tp);
1772 1762          fxpp->fx_callback = NULL;
1773 1763          fxpp->fx_cookie = NULL;
1774 1764          thread_unlock(fxpp->fx_tp);
1775 1765          fx_list_release(fxpp);
1776 1766  
1777 1767          FX_CB_LIST_DELETE(fxpp);
1778 1768          return (0);
1779 1769  }
1780 1770  
1781 1771  /*
1782 1772   * modify priority and/or quantum value of a thread with callback
1783 1773   */
1784 1774  int
1785 1775  fx_modify_priority(kt_did_t ktid, clock_t quantum, pri_t pri)
1786 1776  {
1787 1777          fxproc_t        *fxpp;
1788 1778  
1789 1779          if (!FX_ISVALID(pri, quantum))
1790 1780                  return (EINVAL);
1791 1781  
1792 1782          if ((fxpp = fx_list_lookup(ktid)) == NULL) {
1793 1783                  /*
1794 1784                   * either thread had exited or did not have a registered
1795 1785                   * callback;
1796 1786                   */
1797 1787                  return (ESRCH);
1798 1788          }
1799 1789  
1800 1790          thread_lock(fxpp->fx_tp);
1801 1791  
1802 1792          if (pri != FX_CB_NOCHANGE) {
1803 1793                  fxpp->fx_pri = pri;
1804 1794                  FX_ADJUST_PRI(fxpp->fx_pri);
1805 1795                  if (quantum == FX_TQDEF) {
1806 1796                          fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum;
1807 1797                  } else if (quantum == FX_TQINF) {
1808 1798                          fxpp->fx_pquantum = FX_TQINF;
1809 1799                  } else if (quantum != FX_NOCHANGE) {
1810 1800                          FX_ADJUST_QUANTUM(quantum);
1811 1801                          fxpp->fx_pquantum = quantum;
1812 1802                  }
1813 1803          } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) {
1814 1804                  if (quantum == FX_TQINF) {
1815 1805                          fxpp->fx_pquantum = FX_TQINF;
1816 1806                  } else {
1817 1807                          FX_ADJUST_QUANTUM(quantum);
1818 1808                          fxpp->fx_pquantum = quantum;
1819 1809                  }
1820 1810          }
1821 1811  
1822 1812          fx_change_priority(fxpp->fx_tp, fxpp);
1823 1813  
1824 1814          thread_unlock(fxpp->fx_tp);
1825 1815          fx_list_release(fxpp);
1826 1816          return (0);
1827 1817  }
1828 1818  
1829 1819  
1830 1820  /*
1831 1821   * return an iblock cookie for mutex initialization to be used in callbacks
1832 1822   */
1833 1823  void *
1834 1824  fx_get_mutex_cookie()
1835 1825  {
1836 1826          return ((void *)(uintptr_t)__ipltospl(DISP_LEVEL));
1837 1827  }
1838 1828  
1839 1829  /*
1840 1830   * return maximum relative priority
1841 1831   */
1842 1832  pri_t
1843 1833  fx_get_maxpri()
1844 1834  {
1845 1835          return (fx_maxumdpri);
1846 1836  }
  
    | 
      ↓ open down ↓ | 
    1753 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX