1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2015 Joyent, Inc.
  26  */
  27 
  28 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  29 /*        All Rights Reserved   */
  30 
  31 #include <sys/types.h>
  32 #include <sys/param.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/cred.h>
  35 #include <sys/proc.h>
  36 #include <sys/pcb.h>
  37 #include <sys/signal.h>
  38 #include <sys/user.h>
  39 #include <sys/priocntl.h>
  40 #include <sys/class.h>
  41 #include <sys/disp.h>
  42 #include <sys/procset.h>
  43 #include <sys/cmn_err.h>
  44 #include <sys/debug.h>
  45 #include <sys/rt.h>
  46 #include <sys/rtpriocntl.h>
  47 #include <sys/kmem.h>
  48 #include <sys/systm.h>
  49 #include <sys/schedctl.h>
  50 #include <sys/errno.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/vmsystm.h>
  53 #include <sys/time.h>
  54 #include <sys/policy.h>
  55 #include <sys/sdt.h>
  56 #include <sys/cpupart.h>
  57 #include <sys/modctl.h>
  58 
/*
 * Forward declaration of the class initialization routine.  It is referenced
 * by the sclass entry below and defined later in this file.
 */
static pri_t    rt_init(id_t, int, classfuncs_t **);

/*
 * Scheduling class entry for the real-time class: the class name "RT"
 * followed by the class initialization routine.  The meaning of the
 * trailing 0 depends on the layout of struct sclass, which is declared
 * elsewhere.
 */
static struct sclass csw = {
        "RT",
        rt_init,
        0
};

/*
 * Module description for this scheduling class: the scheduler module ops,
 * a human-readable description string, and a pointer to the class entry
 * above.
 */
static struct modlsched modlsched = {
        &mod_schedops, "realtime scheduling class", &csw
};

/*
 * Module linkage handed to mod_install() and mod_info() by _init() and
 * _info().  It points at the single modlsched above; the list is
 * NULL-terminated.
 */
static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modlsched, NULL
};
  74 
  75 int
  76 _init()
  77 {
  78         return (mod_install(&modlinkage));
  79 }
  80 
  81 int
  82 _fini()
  83 {
  84         return (EBUSY);         /* don't remove RT for now */
  85 }
  86 
  87 int
  88 _info(struct modinfo *modinfop)
  89 {
  90         return (mod_info(&modlinkage, modinfop));
  91 }
  92 
  93 
  94 /*
  95  * Class specific code for the real-time class
  96  */
  97 
/*
 * Class-wide tunables and state.  rt_maxpri is defined here; rt_dptbl is
 * only a pointer that rt_init() fills in from rt_getdptbl(), which is
 * implemented outside this file.
 */
#define RTMAXPRI 59

pri_t rt_maxpri = RTMAXPRI;     /* maximum real-time priority */
rtdpent_t *rt_dptbl;      /* real-time dispatcher parameter table */

/*
 * Prototypes for the class operations installed in rt_classfuncs.
 */
static int      rt_admin(caddr_t, cred_t *);
static int      rt_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
static int      rt_fork(kthread_t *, kthread_t *, void *);
static int      rt_getclinfo(void *);
static int      rt_getclpri(pcpri_t *);
static int      rt_parmsin(void *);
static int      rt_parmsout(void *, pc_vaparms_t *);
static int      rt_vaparmsin(void *, pc_vaparms_t *);
static int      rt_vaparmsout(void *, pc_vaparms_t *);
static int      rt_parmsset(kthread_t *, void *, id_t, cred_t *);
static int      rt_donice(kthread_t *, cred_t *, int, int *);
static int      rt_doprio(kthread_t *, cred_t *, int, int *);
static void     rt_exitclass(void *);
static int      rt_canexit(kthread_t *, cred_t *);
static void     rt_forkret(kthread_t *, kthread_t *);
/*
 * rt_nullsys is deliberately declared without a prototype: it is installed
 * in several rt_classfuncs slots below.
 */
static void     rt_nullsys();
static void     rt_parmsget(kthread_t *, void *);
static void     rt_preempt(kthread_t *);
static void     rt_setrun(kthread_t *);
static void     rt_tick(kthread_t *);
static void     rt_wakeup(kthread_t *);
static pri_t    rt_swapin(kthread_t *, int);
static pri_t    rt_swapout(kthread_t *, int);
static pri_t    rt_globpri(kthread_t *);
static void     rt_yield(kthread_t *);
static int      rt_alloc(void **, int);
static void     rt_free(void *);

/* Internal helper: applies an rtproc's rt_pri to its thread. */
static void     rt_change_priority(kthread_t *, rtproc_t *);

static id_t     rt_cid;         /* real-time class ID */
static rtproc_t rt_plisthead;   /* dummy rtproc at head of rtproc list */
static kmutex_t rt_dptblock;    /* protects realtime dispatch table */
static kmutex_t rt_list_lock;   /* protects RT thread list */

/* Supplies the dispatcher parameter table; defined outside this file. */
extern rtdpent_t *rt_getdptbl(void);
 142 
/*
 * Operations vector for the real-time class.  rt_init() hands a pointer to
 * this structure back to its caller.  The initializers are positional, so
 * their order presumably has to match the member order of struct classfuncs
 * (declared elsewhere) -- do not reorder.  rt_nullsys, an empty function,
 * fills every slot for which this class has nothing to do; the trailing
 * comment on each such line names the slot.
 */
static struct classfuncs rt_classfuncs = {
        /* class ops */
        rt_admin,
        rt_getclinfo,
        rt_parmsin,
        rt_parmsout,
        rt_vaparmsin,
        rt_vaparmsout,
        rt_getclpri,
        rt_alloc,
        rt_free,
        /* thread ops */
        rt_enterclass,
        rt_exitclass,
        rt_canexit,
        rt_fork,
        rt_forkret,
        rt_parmsget,
        rt_parmsset,
        rt_nullsys,     /* stop */
        rt_nullsys,     /* exit */
        rt_nullsys,     /* active */
        rt_nullsys,     /* inactive */
        rt_swapin,
        rt_swapout,
        rt_nullsys,     /* trapret */
        rt_preempt,
        rt_setrun,
        rt_nullsys,     /* sleep */
        rt_tick,
        rt_wakeup,
        rt_donice,
        rt_globpri,
        rt_nullsys,     /* set_process_group */
        rt_yield,
        rt_doprio,
};
 180 
 181 /*
 182  * Real-time class initialization. Called by dispinit() at boot time.
 183  * We can ignore the clparmsz argument since we know that the smallest
 184  * possible parameter buffer is big enough for us.
 185  */
 186 /* ARGSUSED */
 187 pri_t
 188 rt_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 189 {
 190         rt_dptbl = rt_getdptbl();
 191         rt_cid = cid;   /* Record our class ID */
 192 
 193         /*
 194          * Initialize the rtproc list.
 195          */
 196         rt_plisthead.rt_next = rt_plisthead.rt_prev = &rt_plisthead;
 197 
 198         /*
 199          * We're required to return a pointer to our classfuncs
 200          * structure and the highest global priority value we use.
 201          */
 202         *clfuncspp = &rt_classfuncs;
 203         mutex_init(&rt_dptblock, NULL, MUTEX_DEFAULT, NULL);
 204         mutex_init(&rt_list_lock, NULL, MUTEX_DEFAULT, NULL);
 205         return (rt_dptbl[rt_maxpri].rt_globpri);
 206 }
 207 
 208 /*
 209  * Get or reset the rt_dptbl values per the user's request.
 210  */
 211 /* ARGSUSED */
 212 static int
 213 rt_admin(caddr_t uaddr, cred_t *reqpcredp)
 214 {
 215         rtadmin_t       rtadmin;
 216         rtdpent_t       *tmpdpp;
 217         size_t          userdpsz;
 218         size_t          rtdpsz;
 219         int             i;
 220 
 221         if (get_udatamodel() == DATAMODEL_NATIVE) {
 222                 if (copyin(uaddr, &rtadmin, sizeof (rtadmin_t)))
 223                         return (EFAULT);
 224         }
 225 #ifdef _SYSCALL32_IMPL
 226         else {
 227                 /* rtadmin struct from ILP32 callers */
 228                 rtadmin32_t rtadmin32;
 229                 if (copyin(uaddr, &rtadmin32, sizeof (rtadmin32_t)))
 230                         return (EFAULT);
 231                 rtadmin.rt_dpents =
 232                     (struct rtdpent *)(uintptr_t)rtadmin32.rt_dpents;
 233                 rtadmin.rt_ndpents = rtadmin32.rt_ndpents;
 234                 rtadmin.rt_cmd = rtadmin32.rt_cmd;
 235         }
 236 #endif /* _SYSCALL32_IMPL */
 237 
 238         rtdpsz = (rt_maxpri + 1) * sizeof (rtdpent_t);
 239 
 240         switch (rtadmin.rt_cmd) {
 241 
 242         case RT_GETDPSIZE:
 243                 rtadmin.rt_ndpents = rt_maxpri + 1;
 244 
 245                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 246                         if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
 247                                 return (EFAULT);
 248                 }
 249 #ifdef _SYSCALL32_IMPL
 250                 else {
 251                         /* return rtadmin struct to ILP32 callers */
 252                         rtadmin32_t rtadmin32;
 253                         rtadmin32.rt_dpents =
 254                             (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
 255                         rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
 256                         rtadmin32.rt_cmd = rtadmin.rt_cmd;
 257                         if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
 258                                 return (EFAULT);
 259                 }
 260 #endif /* _SYSCALL32_IMPL */
 261 
 262                 break;
 263 
 264         case RT_GETDPTBL:
 265                 userdpsz = MIN(rtadmin.rt_ndpents * sizeof (rtdpent_t),
 266                     rtdpsz);
 267                 if (copyout(rt_dptbl, rtadmin.rt_dpents, userdpsz))
 268                         return (EFAULT);
 269                 rtadmin.rt_ndpents = userdpsz / sizeof (rtdpent_t);
 270 
 271                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 272                         if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
 273                                 return (EFAULT);
 274                 }
 275 #ifdef _SYSCALL32_IMPL
 276                 else {
 277                         /* return rtadmin struct to ILP32 callers */
 278                         rtadmin32_t rtadmin32;
 279                         rtadmin32.rt_dpents =
 280                             (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
 281                         rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
 282                         rtadmin32.rt_cmd = rtadmin.rt_cmd;
 283                         if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
 284                                 return (EFAULT);
 285                 }
 286 #endif /* _SYSCALL32_IMPL */
 287                 break;
 288 
 289         case RT_SETDPTBL:
 290                 /*
 291                  * We require that the requesting process has sufficient
 292                  * priveleges.  We also require that the table supplied by
 293                  * the user exactly match the current rt_dptbl in size.
 294                  */
 295                 if (secpolicy_dispadm(reqpcredp) != 0)
 296                         return (EPERM);
 297                 if (rtadmin.rt_ndpents * sizeof (rtdpent_t) != rtdpsz)
 298                         return (EINVAL);
 299 
 300                 /*
 301                  * We read the user supplied table into a temporary buffer
 302                  * where the time quantum values are validated before
 303                  * being copied to the rt_dptbl.
 304                  */
 305                 tmpdpp = kmem_alloc(rtdpsz, KM_SLEEP);
 306                 if (copyin(rtadmin.rt_dpents, tmpdpp, rtdpsz)) {
 307                         kmem_free(tmpdpp, rtdpsz);
 308                         return (EFAULT);
 309                 }
 310                 for (i = 0; i < rtadmin.rt_ndpents; i++) {
 311 
 312                         /*
 313                          * Validate the user supplied time quantum values.
 314                          */
 315                         if (tmpdpp[i].rt_quantum <= 0 &&
 316                             tmpdpp[i].rt_quantum != RT_TQINF) {
 317                                 kmem_free(tmpdpp, rtdpsz);
 318                                 return (EINVAL);
 319                         }
 320                 }
 321 
 322                 /*
 323                  * Copy the user supplied values over the current rt_dptbl
 324                  * values.  The rt_globpri member is read-only so we don't
 325                  * overwrite it.
 326                  */
 327                 mutex_enter(&rt_dptblock);
 328                 for (i = 0; i < rtadmin.rt_ndpents; i++)
 329                         rt_dptbl[i].rt_quantum = tmpdpp[i].rt_quantum;
 330                 mutex_exit(&rt_dptblock);
 331                 kmem_free(tmpdpp, rtdpsz);
 332                 break;
 333 
 334         default:
 335                 return (EINVAL);
 336         }
 337         return (0);
 338 }
 339 
 340 
 341 /*
 342  * Allocate a real-time class specific proc structure and
 343  * initialize it with the parameters supplied. Also move thread
 344  * to specified real-time priority.
 345  */
 346 /* ARGSUSED */
 347 static int
 348 rt_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
 349     void *bufp)
 350 {
 351         rtkparms_t *rtkparmsp = (rtkparms_t *)parmsp;
 352         rtproc_t *rtpp;
 353 
 354         /*
 355          * For a thread to enter the real-time class the thread
 356          * which initiates the request must be privileged.
 357          * This may have been checked previously but if our
 358          * caller passed us a credential structure we assume it
 359          * hasn't and we check it here.
 360          */
 361         if (reqpcredp != NULL && secpolicy_setpriority(reqpcredp) != 0)
 362                 return (EPERM);
 363 
 364         rtpp = (rtproc_t *)bufp;
 365         ASSERT(rtpp != NULL);
 366 
 367         /*
 368          * If this thread's lwp is swapped out, it will be brought in
 369          * when it is put onto the runqueue.
 370          *
 371          * Now, Initialize the rtproc structure.
 372          */
 373         if (rtkparmsp == NULL) {
 374                 /*
 375                  * Use default values
 376                  */
 377                 rtpp->rt_pri = 0;
 378                 rtpp->rt_pquantum = rt_dptbl[0].rt_quantum;
 379                 rtpp->rt_tqsignal = 0;
 380         } else {
 381                 /*
 382                  * Use supplied values
 383                  */
 384                 if ((rtkparmsp->rt_cflags & RT_DOPRI) == 0)
 385                         rtpp->rt_pri = 0;
 386                 else
 387                         rtpp->rt_pri = rtkparmsp->rt_pri;
 388 
 389                 if (rtkparmsp->rt_tqntm == RT_TQINF)
 390                         rtpp->rt_pquantum = RT_TQINF;
 391                 else if (rtkparmsp->rt_tqntm == RT_TQDEF ||
 392                     (rtkparmsp->rt_cflags & RT_DOTQ) == 0)
 393                         rtpp->rt_pquantum = rt_dptbl[rtpp->rt_pri].rt_quantum;
 394                 else
 395                         rtpp->rt_pquantum = rtkparmsp->rt_tqntm;
 396 
 397                 if ((rtkparmsp->rt_cflags & RT_DOSIG) == 0)
 398                         rtpp->rt_tqsignal = 0;
 399                 else
 400                         rtpp->rt_tqsignal = rtkparmsp->rt_tqsig;
 401         }
 402         rtpp->rt_flags = 0;
 403         rtpp->rt_tp = t;
 404         /*
 405          * Reset thread priority
 406          */
 407         thread_lock(t);
 408         t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
 409         t->t_cid = cid;
 410         t->t_cldata = (void *)rtpp;
 411         t->t_schedflag &= ~TS_RUNQMATCH;
 412         rt_change_priority(t, rtpp);
 413         thread_unlock(t);
 414         /*
 415          * Link new structure into rtproc list
 416          */
 417         mutex_enter(&rt_list_lock);
 418         rtpp->rt_next = rt_plisthead.rt_next;
 419         rtpp->rt_prev = &rt_plisthead;
 420         rt_plisthead.rt_next->rt_prev = rtpp;
 421         rt_plisthead.rt_next = rtpp;
 422         mutex_exit(&rt_list_lock);
 423         return (0);
 424 }
 425 
 426 
 427 /*
 428  * Free rtproc structure of thread.
 429  */
 430 static void
 431 rt_exitclass(void *procp)
 432 {
 433         rtproc_t *rtprocp = (rtproc_t *)procp;
 434 
 435         mutex_enter(&rt_list_lock);
 436         rtprocp->rt_prev->rt_next = rtprocp->rt_next;
 437         rtprocp->rt_next->rt_prev = rtprocp->rt_prev;
 438         mutex_exit(&rt_list_lock);
 439         kmem_free(rtprocp, sizeof (rtproc_t));
 440 }
 441 
 442 
 443 /*
 444  * Allocate and initialize real-time class specific
 445  * proc structure for child.
 446  */
 447 /* ARGSUSED */
 448 static int
 449 rt_fork(kthread_t *t, kthread_t *ct, void *bufp)
 450 {
 451         rtproc_t *prtpp;
 452         rtproc_t *crtpp;
 453 
 454         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
 455 
 456         /*
 457          * Initialize child's rtproc structure
 458          */
 459         crtpp = (rtproc_t *)bufp;
 460         ASSERT(crtpp != NULL);
 461         prtpp = (rtproc_t *)t->t_cldata;
 462         thread_lock(t);
 463         crtpp->rt_timeleft = crtpp->rt_pquantum = prtpp->rt_pquantum;
 464         crtpp->rt_pri = prtpp->rt_pri;
 465         crtpp->rt_flags = prtpp->rt_flags & ~RTBACKQ;
 466         crtpp->rt_tqsignal = prtpp->rt_tqsignal;
 467 
 468         crtpp->rt_tp = ct;
 469         thread_unlock(t);
 470 
 471         /*
 472          * Link new structure into rtproc list
 473          */
 474         ct->t_cldata = (void *)crtpp;
 475         mutex_enter(&rt_list_lock);
 476         crtpp->rt_next = rt_plisthead.rt_next;
 477         crtpp->rt_prev = &rt_plisthead;
 478         rt_plisthead.rt_next->rt_prev = crtpp;
 479         rt_plisthead.rt_next = crtpp;
 480         mutex_exit(&rt_list_lock);
 481         return (0);
 482 }
 483 
 484 
/*
 * Called in the parent thread t after a fork that created child thread ct.
 * The child goes to the back of its dispatcher queue while the
 * parent continues to run after a real time thread forks.
 *
 * Entered with pidlock held; pidlock is dropped here.  The lwps of the
 * child's process are continued first, then those of the parent's.
 */
/* ARGSUSED */
static void
rt_forkret(kthread_t *t, kthread_t *ct)
{
        proc_t *pp = ttoproc(t);
        proc_t *cp = ttoproc(ct);

        ASSERT(t == curthread);
        ASSERT(MUTEX_HELD(&pidlock));

        /*
         * Grab the child's p_lock before dropping pidlock to ensure
         * the process does not disappear before we set it running.
         */
        mutex_enter(&cp->p_lock);
        mutex_exit(&pidlock);
        continuelwps(cp);
        mutex_exit(&cp->p_lock);

        /* Now let the parent's lwps continue as well. */
        mutex_enter(&pp->p_lock);
        continuelwps(pp);
        mutex_exit(&pp->p_lock);
}
 512 
 513 
 514 /*
 515  * Get information about the real-time class into the buffer
 516  * pointed to by rtinfop.  The maximum configured real-time
 517  * priority is the only information we supply.  We ignore the
 518  * class and credential arguments because anyone can have this
 519  * information.
 520  */
 521 /* ARGSUSED */
 522 static int
 523 rt_getclinfo(void *infop)
 524 {
 525         rtinfo_t *rtinfop = (rtinfo_t *)infop;
 526         rtinfop->rt_maxpri = rt_maxpri;
 527         return (0);
 528 }
 529 
 530 /*
 531  * Return the user mode scheduling priority range.
 532  */
 533 static int
 534 rt_getclpri(pcpri_t *pcprip)
 535 {
 536         pcprip->pc_clpmax = rt_maxpri;
 537         pcprip->pc_clpmin = 0;
 538         return (0);
 539 }
 540 
/*
 * Do-nothing placeholder.  It is installed in every rt_classfuncs slot for
 * which the real-time class has no work to do (stop, exit, active,
 * inactive, trapret, sleep, set_process_group).  It is intentionally left
 * without a prototype.
 */
static void
rt_nullsys()
{
}
 545 
/*
 * Decide whether thread t may leave the real-time class.  Neither
 * argument is examined; the answer is always 0.
 */
/* ARGSUSED */
static int
rt_canexit(kthread_t *t, cred_t *cred)
{
        /*
         * Thread can always leave RT class
         */
        return (0);
}
 555 
 556 /*
 557  * Get the real-time scheduling parameters of the thread pointed to by
 558  * rtprocp into the buffer pointed to by rtkparmsp.
 559  */
 560 static void
 561 rt_parmsget(kthread_t *t, void *parmsp)
 562 {
 563         rtproc_t        *rtprocp = (rtproc_t *)t->t_cldata;
 564         rtkparms_t      *rtkparmsp = (rtkparms_t *)parmsp;
 565 
 566         rtkparmsp->rt_pri = rtprocp->rt_pri;
 567         rtkparmsp->rt_tqntm = rtprocp->rt_pquantum;
 568         rtkparmsp->rt_tqsig = rtprocp->rt_tqsignal;
 569 }
 570 
 571 
 572 
 573 /*
 574  * Check the validity of the real-time parameters in the buffer
 575  * pointed to by rtprmsp.
 576  * We convert the rtparms buffer from the user supplied format to
 577  * our internal format (i.e. time quantum expressed in ticks).
 578  */
 579 static int
 580 rt_parmsin(void *prmsp)
 581 {
 582         rtparms_t *rtprmsp = (rtparms_t *)prmsp;
 583         longlong_t      ticks;
 584         uint_t          cflags;
 585 
 586         /*
 587          * First check the validity of parameters and convert
 588          * the buffer to kernel format.
 589          */
 590         if ((rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri) &&
 591             rtprmsp->rt_pri != RT_NOCHANGE)
 592                 return (EINVAL);
 593 
 594         cflags = (rtprmsp->rt_pri != RT_NOCHANGE ? RT_DOPRI : 0);
 595 
 596         if ((rtprmsp->rt_tqsecs == 0 && rtprmsp->rt_tqnsecs == 0) ||
 597             rtprmsp->rt_tqnsecs >= NANOSEC)
 598                 return (EINVAL);
 599 
 600         if (rtprmsp->rt_tqnsecs != RT_NOCHANGE)
 601                 cflags |= RT_DOTQ;
 602 
 603         if (rtprmsp->rt_tqnsecs >= 0) {
 604                 if ((ticks = SEC_TO_TICK((longlong_t)rtprmsp->rt_tqsecs) +
 605                     NSEC_TO_TICK_ROUNDUP(rtprmsp->rt_tqnsecs)) > INT_MAX)
 606                         return (ERANGE);
 607 
 608                 ((rtkparms_t *)rtprmsp)->rt_tqntm = (int)ticks;
 609         } else {
 610                 if (rtprmsp->rt_tqnsecs != RT_NOCHANGE &&
 611                     rtprmsp->rt_tqnsecs != RT_TQINF &&
 612                     rtprmsp->rt_tqnsecs != RT_TQDEF)
 613                         return (EINVAL);
 614 
 615                 ((rtkparms_t *)rtprmsp)->rt_tqntm = rtprmsp->rt_tqnsecs;
 616         }
 617         ((rtkparms_t *)rtprmsp)->rt_cflags = cflags;
 618 
 619         return (0);
 620 }
 621 
 622 
 623 /*
 624  * Check the validity of the real-time parameters in the pc_vaparms_t
 625  * structure vaparmsp and put them in the buffer pointed to by rtprmsp.
 626  * pc_vaparms_t contains (key, value) pairs of parameter.
 627  * rt_vaparmsin() is the variable parameter version of rt_parmsin().
 628  */
 629 static int
 630 rt_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp)
 631 {
 632         uint_t          secs = 0;
 633         uint_t          cnt;
 634         int             nsecs = 0;
 635         int             priflag, secflag, nsecflag, sigflag;
 636         longlong_t      ticks;
 637         rtkparms_t      *rtprmsp = (rtkparms_t *)prmsp;
 638         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 639 
 640 
 641         /*
 642          * First check the validity of parameters and convert them
 643          * from the user supplied format to the internal format.
 644          */
 645         priflag = secflag = nsecflag = sigflag = 0;
 646         rtprmsp->rt_cflags = 0;
 647 
 648         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 649                 return (EINVAL);
 650 
 651         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 652 
 653                 switch (vpp->pc_key) {
 654                 case RT_KY_PRI:
 655                         if (priflag++)
 656                                 return (EINVAL);
 657                         rtprmsp->rt_cflags |= RT_DOPRI;
 658                         rtprmsp->rt_pri = (pri_t)vpp->pc_parm;
 659                         if (rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri)
 660                                 return (EINVAL);
 661                         break;
 662 
 663                 case RT_KY_TQSECS:
 664                         if (secflag++)
 665                                 return (EINVAL);
 666                         rtprmsp->rt_cflags |= RT_DOTQ;
 667                         secs = (uint_t)vpp->pc_parm;
 668                         break;
 669 
 670                 case RT_KY_TQNSECS:
 671                         if (nsecflag++)
 672                                 return (EINVAL);
 673                         rtprmsp->rt_cflags |= RT_DOTQ;
 674                         nsecs = (int)vpp->pc_parm;
 675                         break;
 676 
 677                 case RT_KY_TQSIG:
 678                         if (sigflag++)
 679                                 return (EINVAL);
 680                         rtprmsp->rt_cflags |= RT_DOSIG;
 681                         rtprmsp->rt_tqsig = (int)vpp->pc_parm;
 682                         if (rtprmsp->rt_tqsig < 0 || rtprmsp->rt_tqsig >= NSIG)
 683                                 return (EINVAL);
 684                         break;
 685 
 686                 default:
 687                         return (EINVAL);
 688                 }
 689         }
 690 
 691         if (vaparmsp->pc_vaparmscnt == 0) {
 692                 /*
 693                  * Use default parameters.
 694                  */
 695                 rtprmsp->rt_pri = 0;
 696                 rtprmsp->rt_tqntm = RT_TQDEF;
 697                 rtprmsp->rt_tqsig = 0;
 698                 rtprmsp->rt_cflags = RT_DOPRI | RT_DOTQ | RT_DOSIG;
 699         } else if ((rtprmsp->rt_cflags & RT_DOTQ) != 0) {
 700                 if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC)
 701                         return (EINVAL);
 702 
 703                 if (nsecs >= 0) {
 704                         if ((ticks = SEC_TO_TICK((longlong_t)secs) +
 705                             NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX)
 706                                 return (ERANGE);
 707 
 708                         rtprmsp->rt_tqntm = (int)ticks;
 709                 } else {
 710                         if (nsecs != RT_TQINF && nsecs != RT_TQDEF)
 711                                 return (EINVAL);
 712                         rtprmsp->rt_tqntm = nsecs;
 713                 }
 714         }
 715 
 716         return (0);
 717 }
 718 
 719 /*
 720  * Do required processing on the real-time parameter buffer
 721  * before it is copied out to the user.
 722  * All we have to do is convert the buffer from kernel to user format
 723  * (i.e. convert time quantum from ticks to seconds-nanoseconds).
 724  */
 725 /* ARGSUSED */
 726 static int
 727 rt_parmsout(void *prmsp, pc_vaparms_t *vaparmsp)
 728 {
 729         rtkparms_t      *rtkprmsp = (rtkparms_t *)prmsp;
 730 
 731         if (vaparmsp != NULL)
 732                 return (0);
 733 
 734         if (rtkprmsp->rt_tqntm < 0) {
 735                 /*
 736                  * Quantum field set to special value (e.g. RT_TQINF)
 737                  */
 738                 ((rtparms_t *)rtkprmsp)->rt_tqnsecs = rtkprmsp->rt_tqntm;
 739                 ((rtparms_t *)rtkprmsp)->rt_tqsecs = 0;
 740         } else {
 741                 /* Convert quantum from ticks to seconds-nanoseconds */
 742 
 743                 timestruc_t ts;
 744                 TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
 745                 ((rtparms_t *)rtkprmsp)->rt_tqsecs = ts.tv_sec;
 746                 ((rtparms_t *)rtkprmsp)->rt_tqnsecs = ts.tv_nsec;
 747         }
 748 
 749         return (0);
 750 }
 751 
 752 
 753 /*
 754  * Copy all selected real-time class parameters to the user.
 755  * The parameters are specified by a key.
 756  */
 757 static int
 758 rt_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
 759 {
 760         rtkparms_t      *rtkprmsp = (rtkparms_t *)prmsp;
 761         timestruc_t     ts;
 762         uint_t          cnt;
 763         uint_t          secs;
 764         int             nsecs;
 765         int             priflag, secflag, nsecflag, sigflag;
 766         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 767 
 768         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
 769 
 770         priflag = secflag = nsecflag = sigflag = 0;
 771 
 772         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 773                 return (EINVAL);
 774 
 775         if (rtkprmsp->rt_tqntm < 0) {
 776                 /*
 777                  * Quantum field set to special value (e.g. RT_TQINF).
 778                  */
 779                 secs = 0;
 780                 nsecs = rtkprmsp->rt_tqntm;
 781         } else {
 782                 /*
 783                  * Convert quantum from ticks to seconds-nanoseconds.
 784                  */
 785                 TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
 786                 secs = ts.tv_sec;
 787                 nsecs = ts.tv_nsec;
 788         }
 789 
 790 
 791         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 792 
 793                 switch (vpp->pc_key) {
 794                 case RT_KY_PRI:
 795                         if (priflag++)
 796                                 return (EINVAL);
 797                         if (copyout(&rtkprmsp->rt_pri,
 798                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
 799                                 return (EFAULT);
 800                         break;
 801 
 802                 case RT_KY_TQSECS:
 803                         if (secflag++)
 804                                 return (EINVAL);
 805                         if (copyout(&secs, (caddr_t)(uintptr_t)vpp->pc_parm,
 806                             sizeof (uint_t)))
 807                                 return (EFAULT);
 808                         break;
 809 
 810                 case RT_KY_TQNSECS:
 811                         if (nsecflag++)
 812                                 return (EINVAL);
 813                         if (copyout(&nsecs, (caddr_t)(uintptr_t)vpp->pc_parm,
 814                             sizeof (int)))
 815                                 return (EFAULT);
 816                         break;
 817 
 818                 case RT_KY_TQSIG:
 819                         if (sigflag++)
 820                                 return (EINVAL);
 821                         if (copyout(&rtkprmsp->rt_tqsig,
 822                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (int)))
 823                                 return (EFAULT);
 824                         break;
 825 
 826                 default:
 827                         return (EINVAL);
 828                 }
 829         }
 830 
 831         return (0);
 832 }
 833 
 834 
 835 /*
 836  * Set the scheduling parameters of the thread pointed to by rtprocp
 837  * to those specified in the buffer pointed to by rtkprmsp.
 838  * Note that the parameters are expected to be in kernel format
 839  * (i.e. time quantm expressed in ticks).  Real time parameters copied
 840  * in from the user should be processed by rt_parmsin() before they are
 841  * passed to this function.
 842  */
 843 static int
 844 rt_parmsset(kthread_t *tx, void *prmsp, id_t reqpcid, cred_t *reqpcredp)
 845 {
 846         rtkparms_t *rtkprmsp = (rtkparms_t *)prmsp;
 847         rtproc_t *rtpp = (rtproc_t *)tx->t_cldata;
 848 
 849         ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
 850 
 851         /*
 852          * Basic permissions enforced by generic kernel code
 853          * for all classes require that a thread attempting
 854          * to change the scheduling parameters of a target thread
 855          * be privileged or have a real or effective UID
 856          * matching that of the target thread. We are not
 857          * called unless these basic permission checks have
 858          * already passed. The real-time class requires in addition
 859          * that the requesting thread be real-time unless it is privileged.
 860          * This may also have been checked previously but if our caller
 861          * passes us a credential structure we assume it hasn't and
 862          * we check it here.
 863          */
 864         if (reqpcredp != NULL && reqpcid != rt_cid &&
 865             secpolicy_raisepriority(reqpcredp) != 0)
 866                 return (EPERM);
 867 
 868         thread_lock(tx);
 869         if ((rtkprmsp->rt_cflags & RT_DOPRI) != 0) {
 870                 rtpp->rt_pri = rtkprmsp->rt_pri;
 871                 rt_change_priority(tx, rtpp);
 872         }
 873         if (rtkprmsp->rt_tqntm == RT_TQINF)
 874                 rtpp->rt_pquantum = RT_TQINF;
 875         else if (rtkprmsp->rt_tqntm == RT_TQDEF)
 876                 rtpp->rt_timeleft = rtpp->rt_pquantum =
 877                     rt_dptbl[rtpp->rt_pri].rt_quantum;
 878         else if ((rtkprmsp->rt_cflags & RT_DOTQ) != 0)
 879                 rtpp->rt_timeleft = rtpp->rt_pquantum = rtkprmsp->rt_tqntm;
 880 
 881         if ((rtkprmsp->rt_cflags & RT_DOSIG) != 0)
 882                 rtpp->rt_tqsignal = rtkprmsp->rt_tqsig;
 883 
 884         thread_unlock(tx);
 885         return (0);
 886 }
 887 
 888 
 889 /*
 890  * Arrange for thread to be placed in appropriate location
 891  * on dispatcher queue.  Runs at splhi() since the clock
 892  * interrupt can cause RTBACKQ to be set.
 893  */
 894 static void
 895 rt_preempt(kthread_t *t)
 896 {
 897         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
 898         klwp_t *lwp;
 899 
 900         ASSERT(THREAD_LOCK_HELD(t));
 901 
 902         /*
 903          * If the state is user I allow swapping because I know I won't
 904          * be holding any locks.
 905          */
 906         if ((lwp = curthread->t_lwp) != NULL && lwp->lwp_state == LWP_USER)
 907                 t->t_schedflag &= ~TS_DONT_SWAP;
 908         if ((rtpp->rt_flags & RTBACKQ) != 0) {
 909                 rtpp->rt_timeleft = rtpp->rt_pquantum;
 910                 rtpp->rt_flags &= ~RTBACKQ;
 911                 setbackdq(t);
 912         } else
 913                 setfrontdq(t);
 914 
 915 }
 916 
 917 /*
 918  * Return the global priority associated with this rt_pri.
 919  */
 920 static pri_t
 921 rt_globpri(kthread_t *t)
 922 {
 923         rtproc_t *rtprocp = (rtproc_t *)t->t_cldata;
 924         return (rt_dptbl[rtprocp->rt_pri].rt_globpri);
 925 }
 926 
 927 static void
 928 rt_setrun(kthread_t *t)
 929 {
 930         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
 931 
 932         ASSERT(THREAD_LOCK_HELD(t));
 933 
 934         rtpp->rt_timeleft = rtpp->rt_pquantum;
 935         rtpp->rt_flags &= ~RTBACKQ;
 936         setbackdq(t);
 937 }
 938 
 939 /*
 940  * Returns the priority of the thread, -1 if the thread is loaded or ineligible
 941  * for swapin.
 942  *
 943  * FX and RT threads are designed so that they don't swapout; however, it
 944  * is possible that while the thread is swapped out and in another class, it
 945  * can be changed to FX or RT.  Since these threads should be swapped in as
 946  * soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin
 947  * returns SHRT_MAX - 1, so that it gives deference to any swapped out RT
 948  * threads.
 949  */
 950 /* ARGSUSED */
 951 static pri_t
 952 rt_swapin(kthread_t *t, int flags)
 953 {
 954         pri_t   tpri = -1;
 955 
 956         ASSERT(THREAD_LOCK_HELD(t));
 957 
 958         if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
 959                 tpri = (pri_t)SHRT_MAX;
 960         }
 961 
 962         return (tpri);
 963 }
 964 
 965 /*
 966  * Return an effective priority for swapout.
 967  */
 968 /* ARGSUSED */
 969 static pri_t
 970 rt_swapout(kthread_t *t, int flags)
 971 {
 972         ASSERT(THREAD_LOCK_HELD(t));
 973 
 974         return (-1);
 975 }
 976 
 977 /*
 978  * Check for time slice expiration (unless thread has infinite time
 979  * slice).  If time slice has expired arrange for thread to be preempted
 980  * and placed on back of queue.
 981  */
 982 static void
 983 rt_tick(kthread_t *t)
 984 {
 985         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
 986 
 987         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
 988 
 989         thread_lock(t);
 990         if ((rtpp->rt_pquantum != RT_TQINF && --rtpp->rt_timeleft == 0) ||
 991             (t->t_state == TS_ONPROC && DISP_MUST_SURRENDER(t))) {
 992                 if (rtpp->rt_timeleft == 0 && rtpp->rt_tqsignal) {
 993                         thread_unlock(t);
 994                         sigtoproc(ttoproc(t), t, rtpp->rt_tqsignal);
 995                         thread_lock(t);
 996                 }
 997                 rtpp->rt_flags |= RTBACKQ;
 998                 cpu_surrender(t);
 999         }
1000         thread_unlock(t);
1001 }
1002 
1003 
1004 /*
1005  * Place the thread waking up on the dispatcher queue.
1006  */
1007 static void
1008 rt_wakeup(kthread_t *t)
1009 {
1010         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1011 
1012         ASSERT(THREAD_LOCK_HELD(t));
1013 
1014         rtpp->rt_timeleft = rtpp->rt_pquantum;
1015         rtpp->rt_flags &= ~RTBACKQ;
1016         setbackdq(t);
1017 }
1018 
1019 static void
1020 rt_yield(kthread_t *t)
1021 {
1022         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1023 
1024         ASSERT(t == curthread);
1025         ASSERT(THREAD_LOCK_HELD(t));
1026 
1027         rtpp->rt_flags &= ~RTBACKQ;
1028         setbackdq(t);
1029 }
1030 
1031 /* ARGSUSED */
1032 static int
1033 rt_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1034 {
1035         return (EINVAL);
1036 }
1037 
1038 /*
1039  * Increment the priority of the specified thread by incr and
1040  * return the new value in *retvalp.
1041  */
1042 static int
1043 rt_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1044 {
1045         int newpri;
1046         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1047         rtkparms_t rtkparms;
1048 
1049         /* If there's no change to the priority, just return current setting */
1050         if (incr == 0) {
1051                 *retvalp = rtpp->rt_pri;
1052                 return (0);
1053         }
1054 
1055         newpri = rtpp->rt_pri + incr;
1056         if (newpri > rt_maxpri || newpri < 0)
1057                 return (EINVAL);
1058 
1059         *retvalp = newpri;
1060         rtkparms.rt_pri = newpri;
1061         rtkparms.rt_tqntm = RT_NOCHANGE;
1062         rtkparms.rt_tqsig = 0;
1063         rtkparms.rt_cflags = RT_DOPRI;
1064         return (rt_parmsset(t, &rtkparms, rt_cid, cr));
1065 }
1066 
1067 static int
1068 rt_alloc(void **p, int flag)
1069 {
1070         void *bufp;
1071         bufp = kmem_alloc(sizeof (rtproc_t), flag);
1072         if (bufp == NULL) {
1073                 return (ENOMEM);
1074         } else {
1075                 *p = bufp;
1076                 return (0);
1077         }
1078 }
1079 
1080 static void
1081 rt_free(void *bufp)
1082 {
1083         if (bufp)
1084                 kmem_free(bufp, sizeof (rtproc_t));
1085 }
1086 
1087 static void
1088 rt_change_priority(kthread_t *t, rtproc_t *rtpp)
1089 {
1090         pri_t new_pri;
1091 
1092         ASSERT(THREAD_LOCK_HELD(t));
1093 
1094         new_pri = rt_dptbl[rtpp->rt_pri].rt_globpri;
1095 
1096         t->t_cpri = rtpp->rt_pri;
1097         if (t == curthread || t->t_state == TS_ONPROC) {
1098                 cpu_t   *cp = t->t_disp_queue->disp_cpu;
1099                 THREAD_CHANGE_PRI(t, new_pri);
1100                 if (t == cp->cpu_dispthread)
1101                         cp->cpu_dispatch_pri = DISP_PRIO(t);
1102                 if (DISP_MUST_SURRENDER(t)) {
1103                         rtpp->rt_flags |= RTBACKQ;
1104                         cpu_surrender(t);
1105                 } else {
1106                         rtpp->rt_timeleft = rtpp->rt_pquantum;
1107                 }
1108         } else {
1109                 /*
1110                  * When the priority of a thread is changed,
1111                  * it may be necessary to adjust its position
1112                  * on a sleep queue or dispatch queue.  The
1113                  * function thread_change_pri() accomplishes this.
1114                  */
1115                 if (thread_change_pri(t, new_pri, 0)) {
1116                         /*
1117                          * The thread was on a run queue.
1118                          * Reset its CPU timeleft.
1119                          */
1120                         rtpp->rt_timeleft = rtpp->rt_pquantum;
1121                 } else {
1122                         rtpp->rt_flags |= RTBACKQ;
1123                 }
1124         }
1125 }