webrev-prealloc Wdiff usr/src/uts/common/os/schedctl.c

Print this page

13902 Fix for 13717 may break 8-disk raidz2
13915 installctx() blocking allocate causes problems
Portions contributed by: Jerry Jelinek <gjelinek@gmail.com>
Change-Id: I934d69946cec42630fc541fa8c7385b862b69ca2

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/os/schedctl.c
          +++ new/usr/src/uts/common/os/schedctl.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.

↓ open down ↓

14 lines elided

↑ open up ↑

  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25      - * Copyright 2016 Joyent, Inc.
       25 + * Copyright 2021 Joyent, Inc.
  26   26   */
  27   27  
  28   28  #include <sys/types.h>
  29   29  #include <sys/systm.h>
  30   30  #include <sys/schedctl.h>
  31   31  #include <sys/proc.h>
  32   32  #include <sys/thread.h>
  33   33  #include <sys/class.h>
  34   34  #include <sys/cred.h>
  35   35  #include <sys/kmem.h>

  36   36  #include <sys/cmn_err.h>
  37   37  #include <sys/stack.h>
  38   38  #include <sys/debug.h>
  39   39  #include <sys/cpuvar.h>
  40   40  #include <sys/sobject.h>
  41   41  #include <sys/door.h>
  42   42  #include <sys/modctl.h>
  43   43  #include <sys/syscall.h>
  44   44  #include <sys/sysmacros.h>
  45   45  #include <sys/vmsystm.h>
  46   46  #include <sys/mman.h>
  47   47  #include <sys/vnode.h>
  48   48  #include <sys/swap.h>
  49   49  #include <sys/lwp.h>
  50   50  #include <sys/bitmap.h>
  51   51  #include <sys/atomic.h>
  52   52  #include <sys/fcntl.h>
  53   53  #include <vm/seg_kp.h>
  54   54  #include <vm/seg_vn.h>
  55   55  #include <vm/as.h>
  56   56  #include <fs/fs_subr.h>
  57   57  
  58   58  /*
  59   59   * Page handling structures.  This is set up as a list of per-page
  60   60   * control structures (sc_page_ctl), with p->p_pagep pointing to
  61   61   * the first.  The per-page structures point to the actual pages
  62   62   * and contain pointers to the user address for each mapped page.
  63   63   *
  64   64   * All data is protected by p->p_sc_lock.  Since this lock is
  65   65   * held while waiting for memory, schedctl_shared_alloc() should
  66   66   * not be called while holding p_lock.
  67   67   */
  68   68  
  69   69  typedef struct sc_page_ctl {
  70   70          struct sc_page_ctl *spc_next;
  71   71          sc_shared_t     *spc_base;      /* base of kernel page */
  72   72          sc_shared_t     *spc_end;       /* end of usable space */
  73   73          ulong_t         *spc_map;       /* bitmap of allocated space on page */
  74   74          size_t          spc_space;      /* amount of space on page */
  75   75          caddr_t         spc_uaddr;      /* user-level address of the page */
  76   76          struct anon_map *spc_amp;       /* anonymous memory structure */
  77   77  } sc_page_ctl_t;
  78   78  
  79   79  static size_t   sc_pagesize;            /* size of usable space on page */
  80   80  static size_t   sc_bitmap_len;          /* # of bits in allocation bitmap */
  81   81  static size_t   sc_bitmap_words;        /* # of words in allocation bitmap */
  82   82  
  83   83  /* Context ops */
  84   84  static void     schedctl_save(sc_shared_t *);
  85   85  static void     schedctl_restore(sc_shared_t *);
  86   86  static void     schedctl_fork(kthread_t *, kthread_t *);
  87   87  
  88   88  /* Functions for handling shared pages */
  89   89  static int      schedctl_shared_alloc(sc_shared_t **, uintptr_t *);
  90   90  static sc_page_ctl_t *schedctl_page_lookup(sc_shared_t *);
  91   91  static int      schedctl_map(struct anon_map *, caddr_t *, caddr_t);
  92   92  static int      schedctl_getpage(struct anon_map **, caddr_t *);
  93   93  static void     schedctl_freepage(struct anon_map *, caddr_t);
  94   94  
  95   95  /*
  96   96   * System call interface to scheduler activations.
  97   97   * This always operates on the current lwp.
  98   98   */
  99   99  caddr_t
 100  100  schedctl(void)
 101  101  {
 102  102          kthread_t       *t = curthread;
 103  103          sc_shared_t     *ssp;
 104  104          uintptr_t       uaddr;
 105  105          int             error;

↓ open down ↓

70 lines elided

↑ open up ↑

 106  106  
 107  107          if (t->t_schedctl == NULL) {
 108  108                  /*
 109  109                   * Allocate and initialize the shared structure.
 110  110                   */
 111  111                  if ((error = schedctl_shared_alloc(&ssp, &uaddr)) != 0)
 112  112                          return ((caddr_t)(uintptr_t)set_errno(error));
 113  113                  bzero(ssp, sizeof (*ssp));
 114  114  
 115  115                  installctx(t, ssp, schedctl_save, schedctl_restore,
 116      -                    schedctl_fork, NULL, NULL, NULL);
      116 +                    schedctl_fork, NULL, NULL, NULL, NULL);
 117  117  
 118  118                  thread_lock(t); /* protect against ts_tick and ts_update */
 119  119                  t->t_schedctl = ssp;
 120  120                  t->t_sc_uaddr = uaddr;
 121  121                  ssp->sc_cid = t->t_cid;
 122  122                  ssp->sc_cpri = t->t_cpri;
 123  123                  ssp->sc_priority = DISP_PRIO(t);
 124  124                  thread_unlock(t);
 125  125          }
 126  126

 127  127          return ((caddr_t)t->t_sc_uaddr);
 128  128  }
 129  129  
 130  130  
 131  131  /*
 132  132   * Clean up scheduler activations state associated with an exiting
 133  133   * (or execing) lwp.  t is always the current thread.
 134  134   */
 135  135  void
 136  136  schedctl_lwp_cleanup(kthread_t *t)
 137  137  {
 138  138          sc_shared_t     *ssp = t->t_schedctl;
 139  139          proc_t          *p = ttoproc(t);
 140  140          sc_page_ctl_t   *pagep;
 141  141          index_t         index;
 142  142  
 143  143          ASSERT(MUTEX_NOT_HELD(&p->p_lock));
 144  144  
 145  145          thread_lock(t);         /* protect against ts_tick and ts_update */
 146  146          t->t_schedctl = NULL;
 147  147          t->t_sc_uaddr = 0;
 148  148          thread_unlock(t);
 149  149  
 150  150          /*
 151  151           * Remove the context op to avoid the final call to
 152  152           * schedctl_save when switching away from this lwp.
 153  153           */
 154  154          (void) removectx(t, ssp, schedctl_save, schedctl_restore,
 155  155              schedctl_fork, NULL, NULL, NULL);
 156  156  
 157  157          /*
 158  158           * Do not unmap the shared page until the process exits.
 159  159           * User-level library code relies on this for adaptive mutex locking.
 160  160           */
 161  161          mutex_enter(&p->p_sc_lock);
 162  162          ssp->sc_state = SC_FREE;
 163  163          pagep = schedctl_page_lookup(ssp);
 164  164          index = (index_t)(ssp - pagep->spc_base);
 165  165          BT_CLEAR(pagep->spc_map, index);
 166  166          pagep->spc_space += sizeof (sc_shared_t);
 167  167          mutex_exit(&p->p_sc_lock);
 168  168  }
 169  169  
 170  170  
 171  171  /*
 172  172   * Clean up the list of schedctl shared pages for the process.
 173  173   * Called from exec() and exit() system calls.
 174  174   */
 175  175  void
 176  176  schedctl_proc_cleanup(void)
 177  177  {
 178  178          proc_t *p = curproc;
 179  179          sc_page_ctl_t *pagep;
 180  180          sc_page_ctl_t *next;
 181  181  
 182  182          ASSERT(p->p_lwpcnt == 1);       /* we are single-threaded now */
 183  183          ASSERT(curthread->t_schedctl == NULL);
 184  184  
 185  185          /*
 186  186           * Since we are single-threaded, we don't have to hold p->p_sc_lock.
 187  187           */
 188  188          pagep = p->p_pagep;
 189  189          p->p_pagep = NULL;
 190  190          while (pagep != NULL) {
 191  191                  ASSERT(pagep->spc_space == sc_pagesize);
 192  192                  next = pagep->spc_next;
 193  193                  /*
 194  194                   * Unmap the user space and free the mapping structure.
 195  195                   */
 196  196                  (void) as_unmap(p->p_as, pagep->spc_uaddr, PAGESIZE);
 197  197                  schedctl_freepage(pagep->spc_amp, (caddr_t)(pagep->spc_base));
 198  198                  kmem_free(pagep->spc_map, sizeof (ulong_t) * sc_bitmap_words);
 199  199                  kmem_free(pagep, sizeof (sc_page_ctl_t));
 200  200                  pagep = next;
 201  201          }
 202  202  }
 203  203  
 204  204  
 205  205  /*
 206  206   * Called by resume just before switching away from the current thread.
 207  207   * Save new thread state.
 208  208   */
 209  209  static void
 210  210  schedctl_save(sc_shared_t *ssp)
 211  211  {
 212  212          ssp->sc_state = curthread->t_state;
 213  213  }
 214  214  
 215  215  
 216  216  /*
 217  217   * Called by resume after switching to the current thread.
 218  218   * Save new thread state and CPU.
 219  219   */
 220  220  static void
 221  221  schedctl_restore(sc_shared_t *ssp)
 222  222  {
 223  223          ssp->sc_state = SC_ONPROC;
 224  224          ssp->sc_cpu = CPU->cpu_id;
 225  225  }
 226  226  
 227  227  
 228  228  /*
 229  229   * On fork, remove inherited mappings from the child's address space.
 230  230   * The child's threads must call schedctl() to get new shared mappings.
 231  231   */
 232  232  static void
 233  233  schedctl_fork(kthread_t *pt, kthread_t *ct)
 234  234  {
 235  235          proc_t *pp = ttoproc(pt);
 236  236          proc_t *cp = ttoproc(ct);
 237  237          sc_page_ctl_t *pagep;
 238  238  
 239  239          ASSERT(ct->t_schedctl == NULL);
 240  240  
 241  241          /*
 242  242           * Do this only once, whether we are doing fork1() or forkall().
 243  243           * Don't do it at all if the child process is a child of vfork()
 244  244           * because a child of vfork() borrows the parent's address space.
 245  245           */
 246  246          if (pt != curthread || (cp->p_flag & SVFORK))
 247  247                  return;
 248  248  
 249  249          mutex_enter(&pp->p_sc_lock);
 250  250          for (pagep = pp->p_pagep; pagep != NULL; pagep = pagep->spc_next)
 251  251                  (void) as_unmap(cp->p_as, pagep->spc_uaddr, PAGESIZE);
 252  252          mutex_exit(&pp->p_sc_lock);
 253  253  }
 254  254  
 255  255  
 256  256  /*
 257  257   * Returns non-zero if the specified thread shouldn't be preempted at this time.
 258  258   * Called by ts_preempt(), ts_tick(), and ts_update().
 259  259   */
 260  260  int
 261  261  schedctl_get_nopreempt(kthread_t *t)
 262  262  {
 263  263          ASSERT(THREAD_LOCK_HELD(t));
 264  264          return (t->t_schedctl->sc_preemptctl.sc_nopreempt);
 265  265  }
 266  266  
 267  267  
 268  268  /*
 269  269   * Sets the value of the nopreempt field for the specified thread.
 270  270   * Called by ts_preempt() to clear the field on preemption.
 271  271   */
 272  272  void
 273  273  schedctl_set_nopreempt(kthread_t *t, short val)
 274  274  {
 275  275          ASSERT(THREAD_LOCK_HELD(t));
 276  276          t->t_schedctl->sc_preemptctl.sc_nopreempt = val;
 277  277  }
 278  278  
 279  279  
 280  280  /*
 281  281   * Sets the value of the yield field for the specified thread.
 282  282   * Called by ts_preempt() and ts_tick() to set the field, and
 283  283   * ts_yield() to clear it.
 284  284   * The kernel never looks at this field so we don't need a
 285  285   * schedctl_get_yield() function.
 286  286   */
 287  287  void
 288  288  schedctl_set_yield(kthread_t *t, short val)
 289  289  {
 290  290          ASSERT(THREAD_LOCK_HELD(t));
 291  291          t->t_schedctl->sc_preemptctl.sc_yield = val;
 292  292  }
 293  293  
 294  294  
 295  295  /*
 296  296   * Sets the values of the cid and priority fields for the specified thread.
 297  297   * Called from thread_change_pri(), thread_change_epri(), THREAD_CHANGE_PRI().
 298  298   * Called following calls to CL_FORKRET() and CL_ENTERCLASS().
 299  299   */
 300  300  void
 301  301  schedctl_set_cidpri(kthread_t *t)
 302  302  {
 303  303          sc_shared_t *tdp = t->t_schedctl;
 304  304  
 305  305          if (tdp != NULL) {
 306  306                  tdp->sc_cid = t->t_cid;
 307  307                  tdp->sc_cpri = t->t_cpri;
 308  308                  tdp->sc_priority = DISP_PRIO(t);
 309  309          }
 310  310  }
 311  311  
 312  312  
 313  313  /*
 314  314   * Returns non-zero if the specified thread has requested that all
 315  315   * signals be blocked.  Called by signal-related code that tests
 316  316   * the signal mask of a thread that may not be the current thread
 317  317   * and where the process's p_lock cannot be acquired.
 318  318   */
 319  319  int
 320  320  schedctl_sigblock(kthread_t *t)
 321  321  {
 322  322          sc_shared_t *tdp = t->t_schedctl;
 323  323  
 324  324          if (tdp != NULL)
 325  325                  return (tdp->sc_sigblock);
 326  326          return (0);
 327  327  }
 328  328  
 329  329  
 330  330  /*
 331  331   * If the sc_sigblock field is set for the specified thread, set its signal
 332  332   * mask to block all maskable signals, then clear the sc_sigblock field.  This
 333  333   * accomplishes what user-level code requested to be done when it set
 334  334   * tdp->sc_sigblock non-zero.
 335  335   *
 336  336   * This is generally called by signal-related code in the current thread.  In
 337  337   * order to call against a thread other than curthread, p_lock for the
 338  338   * containing process must be held.  Even then, the caller is not protected
 339  339   * from races with the thread in question updating its own fields.  It is the
 340  340   * responsibility of the caller to perform additional synchronization.
 341  341   *
 342  342   */
 343  343  void
 344  344  schedctl_finish_sigblock(kthread_t *t)
 345  345  {
 346  346          sc_shared_t *tdp = t->t_schedctl;
 347  347  
 348  348          ASSERT(t == curthread || MUTEX_HELD(&ttoproc(t)->p_lock));
 349  349  
 350  350          if (tdp != NULL && tdp->sc_sigblock) {
 351  351                  t->t_hold.__sigbits[0] = FILLSET0 & ~CANTMASK0;
 352  352                  t->t_hold.__sigbits[1] = FILLSET1 & ~CANTMASK1;
 353  353                  t->t_hold.__sigbits[2] = FILLSET2 & ~CANTMASK2;
 354  354                  tdp->sc_sigblock = 0;
 355  355          }
 356  356  }
 357  357  
 358  358  
 359  359  /*
 360  360   * Return non-zero if the current thread has declared that it has
 361  361   * a cancellation pending and that cancellation is not disabled.
 362  362   * If SIGCANCEL is blocked, we must be going over the wire in an
 363  363   * NFS transaction (sigintr() was called); return zero in this case.
 364  364   */
 365  365  int
 366  366  schedctl_cancel_pending(void)
 367  367  {
 368  368          sc_shared_t *tdp = curthread->t_schedctl;
 369  369  
 370  370          if (tdp != NULL &&
 371  371              (tdp->sc_flgs & SC_CANCEL_FLG) &&
 372  372              !tdp->sc_sigblock &&
 373  373              !sigismember(&curthread->t_hold, SIGCANCEL))
 374  374                  return (1);
 375  375          return (0);
 376  376  }
 377  377  
 378  378  
 379  379  /*
 380  380   * Inform libc that the kernel returned EINTR from some system call
 381  381   * due to there being a cancellation pending (SC_CANCEL_FLG set or
 382  382   * we received an SI_LWP SIGCANCEL while in a system call), rather
 383  383   * than because of some other signal.  User-level code can try to
 384  384   * recover from receiving other signals, but it can't recover from
 385  385   * being cancelled.
 386  386   */
 387  387  void
 388  388  schedctl_cancel_eintr(void)
 389  389  {
 390  390          sc_shared_t *tdp = curthread->t_schedctl;
 391  391  
 392  392          if (tdp != NULL)
 393  393                  tdp->sc_flgs |= SC_EINTR_FLG;
 394  394  }
 395  395  
 396  396  
 397  397  /*
 398  398   * Return non-zero if the current thread has declared that
 399  399   * it is calling into the kernel to park, else return zero.
 400  400   */
 401  401  int
 402  402  schedctl_is_park(void)
 403  403  {
 404  404          sc_shared_t *tdp = curthread->t_schedctl;
 405  405  
 406  406          if (tdp != NULL)
 407  407                  return ((tdp->sc_flgs & SC_PARK_FLG) != 0);
 408  408          /*
 409  409           * If we're here and there is no shared memory (how could
 410  410           * that happen?) then just assume we really are here to park.
 411  411           */
 412  412          return (1);
 413  413  }
 414  414  
 415  415  
 416  416  /*
 417  417   * Declare thread is parking.
 418  418   *
 419  419   * libc will set "sc_flgs |= SC_PARK_FLG" before calling lwpsys_park(0, tid)
 420  420   * in order to declare that the thread is calling into the kernel to park.
 421  421   *
 422  422   * This interface exists ONLY to support older versions of libthread which
 423  423   * are not aware of the SC_PARK_FLG flag.
 424  424   *
 425  425   * Older versions of libthread which are not aware of the SC_PARK_FLG flag
 426  426   * need to be modified or emulated to call lwpsys_park(4, ...) instead of
 427  427   * lwpsys_park(0, ...).  This will invoke schedctl_set_park() before
 428  428   * lwp_park() to declare that the thread is parking.
 429  429   */
 430  430  void
 431  431  schedctl_set_park(void)
 432  432  {
 433  433          sc_shared_t *tdp = curthread->t_schedctl;
 434  434          if (tdp != NULL)
 435  435                  tdp->sc_flgs |= SC_PARK_FLG;
 436  436  }
 437  437  
 438  438  
 439  439  /*
 440  440   * Clear the parking flag on return from parking in the kernel.
 441  441   */
 442  442  void
 443  443  schedctl_unpark(void)
 444  444  {
 445  445          sc_shared_t *tdp = curthread->t_schedctl;
 446  446  
 447  447          if (tdp != NULL)
 448  448                  tdp->sc_flgs &= ~SC_PARK_FLG;
 449  449  }
 450  450  
 451  451  
 452  452  /*
 453  453   * Page handling code.
 454  454   */
 455  455  
 456  456  void
 457  457  schedctl_init(void)
 458  458  {
 459  459          /*
 460  460           * Amount of page that can hold sc_shared_t structures.  If
 461  461           * sizeof (sc_shared_t) is a power of 2, this should just be
 462  462           * PAGESIZE.
 463  463           */
 464  464          sc_pagesize = PAGESIZE - (PAGESIZE % sizeof (sc_shared_t));
 465  465  
 466  466          /*
 467  467           * Allocation bitmap is one bit per struct on a page.
 468  468           */
 469  469          sc_bitmap_len = sc_pagesize / sizeof (sc_shared_t);
 470  470          sc_bitmap_words = howmany(sc_bitmap_len, BT_NBIPUL);
 471  471  }
 472  472  
 473  473  
 474  474  static int
 475  475  schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp)
 476  476  {
 477  477          proc_t          *p = curproc;
 478  478          sc_page_ctl_t   *pagep;
 479  479          sc_shared_t     *ssp;
 480  480          caddr_t         base;
 481  481          index_t         index;
 482  482          int             error;
 483  483  
 484  484          ASSERT(MUTEX_NOT_HELD(&p->p_lock));
 485  485          mutex_enter(&p->p_sc_lock);
 486  486  
 487  487          /*
 488  488           * Try to find space for the new data in existing pages
 489  489           * within the process's list of shared pages.
 490  490           */
 491  491          for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next)
 492  492                  if (pagep->spc_space != 0)
 493  493                          break;
 494  494  
 495  495          if (pagep != NULL)
 496  496                  base = pagep->spc_uaddr;
 497  497          else {
 498  498                  struct anon_map *amp;
 499  499                  caddr_t kaddr;
 500  500  
 501  501                  /*
 502  502                   * No room, need to allocate a new page.  Also set up
 503  503                   * a mapping to the kernel address space for the new
 504  504                   * page and lock it in memory.
 505  505                   */
 506  506                  if ((error = schedctl_getpage(&amp, &kaddr)) != 0) {
 507  507                          mutex_exit(&p->p_sc_lock);
 508  508                          return (error);
 509  509                  }
 510  510                  if ((error = schedctl_map(amp, &base, kaddr)) != 0) {
 511  511                          schedctl_freepage(amp, kaddr);
 512  512                          mutex_exit(&p->p_sc_lock);
 513  513                          return (error);
 514  514                  }
 515  515  
 516  516                  /*
 517  517                   * Allocate and initialize the page control structure.
 518  518                   */
 519  519                  pagep = kmem_alloc(sizeof (sc_page_ctl_t), KM_SLEEP);
 520  520                  pagep->spc_amp = amp;
 521  521                  pagep->spc_base = (sc_shared_t *)kaddr;
 522  522                  pagep->spc_end = (sc_shared_t *)(kaddr + sc_pagesize);
 523  523                  pagep->spc_uaddr = base;
 524  524  
 525  525                  pagep->spc_map = kmem_zalloc(sizeof (ulong_t) * sc_bitmap_words,
 526  526                      KM_SLEEP);
 527  527                  pagep->spc_space = sc_pagesize;
 528  528  
 529  529                  pagep->spc_next = p->p_pagep;
 530  530                  p->p_pagep = pagep;
 531  531          }
 532  532  
 533  533          /*
 534  534           * Got a page, now allocate space for the data.  There should
 535  535           * be space unless something's wrong.
 536  536           */
 537  537          ASSERT(pagep != NULL && pagep->spc_space >= sizeof (sc_shared_t));
 538  538          index = bt_availbit(pagep->spc_map, sc_bitmap_len);
 539  539          ASSERT(index != -1);
 540  540  
 541  541          /*
 542  542           * Get location with pointer arithmetic.  spc_base is of type
 543  543           * sc_shared_t *.  Mark as allocated.
 544  544           */
 545  545          ssp = pagep->spc_base + index;
 546  546          BT_SET(pagep->spc_map, index);
 547  547          pagep->spc_space -= sizeof (sc_shared_t);
 548  548  
 549  549          mutex_exit(&p->p_sc_lock);
 550  550  
 551  551          /*
 552  552           * Return kernel and user addresses.
 553  553           */
 554  554          *kaddrp = ssp;
 555  555          *uaddrp = (uintptr_t)base + ((uintptr_t)ssp & PAGEOFFSET);
 556  556          return (0);
 557  557  }
 558  558  
 559  559  
 560  560  /*
 561  561   * Find the page control structure corresponding to a kernel address.
 562  562   */
 563  563  static sc_page_ctl_t *
 564  564  schedctl_page_lookup(sc_shared_t *ssp)
 565  565  {
 566  566          proc_t *p = curproc;
 567  567          sc_page_ctl_t *pagep;
 568  568  
 569  569          ASSERT(MUTEX_HELD(&p->p_sc_lock));
 570  570          for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) {
 571  571                  if (ssp >= pagep->spc_base && ssp < pagep->spc_end)
 572  572                          return (pagep);
 573  573          }
 574  574          return (NULL);          /* This "can't happen".  Should we panic? */
 575  575  }
 576  576  
 577  577  
 578  578  /*
 579  579   * This function is called when a page needs to be mapped into a
 580  580   * process's address space.  Allocate the user address space and
 581  581   * set up the mapping to the page.  Assumes the page has already
 582  582   * been allocated and locked in memory via schedctl_getpage.
 583  583   */
 584  584  static int
 585  585  schedctl_map(struct anon_map *amp, caddr_t *uaddrp, caddr_t kaddr)
 586  586  {
 587  587          caddr_t addr = NULL;
 588  588          struct as *as = curproc->p_as;
 589  589          struct segvn_crargs vn_a;
 590  590          int error;
 591  591  
 592  592          as_rangelock(as);
 593  593          /* pass address of kernel mapping as offset to avoid VAC conflicts */
 594  594          map_addr(&addr, PAGESIZE, (offset_t)(uintptr_t)kaddr, 1, 0);
 595  595          if (addr == NULL) {
 596  596                  as_rangeunlock(as);
 597  597                  return (ENOMEM);
 598  598          }
 599  599  
 600  600          /*
 601  601           * Use segvn to set up the mapping to the page.
 602  602           */
 603  603          vn_a.vp = NULL;
 604  604          vn_a.offset = 0;
 605  605          vn_a.cred = NULL;
 606  606          vn_a.type = MAP_SHARED;
 607  607          vn_a.prot = vn_a.maxprot = PROT_ALL;
 608  608          vn_a.flags = 0;
 609  609          vn_a.amp = amp;
 610  610          vn_a.szc = 0;
 611  611          vn_a.lgrp_mem_policy_flags = 0;
 612  612          error = as_map(as, addr, PAGESIZE, segvn_create, &vn_a);
 613  613          as_rangeunlock(as);
 614  614  
 615  615          if (error)
 616  616                  return (error);
 617  617  
 618  618          *uaddrp = addr;
 619  619          return (0);
 620  620  }
 621  621  
 622  622  
 623  623  /*
 624  624   * Allocate a new page from anonymous memory.  Also, create a kernel
 625  625   * mapping to the page and lock the page in memory.
 626  626   */
 627  627  static int
 628  628  schedctl_getpage(struct anon_map **newamp, caddr_t *newaddr)
 629  629  {
 630  630          struct anon_map *amp;
 631  631          caddr_t kaddr;
 632  632  
 633  633          /*
 634  634           * Set up anonymous memory struct.  No swap reservation is
 635  635           * needed since the page will be locked into memory.
 636  636           */
 637  637          amp = anonmap_alloc(PAGESIZE, 0, ANON_SLEEP);
 638  638  
 639  639          /*
 640  640           * Allocate the page.
 641  641           */
 642  642          kaddr = segkp_get_withanonmap(segkp, PAGESIZE,
 643  643              KPD_NO_ANON | KPD_LOCKED | KPD_ZERO, amp);
 644  644          if (kaddr == NULL) {
 645  645                  amp->refcnt--;
 646  646                  anonmap_free(amp);
 647  647                  return (ENOMEM);
 648  648          }
 649  649  
 650  650          /*
 651  651           * The page is left SE_SHARED locked so that it won't be
 652  652           * paged out or relocated (KPD_LOCKED above).
 653  653           */
 654  654  
 655  655          *newamp = amp;
 656  656          *newaddr = kaddr;
 657  657          return (0);
 658  658  }
 659  659  
 660  660  
 661  661  /*
 662  662   * Take the necessary steps to allow a page to be released.
 663  663   * This is called when the process is doing exit() or exec(), or when
 664  664   * schedctl_map() fails.  There should be no accesses to the page after this.
 665  665   * The kernel mapping of the page is released and the page is unlocked.
 666  666   */
 667  667  static void
 668  668  schedctl_freepage(struct anon_map *amp, caddr_t kaddr)
 669  669  {
 670  670          /*
 671  671           * Release the lock on the page and remove the kernel mapping.
 672  672           */
 673  673          ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
 674  674          segkp_release(segkp, kaddr);
 675  675  
 676  676          /*
 677  677           * Decrement the refcnt so the anon_map structure will be freed.
 678  678           */
 679  679          if (--amp->refcnt == 0) {
 680  680                  /*
 681  681                   * The current process no longer has the page mapped, so
 682  682                   * we have to free everything rather than letting as_free
 683  683                   * do the work.
 684  684                   */
 685  685                  anonmap_purge(amp);
 686  686                  anon_free(amp->ahp, 0, PAGESIZE);
 687  687                  ANON_LOCK_EXIT(&amp->a_rwlock);
 688  688                  anonmap_free(amp);
 689  689          } else {
 690  690                  ANON_LOCK_EXIT(&amp->a_rwlock);
 691  691          }
 692  692  }

↓ open down ↓

566 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX