--- old/usr/src/uts/common/vm/seg_umap.c
+++ new/usr/src/uts/common/vm/seg_umap.c
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * VM - Kernel-to-user mapping segment
 *
 * The umap segment driver was primarily designed to facilitate the comm page:
 * a portion of kernel memory shared with userspace so that certain (namely
 * clock-related) actions could operate without making an expensive trip into
 * the kernel.
 *
 * Since the initial requirements for the comm page are slim, advanced features
 * of the segment driver such as per-page protection have been left
 * unimplemented at this time.
 */
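
/*
 * A consumer attaches one of these segments to a user address space via
 * as_map().  As a minimal illustrative sketch (assuming a page-aligned
 * segkmem address 'kpage' and a chosen user address 'uaddr'; the actual
 * comm page wiring lives elsewhere in the tree):
 *
 *	segumap_crargs_t args;
 *
 *	args.kaddr = kpage;
 *	args.prot = PROT_READ | PROT_USER;
 *	error = as_map(as, uaddr, PAGESIZE, segumap_create, &args);
 */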


#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/lgrp.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_umap.h>


static boolean_t segumap_verify_safe(caddr_t, size_t);
static int segumap_dup(struct seg *, struct seg *);
static int segumap_unmap(struct seg *, caddr_t, size_t);
static void segumap_free(struct seg *);
static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t,
    enum fault_type, enum seg_rw);
static faultcode_t segumap_faulta(struct seg *, caddr_t);
static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t);
static size_t segumap_incore(struct seg *, caddr_t, size_t, char *);
static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
    size_t);
static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *);
static u_offset_t segumap_getoffset(struct seg *, caddr_t);
static int segumap_gettype(struct seg *, caddr_t);
static int segumap_getvp(struct seg *, caddr_t, struct vnode **);
static int segumap_advise(struct seg *, caddr_t, size_t, uint_t);
static void segumap_dump(struct seg *);
static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***,
    enum lock_type, enum seg_rw);
static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t);
static int segumap_getmemid(struct seg *, caddr_t, memid_t *);
static int segumap_capable(struct seg *, segcapability_t);

static struct seg_ops segumap_ops = {
        segumap_dup,
        segumap_unmap,
        segumap_free,
        segumap_fault,
        segumap_faulta,
        segumap_setprot,
        segumap_checkprot,
        NULL,                   /* kluster: disabled */
        NULL,                   /* swapout: disabled */
        segumap_sync,
        segumap_incore,
        segumap_lockop,
        segumap_getprot,
        segumap_getoffset,
        segumap_gettype,
        segumap_getvp,
        segumap_advise,
        segumap_dump,
        segumap_pagelock,
        segumap_setpagesize,
        segumap_getmemid,
        NULL,                   /* getpolicy: disabled */
        segumap_capable,
        seg_inherit_notsup
};
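
/*
 * The VM layer dispatches to this driver through the SEGOP_* macros in
 * <vm/seg.h>, which indirect through the table above.  For example, a
 * fault on one of these segments arrives roughly as (sketch):
 *
 *	SEGOP_FAULT(hat, seg, addr, len, F_INVAL, S_READ);
 *
 * which resolves to segumap_fault().
 */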


/*
 * Create a kernel/user-mapped segment.
 */
int
segumap_create(struct seg *seg, void *argsp)
{
        segumap_crargs_t *a = (struct segumap_crargs *)argsp;
        segumap_data_t *data;

        ASSERT((uintptr_t)a->kaddr > _userlimit);

        /*
         * Check several aspects of the mapping request to ensure validity:
         * - kernel pages must reside entirely in kernel space
         * - target protection must be user-accessible
         * - kernel address must be page-aligned
         * - kernel address must reside inside a "safe" segment
         */
        if ((uintptr_t)a->kaddr <= _userlimit ||
            ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr ||
            (a->prot & PROT_USER) == 0 ||
            ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 ||
            !segumap_verify_safe(a->kaddr, seg->s_size)) {
                return (EINVAL);
        }

        data = kmem_zalloc(sizeof (*data), KM_SLEEP);
        rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL);
        data->sud_kaddr = a->kaddr;
        data->sud_prot = a->prot;

        seg->s_ops = &segumap_ops;
        seg->s_data = data;
        return (0);
}

static boolean_t
segumap_verify_safe(caddr_t kaddr, size_t len)
{
        struct seg *seg;

        /*
         * Presently, only pages which are backed by segkmem are allowed to be
         * shared with userspace.  This prevents nasty paging behavior with
         * other drivers such as seg_kp.  Furthermore, the backing kernel
         * segment must completely contain the region to be mapped.
         *
         * Failing these checks is fatal for now since such mappings are done
         * in a very limited context from the kernel.
         */
        AS_LOCK_ENTER(&kas, RW_READER);
        seg = as_segat(&kas, kaddr);
        VERIFY(seg != NULL);
        VERIFY(seg->s_base + seg->s_size >= kaddr + len);
        VERIFY(seg->s_ops == &segkmem_ops);
        AS_LOCK_EXIT(&kas);

        return (B_TRUE);
}

static int
segumap_dup(struct seg *seg, struct seg *newseg)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;
        segumap_data_t *newsud;

        ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

        newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP);
        rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL);
        newsud->sud_kaddr = sud->sud_kaddr;
        newsud->sud_prot = sud->sud_prot;

        newseg->s_ops = seg->s_ops;
        newseg->s_data = newsud;
        return (0);
}

static int
segumap_unmap(struct seg *seg, caddr_t addr, size_t len)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;

        ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

        /* Only allow unmap of entire segment */
        if (addr != seg->s_base || len != seg->s_size) {
                return (EINVAL);
        }
        if (sud->sud_softlockcnt != 0) {
                return (EAGAIN);
        }

        /*
         * Unconditionally unload the entire segment range.
         */
        hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);

        seg_free(seg);
        return (0);
}

static void
segumap_free(struct seg *seg)
{
        segumap_data_t *data = (segumap_data_t *)seg->s_data;

        ASSERT(data != NULL);

        rw_destroy(&data->sud_lock);
        VERIFY(data->sud_softlockcnt == 0);
        kmem_free(data, sizeof (*data));
        seg->s_data = NULL;
}

/* ARGSUSED */
static faultcode_t
segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        if (type == F_PROT) {
                /*
                 * Since protection on the segment is fixed, there is nothing
                 * to do but report an error for protection faults.
                 */
                return (FC_PROT);
        } else if (type == F_SOFTUNLOCK) {
                size_t plen = btop(len);

                rw_enter(&sud->sud_lock, RW_WRITER);
                VERIFY(sud->sud_softlockcnt >= plen);
                sud->sud_softlockcnt -= plen;
                rw_exit(&sud->sud_lock);
                return (0);
        }

        ASSERT(type == F_INVAL || type == F_SOFTLOCK);
        rw_enter(&sud->sud_lock, RW_WRITER);

        if (type == F_INVAL ||
            (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) {
                /*
                 * Load the (entire) segment into the HAT.
                 *
                 * It's possible that threads racing into as_fault will cause
                 * seg_umap to load the same range multiple times in quick
                 * succession.  Redundant hat_devload operations are safe.
                 */
                for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) {
                        pfn_t pfn;

                        pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i);
                        VERIFY(pfn != PFN_INVALID);
                        hat_devload(seg->s_as->a_hat, seg->s_base + i,
                            PAGESIZE, pfn, sud->sud_prot, HAT_LOAD);
                }
        }
        if (type == F_SOFTLOCK) {
                size_t nval = sud->sud_softlockcnt + btop(len);

                if (sud->sud_softlockcnt >= nval) {
                        rw_exit(&sud->sud_lock);
                        return (FC_MAKE_ERR(EOVERFLOW));
                }
                sud->sud_softlockcnt = nval;
        }

        rw_exit(&sud->sud_lock);
        return (0);
}
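
/*
 * Illustrative softlock lifecycle (a sketch of expected caller behavior, not
 * code from this module): a caller soft-locking a range issues something like
 *
 *	(void) as_fault(as->a_hat, as, addr, len, F_SOFTLOCK, S_READ);
 *
 * which raises sud_softlockcnt by btop(len) above, and later drops the lock
 * with a matching F_SOFTUNLOCK fault over the same range.  While the count is
 * nonzero, segumap_unmap() refuses to tear down the segment and returns
 * EAGAIN.
 */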

/* ARGSUSED */
static faultcode_t
segumap_faulta(struct seg *seg, caddr_t addr)
{
        /* Do nothing; an async pagefault should not load translations. */
        return (0);
}

/* ARGSUSED */
static int
segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        /*
         * The seg_umap driver does not yet allow protection to be changed.
         */
        return (EACCES);
}

/* ARGSUSED */
static int
segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;
        int error = 0;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        rw_enter(&sud->sud_lock, RW_READER);
        if ((sud->sud_prot & prot) != prot) {
                error = EACCES;
        }
        rw_exit(&sud->sud_lock);
        return (error);
}

/* ARGSUSED */
static int
segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
        /* Always succeed since there is no backing store to sync. */
        return (0);
}

/* ARGSUSED */
static size_t
segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
        size_t sz = 0;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        len = (len + PAGEOFFSET) & PAGEMASK;
        while (len > 0) {
                *vec = 1;
                sz += PAGESIZE;
                vec++;
                len -= PAGESIZE;
        }
        return (sz);
}

/* ARGSUSED */
static int
segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
    ulong_t *lockmap, size_t pos)
{
        /* Report success since kernel pages are always in memory. */
        return (0);
}

static int
segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;
        size_t pgno;
        uint_t prot;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        rw_enter(&sud->sud_lock, RW_READER);
        prot = sud->sud_prot;
        rw_exit(&sud->sud_lock);

        /*
         * Reporting protection is simple since it is not tracked per-page.
         */
        pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
        while (pgno > 0) {
                protv[--pgno] = prot;
        }
        return (0);
}

/* ARGSUSED */
static u_offset_t
segumap_getoffset(struct seg *seg, caddr_t addr)
{
        /*
         * To avoid leaking information about the layout of the kernel address
         * space, always report '0' as the offset.
         */
        return (0);
}

/* ARGSUSED */
static int
segumap_gettype(struct seg *seg, caddr_t addr)
{
        /*
         * Since already-existing kernel pages are being mapped into userspace,
         * always report the segment type as shared.
         */
        return (MAP_SHARED);
}

/* ARGSUSED */
static int
segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        *vpp = NULL;
        return (0);
}

/* ARGSUSED */
static int
segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
        if (behav == MADV_PURGE) {
                /* Purge does not make sense for this mapping */
                return (EINVAL);
        }
        /* Indicate success for everything else. */
        return (0);
}

/* ARGSUSED */
static void
segumap_dump(struct seg *seg)
{
        /*
         * Since this is a mapping to share kernel data with userspace, nothing
         * additional should be dumped.
         */
}

/* ARGSUSED */
static int
segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
    enum lock_type type, enum seg_rw rw)
{
        return (ENOTSUP);
}

/* ARGSUSED */
static int
segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
        return (ENOTSUP);
}

static int
segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;

        memidp->val[0] = (uintptr_t)sud->sud_kaddr;
        memidp->val[1] = (uintptr_t)(addr - seg->s_base);
        return (0);
}

/* ARGSUSED */
static int
segumap_capable(struct seg *seg, segcapability_t capability)
{
        /* No special capabilities. */
        return (0);
}
  