/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * VM - Kernel-to-user mapping segment
 *
 * The umap segment driver was primarily designed to facilitate the comm page:
 * a portion of kernel memory shared with userspace so that certain (namely
 * clock-related) actions could operate without making an expensive trip into
 * the kernel.
 *
 * Since the initial requirements for the comm page are slim, advanced features
 * of the segment driver such as per-page protection have been left
 * unimplemented at this time.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/lgrp.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_umap.h>

static boolean_t segumap_verify_safe(caddr_t, size_t);
static int segumap_dup(struct seg *, struct seg *);
static int segumap_unmap(struct seg *, caddr_t, size_t);
static void segumap_free(struct seg *);
static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t,
    enum fault_type, enum seg_rw);
static faultcode_t segumap_faulta(struct seg *, caddr_t);
static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t);
static size_t segumap_incore(struct seg *, caddr_t, size_t, char *);
static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
    size_t);
static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *);
static u_offset_t segumap_getoffset(struct seg *, caddr_t);
static int segumap_gettype(struct seg *, caddr_t);
static int segumap_getvp(struct seg *, caddr_t, struct vnode **);
static int segumap_advise(struct seg *, caddr_t, size_t, uint_t);
static void segumap_dump(struct seg *);
static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***,
    enum lock_type, enum seg_rw);
static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t);
static int segumap_getmemid(struct seg *, caddr_t, memid_t *);
static int segumap_capable(struct seg *, segcapability_t);

static struct seg_ops segumap_ops = {
        segumap_dup,
        segumap_unmap,
        segumap_free,
        segumap_fault,
        segumap_faulta,
        segumap_setprot,
        segumap_checkprot,
        NULL,                   /* kluster: disabled */
        NULL,                   /* swapout: disabled */
        segumap_sync,
        segumap_incore,
        segumap_lockop,
        segumap_getprot,
        segumap_getoffset,
        segumap_gettype,
        segumap_getvp,
        segumap_advise,
        segumap_dump,
        segumap_pagelock,
        segumap_setpagesize,
        segumap_getmemid,
        NULL,                   /* getpolicy: disabled */
        segumap_capable,
        seg_inherit_notsup
};

/*
 * Create a kernel/user-mapped segment.
 */
int
segumap_create(struct seg *seg, void *argsp)
{
        segumap_crargs_t *a = (struct segumap_crargs *)argsp;
        segumap_data_t *data;

        ASSERT((uintptr_t)a->kaddr > _userlimit);

        /*
         * Check several aspects of the mapping request to ensure validity:
         * - kernel pages must reside entirely in kernel space
         * - target protection must be user-accessible
         * - kernel address must be page-aligned
         * - kernel address must reside inside a "safe" segment
         */
        if ((uintptr_t)a->kaddr <= _userlimit ||
            ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr ||
            (a->prot & PROT_USER) == 0 ||
            ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 ||
            !segumap_verify_safe(a->kaddr, seg->s_size)) {
                return (EINVAL);
        }

        data = kmem_zalloc(sizeof (*data), KM_SLEEP);
        rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL);
        data->sud_kaddr = a->kaddr;
        data->sud_prot = a->prot;

        seg->s_ops = &segumap_ops;
        seg->s_data = data;
        return (0);
}
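
/*
 * Typical use (a sketch, not code from this file): a consumer such as the
 * comm page code maps a range of kernel pages into a user address space via
 * as_map(), passing segumap_create() as the segment-creation callback.  The
 * variable names below are illustrative only:
 *
 *	segumap_crargs_t args;
 *
 *	args.kaddr = kaddr;	(page-aligned kernel address)
 *	args.prot = PROT_READ | PROT_USER;
 *	error = as_map(as, uaddr, len, segumap_create, &args);
 */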

static boolean_t
segumap_verify_safe(caddr_t kaddr, size_t len)
{
        struct seg *seg;

        /*
         * Presently, only pages which are backed by segkmem are allowed to be
         * shared with userspace.  This prevents nasty paging behavior with
         * other drivers such as seg_kp.  Furthermore, the backing kernel
         * segment must completely contain the region to be mapped.
         *
         * Failing these checks is fatal for now since such mappings are done
         * in a very limited context from the kernel.
         */
        AS_LOCK_ENTER(&kas, RW_READER);
        seg = as_segat(&kas, kaddr);
        VERIFY(seg != NULL);
        VERIFY(seg->s_base + seg->s_size >= kaddr + len);
        VERIFY(seg->s_ops == &segkmem_ops);
        AS_LOCK_EXIT(&kas);

        return (B_TRUE);
}

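/*
 * Duplicate the segment into a child address space (as during fork).  The
 * underlying kernel pages are shared, so only the per-segment bookkeeping
 * is copied.
 */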
static int
segumap_dup(struct seg *seg, struct seg *newseg)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;
        segumap_data_t *newsud;

        ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

        newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP);
        rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL);
        newsud->sud_kaddr = sud->sud_kaddr;
        newsud->sud_prot = sud->sud_prot;

        newseg->s_ops = seg->s_ops;
        newseg->s_data = newsud;
        return (0);
}

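/*
 * Unmap the segment.  Only whole-segment unmaps are permitted, and the
 * operation fails if any of the pages are still softlocked.
 */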
static int
segumap_unmap(struct seg *seg, caddr_t addr, size_t len)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;

        ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

        /* Only allow unmap of entire segment */
        if (addr != seg->s_base || len != seg->s_size) {
                return (EINVAL);
        }
        if (sud->sud_softlockcnt != 0) {
                return (EAGAIN);
        }

        /*
         * Unconditionally unload the entire segment range.
         */
        hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);

        seg_free(seg);
        return (0);
}

static void
segumap_free(struct seg *seg)
{
        segumap_data_t *data = (segumap_data_t *)seg->s_data;

        ASSERT(data != NULL);

        rw_destroy(&data->sud_lock);
        VERIFY(data->sud_softlockcnt == 0);
        kmem_free(data, sizeof (*data));
        seg->s_data = NULL;
}

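/*
 * Handle a fault on the segment.  Protection faults always fail since the
 * segment's protection is fixed.  F_INVAL and initial F_SOFTLOCK faults load
 * translations for the entire segment; the softlock count is tracked so that
 * F_SOFTUNLOCK can later balance it.
 */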
/* ARGSUSED */
static faultcode_t
segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        if (type == F_PROT) {
                /*
                 * Since protection on the segment is fixed, there is nothing
                 * to do but report an error for protection faults.
                 */
                return (FC_PROT);
        } else if (type == F_SOFTUNLOCK) {
                size_t plen = btop(len);

                rw_enter(&sud->sud_lock, RW_WRITER);
                VERIFY(sud->sud_softlockcnt >= plen);
                sud->sud_softlockcnt -= plen;
                rw_exit(&sud->sud_lock);
                return (0);
        }

        ASSERT(type == F_INVAL || type == F_SOFTLOCK);
        rw_enter(&sud->sud_lock, RW_WRITER);

        if (type == F_INVAL ||
            (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) {
                /*
                 * Load the (entire) segment into the HAT.
                 *
                 * It's possible that threads racing into as_fault will cause
                 * seg_umap to load the same range multiple times in quick
                 * succession.  Redundant hat_devload operations are safe.
                 */
                for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) {
                        pfn_t pfn;

                        pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i);
                        VERIFY(pfn != PFN_INVALID);
                        hat_devload(seg->s_as->a_hat, seg->s_base + i,
                            PAGESIZE, pfn, sud->sud_prot, HAT_LOAD);
                }
        }
        if (type == F_SOFTLOCK) {
                size_t nval = sud->sud_softlockcnt + btop(len);

                /* Fail the fault rather than letting the count wrap. */
                if (sud->sud_softlockcnt >= nval) {
                        rw_exit(&sud->sud_lock);
                        return (FC_MAKE_ERR(EOVERFLOW));
                }
                sud->sud_softlockcnt = nval;
        }

        rw_exit(&sud->sud_lock);
        return (0);
}

/* ARGSUSED */
static faultcode_t
segumap_faulta(struct seg *seg, caddr_t addr)
{
        /* Do nothing: an async page fault should not load translations. */
        return (0);
}

/* ARGSUSED */
static int
segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        /*
         * The seg_umap driver does not yet allow protection to be changed.
         */
        return (EACCES);
}

/* ARGSUSED */
static int
segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;
        int error = 0;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        rw_enter(&sud->sud_lock, RW_READER);
        if ((sud->sud_prot & prot) != prot) {
                error = EACCES;
        }
        rw_exit(&sud->sud_lock);
        return (error);
}

/* ARGSUSED */
static int
segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
        /* Always succeed since there is no backing store to sync. */
        return (0);
}

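/*
 * Report residency for the range.  The pages are backed by wired kernel
 * memory, so every page is always in core.
 */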
/* ARGSUSED */
static size_t
segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
        size_t sz = 0;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        len = (len + PAGEOFFSET) & PAGEMASK;
        while (len > 0) {
                *vec = 1;
                sz += PAGESIZE;
                vec++;
                len -= PAGESIZE;
        }
        return (sz);
}

/* ARGSUSED */
static int
segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
    ulong_t *lockmap, size_t pos)
{
        /* Report success since kernel pages are always in memory. */
        return (0);
}

static int
segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;
        size_t pgno;
        uint_t prot;

        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        rw_enter(&sud->sud_lock, RW_READER);
        prot = sud->sud_prot;
        rw_exit(&sud->sud_lock);

        /*
         * Reporting protection is simple since it is not tracked per-page.
         */
        pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
        while (pgno > 0) {
                protv[--pgno] = prot;
        }
        return (0);
}

/* ARGSUSED */
static u_offset_t
segumap_getoffset(struct seg *seg, caddr_t addr)
{
        /*
         * To avoid leaking information about the layout of the kernel address
         * space, always report '0' as the offset.
         */
        return (0);
}

/* ARGSUSED */
static int
segumap_gettype(struct seg *seg, caddr_t addr)
{
        /*
         * Since already-existing kernel pages are being mapped into userspace,
         * always report the segment type as shared.
         */
        return (MAP_SHARED);
}

/* ARGSUSED */
static int
segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        *vpp = NULL;
        return (0);
}

/* ARGSUSED */
static int
segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
        if (behav == MADV_PURGE) {
                /* Purge does not make sense for this mapping */
                return (EINVAL);
        }
        /* Indicate success for everything else. */
        return (0);
}

/* ARGSUSED */
static void
segumap_dump(struct seg *seg)
{
        /*
         * Since this is a mapping to share kernel data with userspace, nothing
         * additional should be dumped.
         */
}

/* ARGSUSED */
static int
segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
    enum lock_type type, enum seg_rw rw)
{
        return (ENOTSUP);
}

/* ARGSUSED */
static int
segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
        return (ENOTSUP);
}

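/*
 * Produce a memory ID for the mapping: the backing kernel address paired
 * with the offset of 'addr' within the segment.
 */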
static int
segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
        segumap_data_t *sud = (segumap_data_t *)seg->s_data;

        memidp->val[0] = (uintptr_t)sud->sud_kaddr;
        memidp->val[1] = (uintptr_t)(addr - seg->s_base);
        return (0);
}

/* ARGSUSED */
static int
segumap_capable(struct seg *seg, segcapability_t capability)
{
        /* no special capabilities */
        return (0);
}