Print this page




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2016 Joyent, Inc.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * University Copyright- Copyright (c) 1982, 1986, 1988
  33  * The Regents of the University of California
  34  * All Rights Reserved
  35  *
  36  * University Acknowledgment- Portions of this document are derived from
  37  * software developed by the University of California, Berkeley, and its
  38  * contributors.
  39  */
  40 
  41 /*
  42  * VM - address spaces.
  43  */
  44 


  55 #include <sys/vnode.h>
  56 #include <sys/vmsystm.h>
  57 #include <sys/cmn_err.h>
  58 #include <sys/debug.h>
  59 #include <sys/tnf_probe.h>
  60 #include <sys/vtrace.h>
  61 #include <sys/ddi.h>
  62 
  63 #include <vm/hat.h>
  64 #include <vm/as.h>
  65 #include <vm/seg.h>
  66 #include <vm/seg_vn.h>
  67 #include <vm/seg_dev.h>
  68 #include <vm/seg_kmem.h>
  69 #include <vm/seg_map.h>
  70 #include <vm/seg_spt.h>
  71 #include <vm/page.h>
  72 
  73 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
  74 
  75 ulong_t as_user_seg_limit = 0xffff; /* max segments in a user (non-kas) AS */
  76 
  77 static struct kmem_cache *as_cache;
  78 
  79 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
  80 static void as_clearwatchprot(struct as *, caddr_t, size_t);
  81 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
  82 
  83 
  84 /*
  85  * Verifying the segment lists is very time-consuming; it may not always be
  86  * desirable to define VERIFY_SEGLIST when DEBUG is set.
  87  */
  88 #ifdef DEBUG
  89 #define VERIFY_SEGLIST
  90 int do_as_verify = 0;
  91 #endif
  92 
  93 /*
  94  * Allocate a new callback data structure entry and fill in the events of
  95  * interest, the address range of interest, and the callback argument.
  96  * Link the entry on the as->a_callbacks list. A callback entry for the


 874 
 875         case F_SOFTLOCK:
 876                 CPU_STATS_ADD_K(vm, softlock, 1);
 877                 break;
 878 
 879         case F_SOFTUNLOCK:
 880                 break;
 881 
 882         case F_PROT:
 883                 CPU_STATS_ADD_K(vm, prot_fault, 1);
 884                 break;
 885 
 886         case F_INVAL:
 887                 CPU_STATS_ENTER_K();
 888                 CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
 889                 if (as == &kas)
 890                         CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
 891                 CPU_STATS_EXIT_K();
 892                 if (zonep->zone_pg_flt_delay != 0) {
 893                         /*
 894                          * The zone in which this process is running is
  895                          * currently over its physical memory cap. Throttle
 896                          * page faults to help the user-land memory capper
 897                          * catch up. Note that drv_usectohz() rounds up.

 898                          */
 899                         atomic_add_64(&zonep->zone_pf_throttle, 1);
 900                         atomic_add_64(&zonep->zone_pf_throttle_usec,
 901                             zonep->zone_pg_flt_delay);
 902                         if (zonep->zone_pg_flt_delay < TICK_TO_USEC(1)) {
 903                                 drv_usecwait(zonep->zone_pg_flt_delay);
 904                         } else {
 905                                 delay(drv_usectohz(zonep->zone_pg_flt_delay));
 906                         }
 907                 }
 908                 break;
 909         }
 910 
 911         /* Kernel probe */
 912         TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
 913             tnf_opaque, address,        addr,
 914             tnf_fault_type,     fault_type,     type,
 915             tnf_seg_access,     access,         rw);
 916 
 917         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 918         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
 919             (size_t)raddr;
 920 
 921         /*
 922          * XXX -- Don't grab the as lock for segkmap. We should grab it for
 923          * correctness, but then we could be stuck holding this lock for
 924          * a LONG time if the fault needs to be resolved on a slow
 925          * filesystem, and then no-one will be able to exec new commands,
 926          * as exec'ing requires the write lock on the as.
 927          */


1694          * fallback to curproc in that case.
1695          */
1696         struct proc *p = (as->a_proc == NULL) ? curproc : as->a_proc;
1697         struct segvn_crargs crargs;
1698 
1699         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1700         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1701             (size_t)raddr;
1702 
1703         /*
1704          * check for wrap around
1705          */
1706         if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
1707                 AS_LOCK_EXIT(as);
1708                 return (ENOMEM);
1709         }
1710 
1711         as->a_updatedir = 1; /* inform /proc */
1712         gethrestime(&as->a_updatetime);
1713 
1714         if (as != &kas) {
1715                 if (as->a_size + rsize > (size_t)p->p_vmem_ctl) {
1716                         AS_LOCK_EXIT(as);
1717 
1718                         (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM],
1719                             p->p_rctls, p, RCA_UNSAFE_ALL);
1720                         return (ENOMEM);
1721                 }
1722 
1723                 /*
1724                  * Keep the number of segments in a userspace AS constrained to
1725                  * a reasonable limit.  Linux enforces a value slightly less
1726                  * than 64k in order to avoid ELF limits if/when a process
1727                  * dumps core.  While SunOS avoids that specific problem with
1728                  * other tricks, the limit is still valuable to keep kernel
1729                  * memory consumption in check.
1730                  */
1731                 if (avl_numnodes(&as->a_segtree) >= as_user_seg_limit) {
1732                         AS_LOCK_EXIT(as);
1733                         atomic_inc_32(&p->p_zone->zone_mfseglim);
1734                         return (ENOMEM);
1735                 }
1736         }
1737 
1738         if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
1739                 crargs = *(struct segvn_crargs *)argsp;
1740                 error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
1741                 if (error != 0) {
1742                         AS_LOCK_EXIT(as);
1743                         if (unmap) {
1744                                 (void) as_unmap(as, addr, size);
1745                         }
1746                         return (error);
1747                 }
1748         } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
1749                 crargs = *(struct segvn_crargs *)argsp;
1750                 error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
1751                 if (error != 0) {
1752                         AS_LOCK_EXIT(as);
1753                         if (unmap) {
1754                                 (void) as_unmap(as, addr, size);
1755                         }
1756                         return (error);




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2015, Joyent, Inc.  All rights reserved.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * University Copyright- Copyright (c) 1982, 1986, 1988
  33  * The Regents of the University of California
  34  * All Rights Reserved
  35  *
  36  * University Acknowledgment- Portions of this document are derived from
  37  * software developed by the University of California, Berkeley, and its
  38  * contributors.
  39  */
  40 
  41 /*
  42  * VM - address spaces.
  43  */
  44 


  55 #include <sys/vnode.h>
  56 #include <sys/vmsystm.h>
  57 #include <sys/cmn_err.h>
  58 #include <sys/debug.h>
  59 #include <sys/tnf_probe.h>
  60 #include <sys/vtrace.h>
  61 #include <sys/ddi.h>
  62 
  63 #include <vm/hat.h>
  64 #include <vm/as.h>
  65 #include <vm/seg.h>
  66 #include <vm/seg_vn.h>
  67 #include <vm/seg_dev.h>
  68 #include <vm/seg_kmem.h>
  69 #include <vm/seg_map.h>
  70 #include <vm/seg_spt.h>
  71 #include <vm/page.h>
  72 
  73 clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
  74 


  75 static struct kmem_cache *as_cache;
  76 
  77 static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t);
  78 static void as_clearwatchprot(struct as *, caddr_t, size_t);
  79 int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *);
  80 
  81 
  82 /*
  83  * Verifying the segment lists is very time-consuming; it may not always be
  84  * desirable to define VERIFY_SEGLIST when DEBUG is set.
  85  */
  86 #ifdef DEBUG
  87 #define VERIFY_SEGLIST
  88 int do_as_verify = 0;
  89 #endif
  90 
  91 /*
  92  * Allocate a new callback data structure entry and fill in the events of
  93  * interest, the address range of interest, and the callback argument.
  94  * Link the entry on the as->a_callbacks list. A callback entry for the


 872 
 873         case F_SOFTLOCK:
 874                 CPU_STATS_ADD_K(vm, softlock, 1);
 875                 break;
 876 
 877         case F_SOFTUNLOCK:
 878                 break;
 879 
 880         case F_PROT:
 881                 CPU_STATS_ADD_K(vm, prot_fault, 1);
 882                 break;
 883 
 884         case F_INVAL:
 885                 CPU_STATS_ENTER_K();
 886                 CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
 887                 if (as == &kas)
 888                         CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
 889                 CPU_STATS_EXIT_K();
 890                 if (zonep->zone_pg_flt_delay != 0) {
 891                         /*
 892                          * The zone in which this process is running
  893                          * is currently over its physical memory cap.
 894                          * Throttle page faults to help the user-land
 895                          * memory capper catch up. Note that
 896                          * drv_usectohz() rounds up.
 897                          */
 898                         atomic_add_64(&zonep->zone_pf_throttle, 1);
 899                         atomic_add_64(&zonep->zone_pf_throttle_usec,
 900                             zonep->zone_pg_flt_delay);
 901                         if (zonep->zone_pg_flt_delay < TICK_TO_USEC(1))
 902                                 drv_usecwait(zonep->zone_pg_flt_delay);
 903                         else
 904                                 delay(drv_usectohz(zonep->zone_pg_flt_delay));
 905                 }

 906                 break;
 907         }
 908 
 909         /* Kernel probe */
 910         TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
 911             tnf_opaque, address,        addr,
 912             tnf_fault_type,     fault_type,     type,
 913             tnf_seg_access,     access,         rw);
 914 
 915         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
 916         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
 917             (size_t)raddr;
 918 
 919         /*
 920          * XXX -- Don't grab the as lock for segkmap. We should grab it for
 921          * correctness, but then we could be stuck holding this lock for
 922          * a LONG time if the fault needs to be resolved on a slow
 923          * filesystem, and then no-one will be able to exec new commands,
 924          * as exec'ing requires the write lock on the as.
 925          */


1692          * fallback to curproc in that case.
1693          */
1694         struct proc *p = (as->a_proc == NULL) ? curproc : as->a_proc;
1695         struct segvn_crargs crargs;
1696 
1697         raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1698         rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
1699             (size_t)raddr;
1700 
1701         /*
1702          * check for wrap around
1703          */
1704         if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
1705                 AS_LOCK_EXIT(as);
1706                 return (ENOMEM);
1707         }
1708 
1709         as->a_updatedir = 1; /* inform /proc */
1710         gethrestime(&as->a_updatetime);
1711 
1712         if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {

1713                 AS_LOCK_EXIT(as);
1714 
1715                 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1716                     RCA_UNSAFE_ALL);


1717 











1718                 return (ENOMEM);
1719         }

1720 
1721         if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) {
1722                 crargs = *(struct segvn_crargs *)argsp;
1723                 error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap);
1724                 if (error != 0) {
1725                         AS_LOCK_EXIT(as);
1726                         if (unmap) {
1727                                 (void) as_unmap(as, addr, size);
1728                         }
1729                         return (error);
1730                 }
1731         } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) {
1732                 crargs = *(struct segvn_crargs *)argsp;
1733                 error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap);
1734                 if (error != 0) {
1735                         AS_LOCK_EXIT(as);
1736                         if (unmap) {
1737                                 (void) as_unmap(as, addr, size);
1738                         }
1739                         return (error);