Print this page




   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/errno.h>
  28 #include <sys/param.h>
  29 #include <sys/t_lock.h>
  30 #include <sys/systm.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/debug.h>
  33 #include <sys/time.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/vnode.h>
  36 #include <sys/stat.h>
  37 #include <sys/vfs.h>
  38 #include <sys/cred.h>
  39 #include <sys/kmem.h>
  40 #include <sys/atomic.h>
  41 #include <sys/policy.h>
  42 #include <sys/fs/tmp.h>
  43 #include <sys/fs/tmpnode.h>
  44 #include <sys/ddi.h>
  45 #include <sys/sunddi.h>
  46 #include <vm/anon.h>
  47 
  48 #define KILOBYTE        1024
  49 #define MEGABYTE        (1024 * KILOBYTE)
  50 #define GIGABYTE        (1024 * MEGABYTE)
  51 
  52 #define MODESHIFT       3
  53 
  54 #define VALIDMODEBITS   07777
  55 
  56 extern pgcnt_t swapfs_minfree;
  57 
  58 void *
  59 tmp_kmem_zalloc(struct tmount *tm, size_t size, int flag)
  60 {
  61         void *buf;
  62         zone_t *zone;
  63         size_t pages;
  64 
  65         mutex_enter(&tm->tm_contents);
  66         zone = tm->tm_vfsp->vfs_zone;
  67         if (tm->tm_anonmem + size > tm->tm_anonmax ||
  68             tm->tm_anonmem + size < tm->tm_anonmem ||
  69             size + ptob(tmpfs_minfree) <= size ||
  70             !anon_checkspace(size + ptob(tmpfs_minfree), zone)) {
  71                 mutex_exit(&tm->tm_contents);
  72                 return (NULL);
  73         }
  74 
  75         /*
  76          * Only make anonymous memory reservations when a page boundary is
  77          * crossed.  This is necessary since the anon_resv functions rounds up
  78          * to PAGESIZE internally.
  79          */
  80         pages = btopr(tm->tm_allocmem + size);
  81         pages -= btopr(tm->tm_allocmem);
  82         if (pages > 0 && anon_try_resv_zone(ptob(pages), zone) == 0) {
  83                 mutex_exit(&tm->tm_contents);
  84                 return (NULL);
  85         }
  86 
  87         tm->tm_allocmem += size;
  88         tm->tm_anonmem += size;
  89         mutex_exit(&tm->tm_contents);
  90 
  91         buf = kmem_zalloc(size, flag);
  92         if (buf == NULL) {
  93                 mutex_enter(&tm->tm_contents);
  94                 ASSERT(tm->tm_anonmem > tm->tm_anonmem - size);
  95                 tm->tm_anonmem -= size;
  96                 if (pages > 0) {
  97                         /*
  98                          * Re-chasing the zone pointer is necessary since a
  99                          * forced umount could have been performed while the
 100                          * tm_contents lock was dropped during allocation.
 101                          */
 102                         anon_unresv_zone(ptob(pages), tm->tm_vfsp->vfs_zone);
 103                 }
 104                 mutex_exit(&tm->tm_contents);
 105         }
 106 
 107         return (buf);
 108 }
 109 
 110 void
 111 tmp_kmem_free(struct tmount *tm, void *buf, size_t size)
 112 {
 113         size_t pages;
 114 
 115         kmem_free(buf, size);
 116         mutex_enter(&tm->tm_contents);
 117         ASSERT(tm->tm_anonmem > tm->tm_anonmem - size);
 118         tm->tm_anonmem -= size;
 119         pages = btopr(tm->tm_allocmem);
 120         tm->tm_allocmem -= size;
 121         pages -= btopr(tm->tm_allocmem);
 122         /*
 123          * Like the tmp_kmem_zalloc case, only unreserve anonymous memory when
 124          * a page boundary has been crossed.
 125          */
 126         if (pages > 0) {
 127                 anon_unresv_zone(size, tm->tm_vfsp->vfs_zone);
 128         }
 129         mutex_exit(&tm->tm_contents);
 130 }
 131 
 132 int
 133 tmp_taccess(void *vtp, int mode, struct cred *cred)
 134 {
 135         struct tmpnode *tp = vtp;
 136         int shift = 0;
 137         /*
 138          * Check access based on owner, group and
 139          * public permissions in tmpnode.
 140          */
 141         if (crgetuid(cred) != tp->tn_uid) {
 142                 shift += MODESHIFT;
 143                 if (groupmember(tp->tn_gid, cred) == 0)
 144                         shift += MODESHIFT;
 145         }
 146 
 147         return (secpolicy_vnode_access2(cred, TNTOV(tp), tp->tn_uid,
 148             tp->tn_mode << shift, mode));
 149 }
 150 
 151 /*


 157  * a plain file and you have write access to that file.
 158  * Function returns 0 if remove access is granted.
 159  */
 160 int
 161 tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
 162     struct cred *cr)
 163 {
 164         uid_t uid = crgetuid(cr);
 165 
 166         if ((dir->tn_mode & S_ISVTX) &&
 167             uid != dir->tn_uid &&
 168             uid != entry->tn_uid &&
 169             (entry->tn_type != VREG ||
 170             tmp_taccess(entry, VWRITE, cr) != 0))
 171                 return (secpolicy_vnode_remove(cr));
 172 
 173         return (0);
 174 }
 175 
 176 /*
 177  * Convert a string containing a number (number of bytes) to a size_t,
 178  * containing the corresponding number of bytes. On 32-bit kernels, the


































 179  * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
 180  * returned in 'maxbytes' is at most ULONG_MAX.
 181  *
 182  * The number may be followed by a magnitude suffix: "k" or "K" for kilobytes;
 183  * "m" or "M" for megabytes; "g" or "G" for gigabytes.  This interface allows
 184  * for an arguably esoteric interpretation of multiple suffix characters:
 185  * namely, they cascade.  For example, the caller may specify "2mk", which is
 186  * interpreted as 2 gigabytes.  It would seem, at this late stage, that the
 187  * horse has left not only the barn but indeed the country, and possibly the
 188  * entire planetary system. Alternatively, the number may be followed by a
 189  * single '%' sign, indicating the size is a percentage of either the zone's
 190  * swap limit or the system's overall swap size.
 191  *
 192  * Parse and overflow errors are detected and a non-zero number returned on
 193  * error.
 194  */
 195 int
 196 tmp_convnum(char *str, size_t *maxbytes)
 197 {
 198         u_longlong_t num = 0;
 199         u_longlong_t max_bytes = (uint64_t)SIZE_MAX;
 200         size_t pages;
 201 


 202         char *c;
 203         const struct convchar {
 204                 char *cc_char;
 205                 uint64_t cc_factor;
 206         } convchars[] = {
 207                 { "kK", KILOBYTE },
 208                 { "mM", MEGABYTE },
 209                 { "gG", GIGABYTE },
 210                 { NULL, 0 }
 211         };
 212 
 213         if (str == NULL) {
 214                 return (EINVAL);
 215         }
 216         c = str;
 217 
 218         /*
 219          * Convert the initial numeric portion of the input string.
 220          */
 221         if (ddi_strtoull(str, &c, 10, &num) != 0) {


 271                                  */
 272                                 if (num > max_bytes / convchars[i].cc_factor) {
 273                                         return (EINVAL);
 274                                 }
 275 
 276                                 num *= convchars[i].cc_factor;
 277                                 goto valid_char;
 278                         }
 279                 }
 280 
 281                 /*
 282                  * This was not a valid multiplier suffix character.
 283                  */
 284                 return (EINVAL);
 285 
 286 valid_char:
 287                 continue;
 288         }
 289 
 290 done:
 291 
 292         /*
 293          * We've been given a size in bytes; however, we want to make sure that
 294          * we have at least one page worth no matter what. Therefore we use
 295          * btopr to round up. However, this may cause an overflow only if 'num'
 296          * is between (max_bytes - PAGESIZE) and (max_bytes). In this case the
 297          * resulting number is zero, which is what we check for below. Note, we
 298          * require at least one page, so if pages is zero, well, it wasn't going
 299          * to work anyways.
 300          */
 301         pages = btopr(num);
 302         if (pages == 0) {
 303                 return (EINVAL);
 304         }
 305 
 306         *maxbytes = ptob(pages);
 307 
 308         return (0);
 309 }
 310 
 311 /*
 312  * Parse an octal mode string for use as the permissions set for the root
 313  * of the tmpfs mount.
 314  */
 315 int
 316 tmp_convmode(char *str, mode_t *mode)
 317 {
 318         ulong_t num;
 319         char *c;
 320 
 321         if (str == NULL) {
 322                 return (EINVAL);
 323         }
 324 
 325         if (ddi_strtoul(str, &c, 8, &num) != 0) {
 326                 return (EINVAL);
 327         }


   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2015 Joyent, Inc.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/errno.h>
  28 #include <sys/param.h>
  29 #include <sys/t_lock.h>
  30 #include <sys/systm.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/debug.h>
  33 #include <sys/time.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/vnode.h>
  36 #include <sys/stat.h>
  37 #include <sys/vfs.h>
  38 #include <sys/cred.h>
  39 #include <sys/kmem.h>
  40 #include <sys/atomic.h>
  41 #include <sys/policy.h>
  42 #include <sys/fs/tmp.h>
  43 #include <sys/fs/tmpnode.h>
  44 #include <sys/ddi.h>
  45 #include <sys/sunddi.h>

  46 
  47 #define KILOBYTE        1024
  48 #define MEGABYTE        (1024 * KILOBYTE)
  49 #define GIGABYTE        (1024 * MEGABYTE)
  50 
  51 #define MODESHIFT       3
  52 
  53 #define VALIDMODEBITS   07777
  54 
  55 extern pgcnt_t swapfs_minfree;
  56 










































































  57 int
  58 tmp_taccess(void *vtp, int mode, struct cred *cred)
  59 {
  60         struct tmpnode *tp = vtp;
  61         int shift = 0;
  62         /*
  63          * Check access based on owner, group and
  64          * public permissions in tmpnode.
  65          */
  66         if (crgetuid(cred) != tp->tn_uid) {
  67                 shift += MODESHIFT;
  68                 if (groupmember(tp->tn_gid, cred) == 0)
  69                         shift += MODESHIFT;
  70         }
  71 
  72         return (secpolicy_vnode_access2(cred, TNTOV(tp), tp->tn_uid,
  73             tp->tn_mode << shift, mode));
  74 }
  75 
  76 /*


  82  * a plain file and you have write access to that file.
  83  * Function returns 0 if remove access is granted.
  84  */
  85 int
  86 tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
  87         struct cred *cr)
  88 {
  89         uid_t uid = crgetuid(cr);
  90 
  91         if ((dir->tn_mode & S_ISVTX) &&
  92             uid != dir->tn_uid &&
  93             uid != entry->tn_uid &&
  94             (entry->tn_type != VREG ||
  95             tmp_taccess(entry, VWRITE, cr) != 0))
  96                 return (secpolicy_vnode_remove(cr));
  97 
  98         return (0);
  99 }
 100 
 101 /*
 102  * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded
 103  * or the 'musthave' flag is set.  'musthave' allocations should
 104  * always be subordinate to normal allocations so that tmpfs_maxkmem
 105  * can't be exceeded by more than a few KB.  Example: when creating
 106  * a new directory, the tmpnode is a normal allocation; if that
 107  * succeeds, the dirents for "." and ".." are 'musthave' allocations.
 108  */
 109 void *
 110 tmp_memalloc(size_t size, int musthave)
 111 {
 112         static time_t last_warning;
 113         time_t now;
 114 
 115         if (atomic_add_long_nv(&tmp_kmemspace, size) < tmpfs_maxkmem ||
 116             musthave)
 117                 return (kmem_zalloc(size, KM_SLEEP));
 118 
 119         atomic_add_long(&tmp_kmemspace, -size);
 120         now = gethrestime_sec();
 121         if (last_warning != now) {
 122                 last_warning = now;
 123                 cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit");
 124         }
 125         return (NULL);
 126 }
 127 
 128 void
 129 tmp_memfree(void *cp, size_t size)
 130 {
 131         kmem_free(cp, size);
 132         atomic_add_long(&tmp_kmemspace, -size);
 133 }
 134 
 135 /*
 136  * Convert a string containing a number (number of bytes) to a pgcnt_t,
 137  * containing the corresponding number of pages. On 32-bit kernels, the
 138  * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
 139  * returned in 'maxpg' is at most ULONG_MAX.
 140  *
 141  * The number may be followed by a magnitude suffix: "k" or "K" for kilobytes;
 142  * "m" or "M" for megabytes; "g" or "G" for gigabytes.  This interface allows
 143  * for an arguably esoteric interpretation of multiple suffix characters:
 144  * namely, they cascade.  For example, the caller may specify "2mk", which is
 145  * interpreted as 2 gigabytes.  It would seem, at this late stage, that the
 146  * horse has left not only the barn but indeed the country, and possibly the
 147  * entire planetary system. Alternatively, the number may be followed by a
 148  * single '%' sign, indicating the size is a percentage of either the zone's
 149  * swap limit or the system's overall swap size.
 150  *
 151  * Parse and overflow errors are detected and a non-zero number returned on
 152  * error.
 153  */
 154 int
 155 tmp_convnum(char *str, pgcnt_t *maxpg)
 156 {
 157         u_longlong_t num = 0;
 158 #ifdef _LP64
 159         u_longlong_t max_bytes = ULONG_MAX;
 160 #else
 161         u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
 162 #endif
 163         char *c;
 164         const struct convchar {
 165                 char *cc_char;
 166                 uint64_t cc_factor;
 167         } convchars[] = {
 168                 { "kK", KILOBYTE },
 169                 { "mM", MEGABYTE },
 170                 { "gG", GIGABYTE },
 171                 { NULL, 0 }
 172         };
 173 
 174         if (str == NULL) {
 175                 return (EINVAL);
 176         }
 177         c = str;
 178 
 179         /*
 180          * Convert the initial numeric portion of the input string.
 181          */
 182         if (ddi_strtoull(str, &c, 10, &num) != 0) {


 232                                  */
 233                                 if (num > max_bytes / convchars[i].cc_factor) {
 234                                         return (EINVAL);
 235                                 }
 236 
 237                                 num *= convchars[i].cc_factor;
 238                                 goto valid_char;
 239                         }
 240                 }
 241 
 242                 /*
 243                  * This was not a valid multiplier suffix character.
 244                  */
 245                 return (EINVAL);
 246 
 247 valid_char:
 248                 continue;
 249         }
 250 
 251 done:

 252         /*
 253          * Since btopr() rounds up to page granularity, this round-up can
 254          * cause an overflow only if 'num' is between (max_bytes - PAGESIZE)
 255          * and (max_bytes). In this case the resulting number is zero, which
 256          * is what we check for below.



 257          */
 258         if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0)

 259                 return (EINVAL);




 260         return (0);
 261 }
 262 
 263 /*
 264  * Parse an octal mode string for use as the permissions set for the root
 265  * of the tmpfs mount.
 266  */
 267 int
 268 tmp_convmode(char *str, mode_t *mode)
 269 {
 270         ulong_t num;
 271         char *c;
 272 
 273         if (str == NULL) {
 274                 return (EINVAL);
 275         }
 276 
 277         if (ddi_strtoul(str, &c, 8, &num) != 0) {
 278                 return (EINVAL);
 279         }