Print this page
Reduce lint
OS-4060 need tmpfs size support in percent
OS-4061 invalid tmpfs option causes panic
OS-4043 tmpfs should support gigabyte sizes
OS-4044 tmpfs should support "mode" option
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>


   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.

  23  */
  24 
  25 #include <sys/types.h>
  26 #include <sys/errno.h>
  27 #include <sys/param.h>
  28 #include <sys/t_lock.h>
  29 #include <sys/systm.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/debug.h>
  32 #include <sys/time.h>
  33 #include <sys/cmn_err.h>
  34 #include <sys/vnode.h>
  35 #include <sys/stat.h>
  36 #include <sys/vfs.h>
  37 #include <sys/cred.h>
  38 #include <sys/kmem.h>
  39 #include <sys/atomic.h>
  40 #include <sys/policy.h>
  41 #include <sys/fs/tmp.h>
  42 #include <sys/fs/tmpnode.h>


  43 




  44 #define MODESHIFT       3
  45 




  46 int
  47 tmp_taccess(void *vtp, int mode, struct cred *cred)
  48 {
  49         struct tmpnode *tp = vtp;
  50         int shift = 0;
  51         /*
  52          * Check access based on owner, group and
  53          * public permissions in tmpnode.
  54          */
  55         if (crgetuid(cred) != tp->tn_uid) {
  56                 shift += MODESHIFT;
  57                 if (groupmember(tp->tn_gid, cred) == 0)
  58                         shift += MODESHIFT;
  59         }
  60 
  61         return (secpolicy_vnode_access2(cred, TNTOV(tp), tp->tn_uid,
  62             tp->tn_mode << shift, mode));
  63 }
  64 
  65 /*
  66  * Decide whether it is okay to remove within a sticky directory.
  67  * Two conditions need to be met:  write access to the directory
  68  * is needed.  In sticky directories, write access is not sufficient;
  69  * you can remove entries from a directory only if you own the directory,
  70  * if you are privileged, if you own the entry or if they entry is
  71  * a plain file and you have write access to that file.
  72  * Function returns 0 if remove access is granted.
  73  */
  74 
  75 int
  76 tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
  77         struct cred *cr)
  78 {
  79         uid_t uid = crgetuid(cr);
  80 
  81         if ((dir->tn_mode & S_ISVTX) &&
  82             uid != dir->tn_uid &&
  83             uid != entry->tn_uid &&
  84             (entry->tn_type != VREG ||
  85             tmp_taccess(entry, VWRITE, cr) != 0))
  86                 return (secpolicy_vnode_remove(cr));
  87 
  88         return (0);
  89 }
  90 
  91 /*
  92  * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded
  93  * or the 'musthave' flag is set.  'musthave' allocations should
  94  * always be subordinate to normal allocations so that tmpfs_maxkmem


 111         if (last_warning != now) {
 112                 last_warning = now;
 113                 cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit");
 114         }
 115         return (NULL);
 116 }
 117 
 118 void
 119 tmp_memfree(void *cp, size_t size)
 120 {
 121         kmem_free(cp, size);
 122         atomic_add_long(&tmp_kmemspace, -size);
 123 }
 124 
 125 /*
 126  * Convert a string containing a number (number of bytes) to a pgcnt_t,
 127  * containing the corresponding number of pages. On 32-bit kernels, the
 128  * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
 129  * returned in 'maxpg' is at most ULONG_MAX.
 130  *
 131  * If the number is followed by a "k" or "K", the value is converted from
 132  * kilobytes to bytes.  If it is followed by an "m" or "M" it is converted
 133  * from megabytes to bytes.  If it is not followed by a character it is
 134  * assumed to be in bytes. Multiple letter options are allowed, so for instance
 135  * '2mk' is interpreted as 2gb.




 136  *
 137  * Parse and overflow errors are detected and a non-zero number returned on
 138  * error.
 139  */
 140 
 141 int
 142 tmp_convnum(char *str, pgcnt_t *maxpg)
 143 {
 144         uint64_t num = 0, oldnum;
 145 #ifdef _LP64
 146         uint64_t max_bytes = ULONG_MAX;
 147 #else
 148         uint64_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
 149 #endif
 150         char *c;









 151 
 152         if (str == NULL)
 153                 return (EINVAL);

 154         c = str;
 155 
 156         /*
 157          * Convert str to number
 158          */
 159         while ((*c >= '0') && (*c <= '9')) {
 160                 oldnum = num;
 161                 num = num * 10 + (*c++ - '0');
 162                 if (oldnum > num) /* overflow */
 163                         return (EINVAL);
 164         }
 165 
 166         /*
 167          * Terminate on null



 168          */
 169         while (*c != '\0') {
 170                 switch (*c++) {
 171 








 172                 /*
 173                  * convert from kilobytes
 174                  */
 175                 case 'k':
 176                 case 'K':
 177                         if (num > max_bytes / 1024) /* will overflow */
 178                                 return (EINVAL);
 179                         num *= 1024;
 180                         break;
 181 




 182                 /*
 183                  * convert from megabytes

 184                  */
 185                 case 'm':
 186                 case 'M':
 187                         if (num > max_bytes / (1024 * 1024)) /* will overflow */
 188                                 return (EINVAL);
 189                         num *= 1024 * 1024;
 190                         break;
 191 
 192                 default:









 193                         return (EINVAL);
 194                 }



 195         }

 196 
 197         /*










 198          * Since btopr() rounds up to page granularity, this round-up can
 199          * cause an overflow only if 'num' is between (max_bytes - PAGESIZE)
 200          * and (max_bytes). In this case the resulting number is zero, which
 201          * is what we check for below.
 202          */
 203         if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0)
 204                 return (EINVAL);
 205         return (0);
 206 }




























   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2015 Joyent, Inc.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/errno.h>
  28 #include <sys/param.h>
  29 #include <sys/t_lock.h>
  30 #include <sys/systm.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/debug.h>
  33 #include <sys/time.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/vnode.h>
  36 #include <sys/stat.h>
  37 #include <sys/vfs.h>
  38 #include <sys/cred.h>
  39 #include <sys/kmem.h>
  40 #include <sys/atomic.h>
  41 #include <sys/policy.h>
  42 #include <sys/fs/tmp.h>
  43 #include <sys/fs/tmpnode.h>
  44 #include <sys/ddi.h>
  45 #include <sys/sunddi.h>
  46 
  47 #define KILOBYTE        1024
  48 #define MEGABYTE        (1024 * KILOBYTE)
  49 #define GIGABYTE        (1024 * MEGABYTE)
  50 
  51 #define MODESHIFT       3
  52 
  53 #define VALIDMODEBITS   07777
  54 
  55 extern pgcnt_t swapfs_minfree;
  56 
  57 int
  58 tmp_taccess(void *vtp, int mode, struct cred *cred)
  59 {
  60         struct tmpnode *tp = vtp;
  61         int shift = 0;
  62         /*
  63          * Check access based on owner, group and
  64          * public permissions in tmpnode.
  65          */
  66         if (crgetuid(cred) != tp->tn_uid) {
  67                 shift += MODESHIFT;
  68                 if (groupmember(tp->tn_gid, cred) == 0)
  69                         shift += MODESHIFT;
  70         }
  71 
  72         return (secpolicy_vnode_access2(cred, TNTOV(tp), tp->tn_uid,
  73             tp->tn_mode << shift, mode));
  74 }
  75 
  76 /*
  77  * Decide whether it is okay to remove within a sticky directory.
  78  * Two conditions need to be met:  write access to the directory
  79  * is needed.  In sticky directories, write access is not sufficient;
  80  * you can remove entries from a directory only if you own the directory,
  81  * if you are privileged, if you own the entry or if they entry is
  82  * a plain file and you have write access to that file.
  83  * Function returns 0 if remove access is granted.
  84  */

  85 int
  86 tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
  87         struct cred *cr)
  88 {
  89         uid_t uid = crgetuid(cr);
  90 
  91         if ((dir->tn_mode & S_ISVTX) &&
  92             uid != dir->tn_uid &&
  93             uid != entry->tn_uid &&
  94             (entry->tn_type != VREG ||
  95             tmp_taccess(entry, VWRITE, cr) != 0))
  96                 return (secpolicy_vnode_remove(cr));
  97 
  98         return (0);
  99 }
 100 
 101 /*
 102  * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded
 103  * or the 'musthave' flag is set.  'musthave' allocations should
 104  * always be subordinate to normal allocations so that tmpfs_maxkmem


 121         if (last_warning != now) {
 122                 last_warning = now;
 123                 cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit");
 124         }
 125         return (NULL);
 126 }
 127 
 128 void
 129 tmp_memfree(void *cp, size_t size)
 130 {
 131         kmem_free(cp, size);
 132         atomic_add_long(&tmp_kmemspace, -size);
 133 }
 134 
 135 /*
 136  * Convert a string containing a number (number of bytes) to a pgcnt_t,
 137  * containing the corresponding number of pages. On 32-bit kernels, the
 138  * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
 139  * returned in 'maxpg' is at most ULONG_MAX.
 140  *
 141  * The number may be followed by a magnitude suffix: "k" or "K" for kilobytes;
 142  * "m" or "M" for megabytes; "g" or "G" for gigabytes.  This interface allows
 143  * for an arguably esoteric interpretation of multiple suffix characters:
 144  * namely, they cascade.  For example, the caller may specify "2mk", which is
 145  * interpreted as 2 gigabytes.  It would seem, at this late stage, that the
 146  * horse has left not only the barn but indeed the country, and possibly the
 147  * entire planetary system. Alternatively, the number may be followed by a
 148  * single '%' sign, indicating the size is a percentage of either the zone's
 149  * swap limit or the system's overall swap size.
 150  *
 151  * Parse and overflow errors are detected and a non-zero number returned on
 152  * error.
 153  */

 154 int
 155 tmp_convnum(char *str, pgcnt_t *maxpg)
 156 {
 157         u_longlong_t num = 0;
 158 #ifdef _LP64
 159         u_longlong_t max_bytes = ULONG_MAX;
 160 #else
 161         u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
 162 #endif
 163         char *c;
 164         const struct convchar {
 165                 char *cc_char;
 166                 uint64_t cc_factor;
 167         } convchars[] = {
 168                 { "kK", KILOBYTE },
 169                 { "mM", MEGABYTE },
 170                 { "gG", GIGABYTE },
 171                 { NULL, 0 }
 172         };
 173 
 174         if (str == NULL) {
 175                 return (EINVAL);
 176         }
 177         c = str;
 178 
 179         /*
 180          * Convert the initial numeric portion of the input string.
 181          */
 182         if (ddi_strtoull(str, &c, 10, &num) != 0) {



 183                 return (EINVAL);
 184         }
 185 
 186         /*
 187          * Handle a size in percent. Anything other than a single percent
 188          * modifier is invalid. We use either the zone's swap limit or the
 189          * system's total available swap size as the initial value. Perform the
 190          * intermediate calculation in pages to avoid overflow.
 191          */
 192         if (*c == '%') {
 193                 u_longlong_t cap;
 194 
 195                 if (*(c + 1) != '\0')
 196                         return (EINVAL);
 197 
 198                 if (num > 100)
 199                         return (EINVAL);
 200 
 201                 cap = (u_longlong_t)curproc->p_zone->zone_max_swap_ctl;
 202                 if (cap == UINT64_MAX) {
 203                         /*
 204                          * Use the amount of available physical and memory swap
 205                          */
 206                         mutex_enter(&anoninfo_lock);
 207                         cap = TOTAL_AVAILABLE_SWAP;
 208                         mutex_exit(&anoninfo_lock);
 209                 } else {
 210                         cap = btop(cap);
 211                 }
 212 
 213                 num = ptob(cap * num / 100);
 214                 goto done;
 215         }
 216 
 217         /*
 218          * Apply the (potentially cascading) magnitude suffixes until an
 219          * invalid character is found, or the string comes to an end.
 220          */
 221         for (; *c != '\0'; c++) {
 222                 int i;




 223 
 224                 for (i = 0; convchars[i].cc_char != NULL; i++) {
 225                         /*
 226                          * Check if this character matches this multiplier
 227                          * class:
 228                          */
 229                         if (strchr(convchars[i].cc_char, *c) != NULL) {
 230                                 /*
 231                                  * Check for overflow:
 232                                  */
 233                                 if (num > max_bytes / convchars[i].cc_factor) {
 234                                         return (EINVAL);
 235                                 }
 236 
 237                                 num *= convchars[i].cc_factor;
 238                                 goto valid_char;
 239                         }
 240                 }
 241 
 242                 /*
 243                  * This was not a valid multiplier suffix character.
 244                  */
 245                 return (EINVAL);
 246 
 247 valid_char:
 248                 continue;
 249         }
 250 
 251 done:
 252         /*
 253          * Since btopr() rounds up to page granularity, this round-up can
 254          * cause an overflow only if 'num' is between (max_bytes - PAGESIZE)
 255          * and (max_bytes). In this case the resulting number is zero, which
 256          * is what we check for below.
 257          */
 258         if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0)
 259                 return (EINVAL);
 260         return (0);
 261 }
 262 
 263 /*
 264  * Parse an octal mode string for use as the permissions set for the root
 265  * of the tmpfs mount.
 266  */
 267 int
 268 tmp_convmode(char *str, mode_t *mode)
 269 {
 270         ulong_t num;
 271         char *c;
 272 
 273         if (str == NULL) {
 274                 return (EINVAL);
 275         }
 276 
 277         if (ddi_strtoul(str, &c, 8, &num) != 0) {
 278                 return (EINVAL);
 279         }
 280 
 281         if ((num & ~VALIDMODEBITS) != 0) {
 282                 return (EINVAL);
 283         }
 284 
 285         *mode = VALIDMODEBITS & num;
 286         return (0);
 287 }