Print this page


Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/tmpfs/tmp_subr.c
          +++ new/usr/src/uts/common/fs/tmpfs/tmp_subr.c
↓ open down ↓ 12 lines elided ↑ open up ↑
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright 2016 Joyent, Inc.
       23 + * Copyright 2015 Joyent, Inc.
  24   24   */
  25   25  
  26   26  #include <sys/types.h>
  27   27  #include <sys/errno.h>
  28   28  #include <sys/param.h>
  29   29  #include <sys/t_lock.h>
  30   30  #include <sys/systm.h>
  31   31  #include <sys/sysmacros.h>
  32   32  #include <sys/debug.h>
  33   33  #include <sys/time.h>
↓ open down ↓ 2 lines elided ↑ open up ↑
  36   36  #include <sys/stat.h>
  37   37  #include <sys/vfs.h>
  38   38  #include <sys/cred.h>
  39   39  #include <sys/kmem.h>
  40   40  #include <sys/atomic.h>
  41   41  #include <sys/policy.h>
  42   42  #include <sys/fs/tmp.h>
  43   43  #include <sys/fs/tmpnode.h>
  44   44  #include <sys/ddi.h>
  45   45  #include <sys/sunddi.h>
  46      -#include <vm/anon.h>
  47   46  
  48   47  #define KILOBYTE        1024
  49   48  #define MEGABYTE        (1024 * KILOBYTE)
  50   49  #define GIGABYTE        (1024 * MEGABYTE)
  51   50  
  52   51  #define MODESHIFT       3
  53   52  
  54   53  #define VALIDMODEBITS   07777
  55   54  
  56   55  extern pgcnt_t swapfs_minfree;
  57   56  
  58      -void *
  59      -tmp_kmem_zalloc(struct tmount *tm, size_t size, int flag)
  60      -{
  61      -        void *buf;
  62      -        zone_t *zone;
  63      -        size_t pages;
  64      -
  65      -        mutex_enter(&tm->tm_contents);
  66      -        zone = tm->tm_vfsp->vfs_zone;
  67      -        if (tm->tm_anonmem + size > tm->tm_anonmax ||
  68      -            tm->tm_anonmem + size < tm->tm_anonmem ||
  69      -            size + ptob(tmpfs_minfree) <= size ||
  70      -            !anon_checkspace(size + ptob(tmpfs_minfree), zone)) {
  71      -                mutex_exit(&tm->tm_contents);
  72      -                return (NULL);
  73      -        }
  74      -
  75      -        /*
  76      -         * Only make anonymous memory reservations when a page boundary is
  77      -         * crossed.  This is necessary since the anon_resv functions round up
  78      -         * to PAGESIZE internally.
  79      -         */
  80      -        pages = btopr(tm->tm_allocmem + size);
  81      -        pages -= btopr(tm->tm_allocmem);
  82      -        if (pages > 0 && anon_try_resv_zone(ptob(pages), zone) == 0) {
  83      -                mutex_exit(&tm->tm_contents);
  84      -                return (NULL);
  85      -        }
  86      -
  87      -        tm->tm_allocmem += size;
  88      -        tm->tm_anonmem += size;
  89      -        mutex_exit(&tm->tm_contents);
  90      -
  91      -        buf = kmem_zalloc(size, flag);
  92      -        if (buf == NULL) {
  93      -                mutex_enter(&tm->tm_contents);
  94      -                ASSERT(tm->tm_anonmem > tm->tm_anonmem - size);
  95      -                tm->tm_anonmem -= size;
  96      -                if (pages > 0) {
  97      -                        /*
  98      -                         * Re-chasing the zone pointer is necessary since a
  99      -                         * forced umount could have been performed while the
 100      -                         * tm_contents lock was dropped during allocation.
 101      -                         */
 102      -                        anon_unresv_zone(ptob(pages), tm->tm_vfsp->vfs_zone);
 103      -                }
 104      -                mutex_exit(&tm->tm_contents);
 105      -        }
 106      -
 107      -        return (buf);
 108      -}
 109      -
 110      -void
 111      -tmp_kmem_free(struct tmount *tm, void *buf, size_t size)
 112      -{
 113      -        size_t pages;
 114      -
 115      -        kmem_free(buf, size);
 116      -        mutex_enter(&tm->tm_contents);
 117      -        ASSERT(tm->tm_anonmem > tm->tm_anonmem - size);
 118      -        tm->tm_anonmem -= size;
 119      -        pages = btopr(tm->tm_allocmem);
 120      -        tm->tm_allocmem -= size;
 121      -        pages -= btopr(tm->tm_allocmem);
 122      -        /*
 123      -         * Like the tmp_kmem_zalloc case, only unreserve anonymous memory when
 124      -         * a page boundary has been crossed.
 125      -         */
 126      -        if (pages > 0) {
 127      -                anon_unresv_zone(size, tm->tm_vfsp->vfs_zone);
 128      -        }
 129      -        mutex_exit(&tm->tm_contents);
 130      -}
 131      -
 132   57  int
 133   58  tmp_taccess(void *vtp, int mode, struct cred *cred)
 134   59  {
 135   60          struct tmpnode *tp = vtp;
 136   61          int shift = 0;
 137   62          /*
 138   63           * Check access based on owner, group and
 139   64           * public permissions in tmpnode.
 140   65           */
 141   66          if (crgetuid(cred) != tp->tn_uid) {
↓ open down ↓ 10 lines elided ↑ open up ↑
 152   77   * Decide whether it is okay to remove within a sticky directory.
 153   78   * Two conditions need to be met:  write access to the directory
 154   79   * is needed.  In sticky directories, write access is not sufficient;
 155   80   * you can remove entries from a directory only if you own the directory,
 156   81   * if you are privileged, if you own the entry or if the entry is
 157   82   * a plain file and you have write access to that file.
 158   83   * Function returns 0 if remove access is granted.
 159   84   */
 160   85  int
 161   86  tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
 162      -    struct cred *cr)
       87 +        struct cred *cr)
 163   88  {
 164   89          uid_t uid = crgetuid(cr);
 165   90  
 166   91          if ((dir->tn_mode & S_ISVTX) &&
 167   92              uid != dir->tn_uid &&
 168   93              uid != entry->tn_uid &&
 169   94              (entry->tn_type != VREG ||
 170   95              tmp_taccess(entry, VWRITE, cr) != 0))
 171   96                  return (secpolicy_vnode_remove(cr));
 172   97  
 173   98          return (0);
 174   99  }
 175  100  
 176  101  /*
 177      - * Convert a string containing a number (number of bytes) to a size_t,
 178      - * containing the corresponding number of bytes. On 32-bit kernels, the
      102 + * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded
      103 + * or the 'musthave' flag is set.  'musthave' allocations should
      104 + * always be subordinate to normal allocations so that tmpfs_maxkmem
      105 + * can't be exceeded by more than a few KB.  Example: when creating
      106 + * a new directory, the tmpnode is a normal allocation; if that
      107 + * succeeds, the dirents for "." and ".." are 'musthave' allocations.
      108 + */
      109 +void *
      110 +tmp_memalloc(size_t size, int musthave)
      111 +{
      112 +        static time_t last_warning;
      113 +        time_t now;
      114 +
      115 +        if (atomic_add_long_nv(&tmp_kmemspace, size) < tmpfs_maxkmem ||
      116 +            musthave)
      117 +                return (kmem_zalloc(size, KM_SLEEP));
      118 +
      119 +        atomic_add_long(&tmp_kmemspace, -size);
      120 +        now = gethrestime_sec();
      121 +        if (last_warning != now) {
      122 +                last_warning = now;
      123 +                cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit");
      124 +        }
      125 +        return (NULL);
      126 +}
      127 +
      128 +void
      129 +tmp_memfree(void *cp, size_t size)
      130 +{
      131 +        kmem_free(cp, size);
      132 +        atomic_add_long(&tmp_kmemspace, -size);
      133 +}
      134 +
      135 +/*
      136 + * Convert a string containing a number (number of bytes) to a pgcnt_t,
      137 + * containing the corresponding number of pages. On 32-bit kernels, the
 179  138   * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
 180  139   * returned in 'maxpg' is at most ULONG_MAX.
 181  140   *
 182  141   * The number may be followed by a magnitude suffix: "k" or "K" for kilobytes;
 183  142   * "m" or "M" for megabytes; "g" or "G" for gigabytes.  This interface allows
 184  143   * for an arguably esoteric interpretation of multiple suffix characters:
 185  144   * namely, they cascade.  For example, the caller may specify "2mk", which is
 186  145   * interpreted as 2 gigabytes.  It would seem, at this late stage, that the
 187  146   * horse has left not only the barn but indeed the country, and possibly the
 188  147   * entire planetary system. Alternatively, the number may be followed by a
 189  148   * single '%' sign, indicating the size is a percentage of either the zone's
 190  149   * swap limit or the system's overall swap size.
 191  150   *
 192  151   * Parse and overflow errors are detected and a non-zero number returned on
 193  152   * error.
 194  153   */
 195  154  int
 196      -tmp_convnum(char *str, size_t *maxbytes)
      155 +tmp_convnum(char *str, pgcnt_t *maxpg)
 197  156  {
 198  157          u_longlong_t num = 0;
 199      -        u_longlong_t max_bytes = (uint64_t)SIZE_MAX;
 200      -        size_t pages;
 201      -
      158 +#ifdef _LP64
      159 +        u_longlong_t max_bytes = ULONG_MAX;
      160 +#else
      161 +        u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
      162 +#endif
 202  163          char *c;
 203  164          const struct convchar {
 204  165                  char *cc_char;
 205  166                  uint64_t cc_factor;
 206  167          } convchars[] = {
 207  168                  { "kK", KILOBYTE },
 208  169                  { "mM", MEGABYTE },
 209  170                  { "gG", GIGABYTE },
 210  171                  { NULL, 0 }
 211  172          };
↓ open down ↓ 69 lines elided ↑ open up ↑
 281  242                  /*
 282  243                   * This was not a valid multiplier suffix character.
 283  244                   */
 284  245                  return (EINVAL);
 285  246  
 286  247  valid_char:
 287  248                  continue;
 288  249          }
 289  250  
 290  251  done:
 291      -
 292  252          /*
 293      -         * We've been given a size in bytes; however, we want to make sure that
 294      -         * we have at least one page worth no matter what. Therefore we use
 295      -         * btopr to round up. However, this may cause an overflow only if 'num'
 296      -         * is between (max_bytes - PAGESIZE) and (max_bytes). In this case the
 297      -         * resulting number is zero, which is what we check for below. Note, we
 298      -         * require at least one page, so if pages is zero, well, it wasn't going
 299      -         * to work anyways.
      253 +         * Since btopr() rounds up to page granularity, this round-up can
      254 +         * cause an overflow only if 'num' is between (max_bytes - PAGESIZE)
      255 +         * and (max_bytes). In this case the resulting number is zero, which
      256 +         * is what we check for below.
 300  257           */
 301      -        pages = btopr(num);
 302      -        if (pages == 0) {
      258 +        if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0)
 303  259                  return (EINVAL);
 304      -        }
 305      -
 306      -        *maxbytes = ptob(pages);
 307      -
 308  260          return (0);
 309  261  }
 310  262  
 311  263  /*
 312  264   * Parse an octal mode string for use as the permissions set for the root
 313  265   * of the tmpfs mount.
 314  266   */
 315  267  int
 316  268  tmp_convmode(char *str, mode_t *mode)
 317  269  {
↓ open down ↓ 18 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX