Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/tmpfs/tmp_subr.c
+++ new/usr/src/uts/common/fs/tmpfs/tmp_subr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright 2016 Joyent, Inc.
23 + * Copyright 2015 Joyent, Inc.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/errno.h>
28 28 #include <sys/param.h>
29 29 #include <sys/t_lock.h>
30 30 #include <sys/systm.h>
31 31 #include <sys/sysmacros.h>
32 32 #include <sys/debug.h>
33 33 #include <sys/time.h>
34 34 #include <sys/cmn_err.h>
35 35 #include <sys/vnode.h>
|
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
36 36 #include <sys/stat.h>
37 37 #include <sys/vfs.h>
38 38 #include <sys/cred.h>
39 39 #include <sys/kmem.h>
40 40 #include <sys/atomic.h>
41 41 #include <sys/policy.h>
42 42 #include <sys/fs/tmp.h>
43 43 #include <sys/fs/tmpnode.h>
44 44 #include <sys/ddi.h>
45 45 #include <sys/sunddi.h>
46 -#include <vm/anon.h>
47 46
48 47 #define KILOBYTE 1024
49 48 #define MEGABYTE (1024 * KILOBYTE)
50 49 #define GIGABYTE (1024 * MEGABYTE)
51 50
52 51 #define MODESHIFT 3
53 52
54 53 #define VALIDMODEBITS 07777
55 54
56 55 extern pgcnt_t swapfs_minfree;
57 56
58 -void *
59 -tmp_kmem_zalloc(struct tmount *tm, size_t size, int flag)
60 -{
61 - void *buf;
62 - zone_t *zone;
63 - size_t pages;
64 -
65 - mutex_enter(&tm->tm_contents);
66 - zone = tm->tm_vfsp->vfs_zone;
67 - if (tm->tm_anonmem + size > tm->tm_anonmax ||
68 - tm->tm_anonmem + size < tm->tm_anonmem ||
69 - size + ptob(tmpfs_minfree) <= size ||
70 - !anon_checkspace(size + ptob(tmpfs_minfree), zone)) {
71 - mutex_exit(&tm->tm_contents);
72 - return (NULL);
73 - }
74 -
75 - /*
76 - * Only make anonymous memory reservations when a page boundary is
77 - * crossed. This is necessary since the anon_resv functions rounds up
78 - * to PAGESIZE internally.
79 - */
80 - pages = btopr(tm->tm_allocmem + size);
81 - pages -= btopr(tm->tm_allocmem);
82 - if (pages > 0 && anon_try_resv_zone(ptob(pages), zone) == 0) {
83 - mutex_exit(&tm->tm_contents);
84 - return (NULL);
85 - }
86 -
87 - tm->tm_allocmem += size;
88 - tm->tm_anonmem += size;
89 - mutex_exit(&tm->tm_contents);
90 -
91 - buf = kmem_zalloc(size, flag);
92 - if (buf == NULL) {
93 - mutex_enter(&tm->tm_contents);
94 - ASSERT(tm->tm_anonmem > tm->tm_anonmem - size);
95 - tm->tm_anonmem -= size;
96 - if (pages > 0) {
97 - /*
98 - * Re-chasing the zone pointer is necessary since a
99 - * forced umount could have been performed while the
100 - * tm_contents lock was dropped during allocation.
101 - */
102 - anon_unresv_zone(ptob(pages), tm->tm_vfsp->vfs_zone);
103 - }
104 - mutex_exit(&tm->tm_contents);
105 - }
106 -
107 - return (buf);
108 -}
109 -
110 -void
111 -tmp_kmem_free(struct tmount *tm, void *buf, size_t size)
112 -{
113 - size_t pages;
114 -
115 - kmem_free(buf, size);
116 - mutex_enter(&tm->tm_contents);
117 - ASSERT(tm->tm_anonmem > tm->tm_anonmem - size);
118 - tm->tm_anonmem -= size;
119 - pages = btopr(tm->tm_allocmem);
120 - tm->tm_allocmem -= size;
121 - pages -= btopr(tm->tm_allocmem);
122 - /*
123 - * Like the tmp_kmem_zalloc case, only unreserve anonymous memory when
124 - * a page boundary has been crossed.
125 - */
126 - if (pages > 0) {
127 - anon_unresv_zone(size, tm->tm_vfsp->vfs_zone);
128 - }
129 - mutex_exit(&tm->tm_contents);
130 -}
131 -
132 57 int
133 58 tmp_taccess(void *vtp, int mode, struct cred *cred)
134 59 {
135 60 struct tmpnode *tp = vtp;
136 61 int shift = 0;
137 62 /*
138 63 * Check access based on owner, group and
139 64 * public permissions in tmpnode.
140 65 */
141 66 if (crgetuid(cred) != tp->tn_uid) {
142 67 shift += MODESHIFT;
143 68 if (groupmember(tp->tn_gid, cred) == 0)
144 69 shift += MODESHIFT;
145 70 }
146 71
147 72 return (secpolicy_vnode_access2(cred, TNTOV(tp), tp->tn_uid,
148 73 tp->tn_mode << shift, mode));
149 74 }
150 75
151 76 /*
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
152 77 * Decide whether it is okay to remove within a sticky directory.
153 78 * Two conditions need to be met: write access to the directory
154 79 * is needed. In sticky directories, write access is not sufficient;
155 80 * you can remove entries from a directory only if you own the directory,
156 81 * if you are privileged, if you own the entry or if the entry is
157 82 * a plain file and you have write access to that file.
158 83 * Function returns 0 if remove access is granted.
159 84 */
160 85 int
161 86 tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
162 - struct cred *cr)
87 + struct cred *cr)
163 88 {
164 89 uid_t uid = crgetuid(cr);
165 90
166 91 if ((dir->tn_mode & S_ISVTX) &&
167 92 uid != dir->tn_uid &&
168 93 uid != entry->tn_uid &&
169 94 (entry->tn_type != VREG ||
170 95 tmp_taccess(entry, VWRITE, cr) != 0))
171 96 return (secpolicy_vnode_remove(cr));
172 97
173 98 return (0);
174 99 }
175 100
176 101 /*
177 - * Convert a string containing a number (number of bytes) to a size_t,
178 - * containing the corresponding number of bytes. On 32-bit kernels, the
102 + * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded
103 + * or the 'musthave' flag is set. 'musthave' allocations should
104 + * always be subordinate to normal allocations so that tmpfs_maxkmem
105 + * can't be exceeded by more than a few KB. Example: when creating
106 + * a new directory, the tmpnode is a normal allocation; if that
107 + * succeeds, the dirents for "." and ".." are 'musthave' allocations.
108 + */
109 +void *
110 +tmp_memalloc(size_t size, int musthave)
111 +{
112 + static time_t last_warning;
113 + time_t now;
114 +
115 + if (atomic_add_long_nv(&tmp_kmemspace, size) < tmpfs_maxkmem ||
116 + musthave)
117 + return (kmem_zalloc(size, KM_SLEEP));
118 +
119 + atomic_add_long(&tmp_kmemspace, -size);
120 + now = gethrestime_sec();
121 + if (last_warning != now) {
122 + last_warning = now;
123 + cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit");
124 + }
125 + return (NULL);
126 +}
127 +
128 +void
129 +tmp_memfree(void *cp, size_t size)
130 +{
131 + kmem_free(cp, size);
132 + atomic_add_long(&tmp_kmemspace, -size);
133 +}
134 +
135 +/*
136 + * Convert a string containing a number (number of bytes) to a pgcnt_t,
137 + * containing the corresponding number of pages. On 32-bit kernels, the
179 138 * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
180 139 * returned in 'maxpg' is at most ULONG_MAX.
181 140 *
182 141 * The number may be followed by a magnitude suffix: "k" or "K" for kilobytes;
183 142 * "m" or "M" for megabytes; "g" or "G" for gigabytes. This interface allows
184 143 * for an arguably esoteric interpretation of multiple suffix characters:
185 144 * namely, they cascade. For example, the caller may specify "2mk", which is
186 145 * interpreted as 2 gigabytes. It would seem, at this late stage, that the
187 146 * horse has left not only the barn but indeed the country, and possibly the
188 147 * entire planetary system. Alternatively, the number may be followed by a
189 148 * single '%' sign, indicating the size is a percentage of either the zone's
190 149 * swap limit or the system's overall swap size.
191 150 *
192 151 * Parse and overflow errors are detected and a non-zero number returned on
193 152 * error.
194 153 */
195 154 int
196 -tmp_convnum(char *str, size_t *maxbytes)
155 +tmp_convnum(char *str, pgcnt_t *maxpg)
197 156 {
198 157 u_longlong_t num = 0;
199 - u_longlong_t max_bytes = (uint64_t)SIZE_MAX;
200 - size_t pages;
201 -
158 +#ifdef _LP64
159 + u_longlong_t max_bytes = ULONG_MAX;
160 +#else
161 + u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
162 +#endif
202 163 char *c;
203 164 const struct convchar {
204 165 char *cc_char;
205 166 uint64_t cc_factor;
206 167 } convchars[] = {
207 168 { "kK", KILOBYTE },
208 169 { "mM", MEGABYTE },
209 170 { "gG", GIGABYTE },
210 171 { NULL, 0 }
211 172 };
212 173
213 174 if (str == NULL) {
214 175 return (EINVAL);
215 176 }
216 177 c = str;
217 178
218 179 /*
219 180 * Convert the initial numeric portion of the input string.
220 181 */
221 182 if (ddi_strtoull(str, &c, 10, &num) != 0) {
222 183 return (EINVAL);
223 184 }
224 185
225 186 /*
226 187 * Handle a size in percent. Anything other than a single percent
227 188 * modifier is invalid. We use either the zone's swap limit or the
228 189 * system's total available swap size as the initial value. Perform the
229 190 * intermediate calculation in pages to avoid overflow.
230 191 */
231 192 if (*c == '%') {
232 193 u_longlong_t cap;
233 194
234 195 if (*(c + 1) != '\0')
235 196 return (EINVAL);
236 197
237 198 if (num > 100)
238 199 return (EINVAL);
239 200
240 201 cap = (u_longlong_t)curproc->p_zone->zone_max_swap_ctl;
241 202 if (cap == UINT64_MAX) {
242 203 /*
243 204 * Use the amount of available physical memory and swap
244 205 */
245 206 mutex_enter(&anoninfo_lock);
246 207 cap = TOTAL_AVAILABLE_SWAP;
247 208 mutex_exit(&anoninfo_lock);
248 209 } else {
249 210 cap = btop(cap);
250 211 }
251 212
252 213 num = ptob(cap * num / 100);
253 214 goto done;
254 215 }
255 216
256 217 /*
257 218 * Apply the (potentially cascading) magnitude suffixes until an
258 219 * invalid character is found, or the string comes to an end.
259 220 */
260 221 for (; *c != '\0'; c++) {
261 222 int i;
262 223
263 224 for (i = 0; convchars[i].cc_char != NULL; i++) {
264 225 /*
265 226 * Check if this character matches this multiplier
266 227 * class:
267 228 */
268 229 if (strchr(convchars[i].cc_char, *c) != NULL) {
269 230 /*
270 231 * Check for overflow:
271 232 */
272 233 if (num > max_bytes / convchars[i].cc_factor) {
273 234 return (EINVAL);
274 235 }
275 236
276 237 num *= convchars[i].cc_factor;
277 238 goto valid_char;
278 239 }
279 240 }
280 241
|
↓ open down ↓ |
69 lines elided |
↑ open up ↑ |
281 242 /*
282 243 * This was not a valid multiplier suffix character.
283 244 */
284 245 return (EINVAL);
285 246
286 247 valid_char:
287 248 continue;
288 249 }
289 250
290 251 done:
291 -
292 252 /*
293 - * We've been given a size in bytes; however, we want to make sure that
294 - * we have at least one page worth no matter what. Therefore we use
295 - * btopr to round up. However, this may cause an overflow only if 'num'
296 - * is between (max_bytes - PAGESIZE) and (max_bytes). In this case the
297 - * resulting number is zero, which is what we check for below. Note, we
298 - * require at least one page, so if pages is zero, well, it wasn't going
299 - * to work anyways.
253 + * Since btopr() rounds up to page granularity, this round-up can
254 + * cause an overflow only if 'num' is between (max_bytes - PAGESIZE)
255 + * and (max_bytes). In this case the resulting number is zero, which
256 + * is what we check for below.
300 257 */
301 - pages = btopr(num);
302 - if (pages == 0) {
258 + if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0)
303 259 return (EINVAL);
304 - }
305 -
306 - *maxbytes = ptob(pages);
307 -
308 260 return (0);
309 261 }
310 262
311 263 /*
312 264 * Parse an octal mode string for use as the permissions set for the root
313 265 * of the tmpfs mount.
314 266 */
315 267 int
316 268 tmp_convmode(char *str, mode_t *mode)
317 269 {
318 270 ulong_t num;
319 271 char *c;
320 272
321 273 if (str == NULL) {
322 274 return (EINVAL);
323 275 }
324 276
325 277 if (ddi_strtoul(str, &c, 8, &num) != 0) {
326 278 return (EINVAL);
327 279 }
328 280
329 281 if ((num & ~VALIDMODEBITS) != 0) {
330 282 return (EINVAL);
331 283 }
332 284
333 285 *mode = VALIDMODEBITS & num;
334 286 return (0);
335 287 }
|
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX