Print this page
OS-4191 blown assert on lx zone with delegated dataset
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/dev/sdev_zvolops.c
+++ new/usr/src/uts/common/fs/dev/sdev_zvolops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2013, 2016 Joyent, Inc. All rights reserved.
25 25 * Copyright (c) 2014 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /* vnode ops for the /dev/zvol directory */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/param.h>
32 32 #include <sys/sysmacros.h>
33 33 #include <sys/ddi.h>
34 34 #include <sys/sunndi.h>
35 35 #include <sys/sunldi.h>
36 36 #include <fs/fs_subr.h>
37 37 #include <sys/fs/dv_node.h>
38 38 #include <sys/fs/sdev_impl.h>
39 39 #include <sys/zfs_ioctl.h>
40 40 #include <sys/policy.h>
41 41 #include <sys/stat.h>
42 42 #include <sys/vfs_opreg.h>
43 43
44 44 struct vnodeops *devzvol_vnodeops;
45 45 static major_t devzvol_major;
46 46 static taskq_ent_t devzvol_zclist_task;
47 47
48 48 static kmutex_t devzvol_mtx;
49 49 /* Below are protected by devzvol_mtx */
50 50 static boolean_t devzvol_isopen;
51 51 static boolean_t devzvol_zclist_task_running = B_FALSE;
52 52 static uint64_t devzvol_gen = 0;
53 53 static uint64_t devzvol_zclist;
54 54 static size_t devzvol_zclist_size;
55 55 static ldi_ident_t devzvol_li;
56 56 static ldi_handle_t devzvol_lh;
57 57
58 58 /*
59 59 * we need to use ddi_mod* since fs/dev gets loaded early on in
60 60 * startup(), and linking fs/dev to fs/zfs would drag in a lot of
61 61 * other stuff (like drv/random) before the rest of the system is
62 62 * ready to go
63 63 */
64 64 ddi_modhandle_t zfs_mod;
65 65 int (*szcm)(char *);
66 66 int (*szn2m)(char *, minor_t *);
67 67
68 68
69 69 /*
70 70 * Enable/disable snapshots from being created in /dev/zvol. By default,
71 71 * they are enabled, preserving the historic behavior.
72 72 */
73 73 boolean_t devzvol_snaps_allowed = B_TRUE;
74 74
75 75 int
76 76 sdev_zvol_create_minor(char *dsname)
77 77 {
78 78 if (szcm == NULL)
79 79 return (-1);
80 80 return ((*szcm)(dsname));
81 81 }
82 82
83 83 int
84 84 sdev_zvol_name2minor(char *dsname, minor_t *minor)
85 85 {
86 86 if (szn2m == NULL)
87 87 return (-1);
88 88 return ((*szn2m)(dsname, minor));
89 89 }
90 90
91 91 int
92 92 devzvol_open_zfs()
93 93 {
94 94 int rc;
95 95 dev_t dv;
96 96
97 97 devzvol_li = ldi_ident_from_anon();
98 98 if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
99 99 &devzvol_lh, devzvol_li))
100 100 return (-1);
101 101 if (zfs_mod == NULL && ((zfs_mod = ddi_modopen("fs/zfs",
102 102 KRTLD_MODE_FIRST, &rc)) == NULL)) {
103 103 return (rc);
104 104 }
105 105 ASSERT(szcm == NULL && szn2m == NULL);
106 106 if ((szcm = (int (*)(char *))
107 107 ddi_modsym(zfs_mod, "zvol_create_minor", &rc)) == NULL) {
108 108 cmn_err(CE_WARN, "couldn't resolve zvol_create_minor");
109 109 return (rc);
110 110 }
111 111 if ((szn2m = (int(*)(char *, minor_t *))
112 112 ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) {
113 113 cmn_err(CE_WARN, "couldn't resolve zvol_name2minor");
114 114 return (rc);
115 115 }
116 116 if (ldi_get_dev(devzvol_lh, &dv))
117 117 return (-1);
118 118 devzvol_major = getmajor(dv);
119 119 return (0);
120 120 }
121 121
122 122 void
123 123 devzvol_close_zfs()
124 124 {
125 125 szcm = NULL;
126 126 szn2m = NULL;
127 127 (void) ldi_close(devzvol_lh, FREAD|FWRITE, kcred);
128 128 ldi_ident_release(devzvol_li);
129 129 if (zfs_mod != NULL) {
130 130 (void) ddi_modclose(zfs_mod);
131 131 zfs_mod = NULL;
132 132 }
133 133 }
134 134
135 135 int
136 136 devzvol_handle_ioctl(int cmd, zfs_cmd_t *zc, size_t *alloc_size)
137 137 {
138 138 uint64_t cookie;
139 139 int size = 8000;
140 140 int unused;
141 141 int rc;
142 142
143 143 if (cmd != ZFS_IOC_POOL_CONFIGS)
144 144 mutex_enter(&devzvol_mtx);
145 145 if (!devzvol_isopen) {
146 146 if ((rc = devzvol_open_zfs()) == 0) {
147 147 devzvol_isopen = B_TRUE;
148 148 } else {
149 149 if (cmd != ZFS_IOC_POOL_CONFIGS)
150 150 mutex_exit(&devzvol_mtx);
151 151 return (ENXIO);
152 152 }
153 153 }
154 154 cookie = zc->zc_cookie;
155 155 again:
156 156 zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size,
157 157 KM_SLEEP);
158 158 zc->zc_nvlist_dst_size = size;
159 159 rc = ldi_ioctl(devzvol_lh, cmd, (intptr_t)zc, FKIOCTL, kcred,
160 160 &unused);
161 161 if (rc == ENOMEM) {
162 162 int newsize;
163 163 newsize = zc->zc_nvlist_dst_size;
164 164 ASSERT(newsize > size);
165 165 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
166 166 size = newsize;
167 167 zc->zc_cookie = cookie;
168 168 goto again;
169 169 }
170 170 if (alloc_size == NULL)
171 171 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
172 172 else
173 173 *alloc_size = size;
174 174 if (cmd != ZFS_IOC_POOL_CONFIGS)
175 175 mutex_exit(&devzvol_mtx);
176 176 return (rc);
177 177 }
178 178
179 179 /* figures out if the objset exists and returns its type */
180 180 int
181 181 devzvol_objset_check(char *dsname, dmu_objset_type_t *type)
182 182 {
183 183 boolean_t ispool, is_snapshot;
184 184 zfs_cmd_t *zc;
185 185 int rc;
186 186 nvlist_t *nvl;
187 187 size_t nvsz;
188 188
189 189 ispool = (strchr(dsname, '/') == NULL);
190 190 is_snapshot = (strchr(dsname, '@') != NULL);
191 191
192 192 if (is_snapshot && !devzvol_snaps_allowed)
193 193 return (ENOTSUP);
194 194
195 195 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
196 196 (void) strlcpy(zc->zc_name, dsname, MAXPATHLEN);
197 197
198 198 nvl = fnvlist_alloc();
199 199 fnvlist_add_boolean_value(nvl, "cachedpropsonly", B_TRUE);
200 200 zc->zc_nvlist_src = (uintptr_t)fnvlist_pack(nvl, &nvsz);
201 201 zc->zc_nvlist_src_size = nvsz;
202 202 fnvlist_free(nvl);
203 203
204 204 rc = devzvol_handle_ioctl(ispool ? ZFS_IOC_POOL_STATS :
205 205 ZFS_IOC_OBJSET_STATS, zc, NULL);
206 206 if (type && rc == 0)
207 207 *type = (ispool) ? DMU_OST_ZFS :
208 208 zc->zc_objset_stats.dds_type;
209 209 fnvlist_pack_free((char *)(uintptr_t)zc->zc_nvlist_src, nvsz);
210 210 kmem_free(zc, sizeof (zfs_cmd_t));
211 211 return (rc);
212 212 }
213 213
214 214 /*
215 215 * Returns what the zfs dataset name should be, given the /dev/zvol
216 216 * path and an optional name (can be NULL).
217 217 *
218 218 * Note that if the name param is NULL, then path must be an
219 219 * actual dataset's directory and not one of the top-level
220 220 * /dev/zvol/{dsk,rdsk} dirs, as these do not correspond to a
221 221 * specific dataset.
222 222 */
223 223 char *
224 224 devzvol_make_dsname(const char *path, const char *name)
225 225 {
226 226 char *dsname;
227 227 const char *ptr;
228 228 int dslen;
229 229
230 230 if (strcmp(path, ZVOL_DIR) == 0)
231 231 return (NULL);
232 232 if (name && (strcmp(name, ".") == 0 || strcmp(name, "..") == 0))
233 233 return (NULL);
234 234 ptr = path + strlen(ZVOL_DIR);
235 235 if (strncmp(ptr, "/dsk", 4) == 0)
236 236 ptr += strlen("/dsk");
237 237 else if (strncmp(ptr, "/rdsk", 5) == 0)
238 238 ptr += strlen("/rdsk");
239 239 else
240 240 return (NULL);
241 241
242 242 if (*ptr == '/')
243 243 ptr++;
244 244 else if (name == NULL)
245 245 return (NULL);
246 246
247 247 dslen = strlen(ptr);
248 248 if (dslen)
249 249 dslen++; /* plus null */
250 250 if (name)
251 251 dslen += strlen(name) + 1; /* plus slash */
252 252 dsname = kmem_zalloc(dslen, KM_SLEEP);
253 253 if (*ptr) {
254 254 (void) strlcpy(dsname, ptr, dslen);
255 255 if (name)
256 256 (void) strlcat(dsname, "/", dslen);
257 257 }
258 258 if (name)
259 259 (void) strlcat(dsname, name, dslen);
260 260 return (dsname);
261 261 }
262 262
263 263 /*
264 264 * check if the zvol's sdev_node is still valid, which means make
265 265 * sure the zvol is still valid. zvol minors aren't proactively
266 266 * destroyed when the zvol is destroyed, so we use a validator to clean
267 267 * these up (in other words, when such nodes are encountered during
268 268 * subsequent lookup() and readdir() operations) so that only valid
269 269 * nodes are returned. The ordering between devname_lookup_func and
270 270 * devzvol_validate is a little inefficient in the case of invalid
271 271 * or stale nodes because devname_lookup_func calls
272 272 * devzvol_create_{dir, link}, then the validator says it's invalid,
273 273 * and then the node gets cleaned up.
274 274 */
275 275 int
276 276 devzvol_validate(struct sdev_node *dv)
277 277 {
278 278 vnode_t *vn = SDEVTOV(dv);
279 279 dmu_objset_type_t do_type;
280 280 char *dsname;
281 281 char *nm = dv->sdev_name;
282 282 int rc;
283 283
284 284 sdcmn_err13(("validating ('%s' '%s')", dv->sdev_path, nm));
285 285 /*
286 286 * validate only READY nodes; if someone is sitting on the
287 287 * directory of a dataset that just got destroyed we could
288 288 * get a zombie node which we just skip.
289 289 */
290 290 if (dv->sdev_state != SDEV_READY) {
291 291 sdcmn_err13(("skipping '%s'", nm));
292 292 return (SDEV_VTOR_SKIP);
293 293 }
294 294
295 295 if ((strcmp(dv->sdev_path, ZVOL_DIR "/dsk") == 0) ||
296 296 (strcmp(dv->sdev_path, ZVOL_DIR "/rdsk") == 0))
297 297 return (SDEV_VTOR_VALID);
298 298 dsname = devzvol_make_dsname(dv->sdev_path, NULL);
299 299 if (dsname == NULL)
300 300 return (SDEV_VTOR_INVALID);
301 301
302 302 /*
303 303 * Leave any nodes alone that have been explicitly created by
304 304 * sdev profiles.
305 305 */
306 306 if (!(dv->sdev_flags & SDEV_GLOBAL) && dv->sdev_origin != NULL) {
307 307 kmem_free(dsname, strlen(dsname) + 1);
308 308 return (SDEV_VTOR_VALID);
309 309 }
310 310
311 311 rc = devzvol_objset_check(dsname, &do_type);
312 312 sdcmn_err13((" '%s' rc %d", dsname, rc));
313 313 if (rc != 0) {
314 314 sdev_node_t *parent = dv->sdev_dotdot;
315 315 /*
316 316 * Explicitly passed-through zvols in our sdev profile can't
317 317 * be created as prof_* shadow nodes, because in the GZ they
318 318 * are symlinks, but in the NGZ they are actual device files.
319 319 *
320 320 * The objset_check will fail on these as they are outside
321 321 * any delegated dataset (zfs will not allow ioctl access to
322 322 * them from this zone). We still want them to work, though.
323 323 */
324 324 if (!(parent->sdev_flags & SDEV_GLOBAL) &&
325 325 parent->sdev_origin != NULL &&
326 326 !(dv->sdev_flags & SDEV_GLOBAL) &&
327 327 (vn->v_type == VBLK || vn->v_type == VCHR) &&
328 328 prof_name_matched(nm, parent)) {
329 329 do_type = DMU_OST_ZVOL;
330 330 } else {
331 331 kmem_free(dsname, strlen(dsname) + 1);
332 332 return (SDEV_VTOR_INVALID);
333 333 }
334 334 }
335 335
336 336 sdcmn_err13((" v_type %d do_type %d",
337 337 vn->v_type, do_type));
338 338 if ((vn->v_type == VLNK && do_type != DMU_OST_ZVOL) ||
339 339 ((vn->v_type == VBLK || vn->v_type == VCHR) &&
340 340 do_type != DMU_OST_ZVOL) ||
341 341 (vn->v_type == VDIR && do_type == DMU_OST_ZVOL)) {
342 342 kmem_free(dsname, strlen(dsname) + 1);
343 343 return (SDEV_VTOR_STALE);
344 344 }
345 345 if (vn->v_type == VLNK) {
346 346 char *ptr, *link;
347 347 long val = 0;
348 348 minor_t lminor, ominor;
349 349
350 350 rc = sdev_getlink(vn, &link);
351 351 ASSERT(rc == 0);
352 352
353 353 ptr = strrchr(link, ':') + 1;
354 354 rc = ddi_strtol(ptr, NULL, 10, &val);
355 355 kmem_free(link, strlen(link) + 1);
356 356 ASSERT(rc == 0 && val != 0);
357 357 lminor = (minor_t)val;
358 358 if (sdev_zvol_name2minor(dsname, &ominor) < 0 ||
359 359 ominor != lminor) {
360 360 kmem_free(dsname, strlen(dsname) + 1);
361 361 return (SDEV_VTOR_STALE);
362 362 }
363 363 }
364 364 kmem_free(dsname, strlen(dsname) + 1);
365 365 return (SDEV_VTOR_VALID);
366 366 }
367 367
368 368 /*
369 369 * Taskq callback to update the devzvol_zclist.
370 370 *
371 371 * We need to defer this to the taskq to avoid it running with a user
372 372 * context that might be associated with some non-global zone, and thus
373 373 * not being able to list all of the pools on the entire system.
374 374 */
375 375 /*ARGSUSED*/
376 376 static void
377 377 devzvol_update_zclist_cb(void *arg)
378 378 {
379 379 zfs_cmd_t *zc;
380 380 int rc;
381 381 size_t size;
382 382
383 383 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
384 384 mutex_enter(&devzvol_mtx);
385 385 zc->zc_cookie = devzvol_gen;
386 386
387 387 rc = devzvol_handle_ioctl(ZFS_IOC_POOL_CONFIGS, zc, &size);
388 388 switch (rc) {
389 389 case 0:
390 390 /* new generation */
391 391 ASSERT(devzvol_gen != zc->zc_cookie);
392 392 devzvol_gen = zc->zc_cookie;
393 393 if (devzvol_zclist)
394 394 kmem_free((void *)(uintptr_t)devzvol_zclist,
395 395 devzvol_zclist_size);
396 396 devzvol_zclist = zc->zc_nvlist_dst;
397 397 /* Keep the alloc'd size, not the nvlist size. */
398 398 devzvol_zclist_size = size;
399 399 break;
400 400 default:
401 401 /*
402 402 * Either there was no change in pool configuration
403 403 * since we last asked (rc == EEXIST) or we got a
404 404 * catastrophic error.
405 405 *
406 406 * Give up memory and exit.
407 407 */
408 408 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst,
409 409 size);
410 410 break;
411 411 }
412 412
413 413 VERIFY(devzvol_zclist_task_running == B_TRUE);
414 414 devzvol_zclist_task_running = B_FALSE;
415 415 mutex_exit(&devzvol_mtx);
416 416
417 417 kmem_free(zc, sizeof (zfs_cmd_t));
418 418 }
419 419
420 420 static void
421 421 devzvol_update_zclist(void)
422 422 {
423 423 mutex_enter(&devzvol_mtx);
424 424 if (devzvol_zclist_task_running == B_TRUE) {
425 425 mutex_exit(&devzvol_mtx);
426 426 goto wait;
427 427 }
428 428
429 429 devzvol_zclist_task_running = B_TRUE;
430 430
431 431 taskq_dispatch_ent(sdev_taskq, devzvol_update_zclist_cb, NULL, 0,
432 432 &devzvol_zclist_task);
433 433
434 434 mutex_exit(&devzvol_mtx);
435 435
436 436 wait:
437 437 taskq_wait(sdev_taskq);
438 438 }
439 439
440 440 /*
441 441 * Creates sub-directories for each zpool as needed in response to a
442 442 * readdir on one of the /dev/zvol/{dsk,rdsk} directories.
443 443 */
444 444 void
445 445 devzvol_create_pool_dirs(struct vnode *dvp)
446 446 {
447 447 nvlist_t *nv = NULL;
448 448 nvpair_t *elem = NULL;
449 449 int pools = 0;
450 450 int rc;
451 451
452 452 sdcmn_err13(("devzvol_create_pool_dirs"));
453 453
454 454 devzvol_update_zclist();
455 455
456 456 mutex_enter(&devzvol_mtx);
457 457
458 458 rc = nvlist_unpack((char *)(uintptr_t)devzvol_zclist,
459 459 devzvol_zclist_size, &nv, 0);
460 460 if (rc) {
461 461 ASSERT(rc == 0);
462 462 kmem_free((void *)(uintptr_t)devzvol_zclist,
463 463 devzvol_zclist_size);
464 464 devzvol_gen = 0;
|
↓ open down ↓ |
464 lines elided |
↑ open up ↑ |
465 465 devzvol_zclist = NULL;
466 466 devzvol_zclist_size = 0;
467 467 goto out;
468 468 }
469 469 mutex_exit(&devzvol_mtx);
470 470 while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
471 471 struct vnode *vp;
472 472 ASSERT(dvp->v_count > 0);
473 473 rc = VOP_LOOKUP(dvp, nvpair_name(elem), &vp, NULL, 0,
474 474 NULL, kcred, NULL, 0, NULL);
475 - /* should either work, or not be visible from a zone */
476 - ASSERT(rc == 0 || rc == ENOENT);
475 + /*
476 + * should either work or we should get an error if this should
477 + * not be visible from the zone, or disallowed in the zone
478 + */
477 479 if (rc == 0)
478 480 VN_RELE(vp);
479 481 pools++;
480 482 }
481 483 nvlist_free(nv);
482 484 mutex_enter(&devzvol_mtx);
483 485 if (devzvol_isopen && pools == 0) {
484 486 /* clean up so zfs can be unloaded */
485 487 devzvol_close_zfs();
486 488 devzvol_isopen = B_FALSE;
487 489 }
488 490 out:
489 491 mutex_exit(&devzvol_mtx);
490 492 }
491 493
492 494 /*ARGSUSED3*/
493 495 static int
494 496 devzvol_create_dir(struct sdev_node *ddv, char *nm, void **arg,
495 497 cred_t *cred, void *whatever, char *whichever)
496 498 {
497 499 timestruc_t now;
498 500 struct vattr *vap = (struct vattr *)arg;
499 501
500 502 sdcmn_err13(("create_dir (%s) (%s) '%s'", ddv->sdev_name,
501 503 ddv->sdev_path, nm));
502 504 ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR,
503 505 strlen(ZVOL_DIR)) == 0);
504 506 *vap = *sdev_getdefault_attr(VDIR);
505 507 gethrestime(&now);
506 508 vap->va_atime = now;
507 509 vap->va_mtime = now;
508 510 vap->va_ctime = now;
509 511 return (0);
510 512 }
511 513
512 514 /*ARGSUSED3*/
513 515 static int
514 516 devzvol_create_link(struct sdev_node *ddv, char *nm,
515 517 void **arg, cred_t *cred, void *whatever, char *whichever)
516 518 {
517 519 minor_t minor;
518 520 char *pathname = (char *)*arg;
519 521 int rc;
520 522 char *dsname;
521 523 char *x;
522 524 char str[MAXNAMELEN];
523 525 sdcmn_err13(("create_link (%s) (%s) '%s'", ddv->sdev_name,
524 526 ddv->sdev_path, nm));
525 527 dsname = devzvol_make_dsname(ddv->sdev_path, nm);
526 528 rc = sdev_zvol_create_minor(dsname);
527 529 if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
528 530 sdev_zvol_name2minor(dsname, &minor)) {
529 531 sdcmn_err13(("devzvol_create_link %d", rc));
530 532 kmem_free(dsname, strlen(dsname) + 1);
531 533 return (-1);
532 534 }
533 535 kmem_free(dsname, strlen(dsname) + 1);
534 536
535 537 /*
536 538 * This is a valid zvol; create a symlink that points to the
537 539 * minor which was created under /devices/pseudo/zfs@0
538 540 */
539 541 *pathname = '\0';
540 542 for (x = ddv->sdev_path; x = strchr(x, '/'); x++)
541 543 (void) strcat(pathname, "../");
542 544 (void) snprintf(str, sizeof (str), ZVOL_PSEUDO_DEV "%u", minor);
543 545 (void) strncat(pathname, str, MAXPATHLEN);
544 546 if (strncmp(ddv->sdev_path, ZVOL_FULL_RDEV_DIR,
545 547 strlen(ZVOL_FULL_RDEV_DIR)) == 0)
546 548 (void) strcat(pathname, ",raw");
547 549 return (0);
548 550 }
549 551
550 552 /* Clean zvol sdev_nodes that are no longer valid. */
551 553 static void
552 554 devzvol_prunedir(struct sdev_node *ddv)
553 555 {
554 556 struct sdev_node *dv;
555 557
556 558 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
557 559
558 560 sdcmn_err13(("prunedir '%s'", ddv->sdev_name));
559 561 ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
560 562 if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
561 563 rw_exit(&ddv->sdev_contents);
562 564 rw_enter(&ddv->sdev_contents, RW_WRITER);
563 565 }
564 566
565 567 dv = SDEV_FIRST_ENTRY(ddv);
566 568 while (dv) {
567 569 sdcmn_err13(("sdev_name '%s'", dv->sdev_name));
568 570
569 571 switch (devzvol_validate(dv)) {
570 572 case SDEV_VTOR_VALID:
571 573 case SDEV_VTOR_SKIP:
572 574 dv = SDEV_NEXT_ENTRY(ddv, dv);
573 575 continue;
574 576 case SDEV_VTOR_INVALID:
575 577 sdcmn_err7(("prunedir: destroy invalid "
576 578 "node: %s\n", dv->sdev_name));
577 579 break;
578 580 }
579 581
580 582 if ((SDEVTOV(dv)->v_type == VDIR) &&
581 583 (sdev_cleandir(dv, NULL, 0) != 0)) {
582 584 dv = SDEV_NEXT_ENTRY(ddv, dv);
583 585 continue;
584 586 }
585 587 SDEV_HOLD(dv);
586 588 /* remove the cache node */
587 589 sdev_cache_update(ddv, &dv, dv->sdev_name,
588 590 SDEV_CACHE_DELETE);
589 591 SDEV_RELE(dv);
590 592 dv = SDEV_FIRST_ENTRY(ddv);
591 593 }
592 594 rw_downgrade(&ddv->sdev_contents);
593 595 }
594 596
595 597 /*
596 598 * This function is used to create a dir or dev inside a zone's /dev when the
597 599 * zone has a zvol that is dynamically created within the zone (i.e. inside
598 600 * of a delegated dataset. Since there is no /devices tree within a zone,
599 601 * we create the chr/blk devices directly inside the zone's /dev instead of
600 602 * making symlinks.
601 603 */
602 604 static int
603 605 devzvol_mk_ngz_node(struct sdev_node *parent, char *nm)
604 606 {
605 607 struct vattr vattr;
606 608 timestruc_t now;
607 609 enum vtype expected_type = VDIR;
608 610 dmu_objset_type_t do_type;
609 611 struct sdev_node *dv = NULL;
610 612 int res;
611 613 char *dsname;
612 614
613 615 bzero(&vattr, sizeof (vattr));
614 616 gethrestime(&now);
615 617 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
616 618 vattr.va_uid = SDEV_UID_DEFAULT;
617 619 vattr.va_gid = SDEV_GID_DEFAULT;
618 620 vattr.va_type = VNON;
619 621 vattr.va_atime = now;
620 622 vattr.va_mtime = now;
621 623 vattr.va_ctime = now;
622 624
623 625 if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL)
624 626 return (ENOENT);
625 627
626 628 if (devzvol_objset_check(dsname, &do_type) != 0) {
627 629 /*
628 630 * objset_check will succeed on any valid objset in the global
629 631 * zone, and any valid delegated dataset. It will fail, however,
630 632 * in non-global zones on explicitly whitelisted zvol devices
631 633 * that are outside any delegated dataset.
632 634 *
633 635 * The directories leading up to the zvol device itself will be
634 636 * created by prof for us in advance (and will always validate
635 637 * because of the matching check in devzvol_validate). The zvol
636 638 * device itself can't be created by prof though because in the
637 639 * GZ it's a symlink, and in the NGZ it is not. So, we create
638 640 * such zvol device files here.
639 641 */
640 642 if (!(parent->sdev_flags & SDEV_GLOBAL) &&
641 643 parent->sdev_origin != NULL &&
642 644 prof_name_matched(nm, parent)) {
643 645 do_type = DMU_OST_ZVOL;
644 646 } else {
645 647 kmem_free(dsname, strlen(dsname) + 1);
646 648 return (ENOENT);
647 649 }
648 650 }
649 651
650 652 if (do_type == DMU_OST_ZVOL)
651 653 expected_type = VBLK;
652 654
653 655 if (expected_type == VDIR) {
654 656 vattr.va_type = VDIR;
655 657 vattr.va_mode = SDEV_DIRMODE_DEFAULT;
656 658 } else {
657 659 minor_t minor;
658 660 dev_t devnum;
659 661 int rc;
660 662
661 663 rc = sdev_zvol_create_minor(dsname);
662 664 if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
663 665 sdev_zvol_name2minor(dsname, &minor)) {
664 666 kmem_free(dsname, strlen(dsname) + 1);
665 667 return (ENOENT);
666 668 }
667 669
668 670 devnum = makedevice(devzvol_major, minor);
669 671 vattr.va_rdev = devnum;
670 672
671 673 if (strstr(parent->sdev_path, "/rdsk/") != NULL)
672 674 vattr.va_type = VCHR;
673 675 else
674 676 vattr.va_type = VBLK;
675 677 vattr.va_mode = SDEV_DEVMODE_DEFAULT;
676 678 }
677 679 kmem_free(dsname, strlen(dsname) + 1);
678 680
679 681 rw_enter(&parent->sdev_contents, RW_WRITER);
680 682
681 683 res = sdev_mknode(parent, nm, &dv, &vattr,
682 684 NULL, NULL, kcred, SDEV_READY);
683 685 rw_exit(&parent->sdev_contents);
684 686 if (res != 0)
685 687 return (ENOENT);
686 688
687 689 SDEV_RELE(dv);
688 690 return (0);
689 691 }
690 692
691 693 /*ARGSUSED*/
692 694 static int
693 695 devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
694 696 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
695 697 caller_context_t *ct, int *direntflags, pathname_t *realpnp)
696 698 {
697 699 enum vtype expected_type = VDIR;
698 700 struct sdev_node *parent = VTOSDEV(dvp);
699 701 char *dsname;
700 702 dmu_objset_type_t do_type;
701 703 int error;
702 704
703 705 sdcmn_err13(("devzvol_lookup '%s' '%s'", parent->sdev_path, nm));
704 706 *vpp = NULL;
705 707 /* execute access is required to search the directory */
706 708 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
707 709 return (error);
708 710
709 711 rw_enter(&parent->sdev_contents, RW_READER);
710 712 if (!SDEV_IS_GLOBAL(parent)) {
711 713 int res;
712 714
713 715 rw_exit(&parent->sdev_contents);
714 716
715 717 /*
716 718 * If we're in the global zone and reach down into a non-global
717 719 * zone's /dev/zvol then this action could trigger the creation
718 720 * of all of the zvol devices for every zone into the non-global
719 721 * zone's /dev tree. This could be a big security hole. To
720 722 * prevent this, disallow the global zone from looking inside
721 723 * a non-global zones /dev/zvol. This behavior is similar to
722 724 * delegated datasets, which cannot be used by the global zone.
723 725 */
724 726 if (getzoneid() == GLOBAL_ZONEID)
725 727 return (EPERM);
726 728
727 729 res = prof_lookup(dvp, nm, vpp, cred);
728 730
729 731 /*
730 732 * We won't find a zvol that was dynamically created inside
731 733 * a NGZ, within a delegated dataset, in the zone's dev profile
732 734 * but prof_lookup will also find it via sdev_cache_lookup.
733 735 */
734 736 if (res == ENOENT) {
735 737 /*
736 738 * We have to create the sdev node for the dymamically
737 739 * created zvol.
738 740 */
739 741 if (devzvol_mk_ngz_node(parent, nm) != 0)
740 742 return (ENOENT);
741 743 res = prof_lookup(dvp, nm, vpp, cred);
742 744 }
743 745
744 746 return (res);
745 747 }
746 748
747 749 /*
748 750 * Don't let the global-zone style lookup succeed here when we're not
749 751 * running in the global zone. This can happen because prof calls into
750 752 * us (in prof_filldir) trying to create an explicitly passed-through
751 753 * zvol device outside any delegated dataset.
752 754 *
753 755 * We have to stop this here or else we will create prof shadows of
754 756 * the global zone symlink, which will make no sense at all in the
755 757 * non-global zone (it has no /devices for the symlink to point at).
756 758 *
757 759 * These zvols will be created later (at access time) by mk_ngz_node
758 760 * instead. The dirs leading up to them will be created by prof
759 761 * internally.
760 762 *
761 763 * We have to return EPERM here, because ENOENT is given special
762 764 * meaning by prof in this context.
763 765 */
764 766 if (getzoneid() != GLOBAL_ZONEID) {
765 767 rw_exit(&parent->sdev_contents);
766 768 return (EPERM);
767 769 }
768 770
769 771 dsname = devzvol_make_dsname(parent->sdev_path, nm);
770 772 rw_exit(&parent->sdev_contents);
771 773 sdcmn_err13(("rvp dsname %s", dsname ? dsname : "(null)"));
772 774 if (dsname) {
773 775 error = devzvol_objset_check(dsname, &do_type);
774 776 if (error != 0) {
775 777 error = ENOENT;
776 778 goto out;
777 779 }
778 780 if (do_type == DMU_OST_ZVOL)
779 781 expected_type = VLNK;
780 782 }
781 783 /*
782 784 * the callbacks expect:
783 785 *
784 786 * parent->sdev_path nm
785 787 * /dev/zvol {r}dsk
786 788 * /dev/zvol/{r}dsk <pool name>
787 789 * /dev/zvol/{r}dsk/<dataset name> <last ds component>
788 790 *
789 791 * sdev_name is always last path component of sdev_path
790 792 */
791 793 if (expected_type == VDIR) {
792 794 error = devname_lookup_func(parent, nm, vpp, cred,
793 795 devzvol_create_dir, SDEV_VATTR);
794 796 } else {
795 797 error = devname_lookup_func(parent, nm, vpp, cred,
796 798 devzvol_create_link, SDEV_VLINK);
797 799 }
798 800 sdcmn_err13(("devzvol_lookup %d %d", expected_type, error));
799 801 ASSERT(error || ((*vpp)->v_type == expected_type));
800 802 out:
801 803 if (dsname)
802 804 kmem_free(dsname, strlen(dsname) + 1);
803 805 sdcmn_err13(("devzvol_lookup %d", error));
804 806 return (error);
805 807 }
806 808
807 809 /*
808 810 * We allow create to find existing nodes
809 811 * - if the node doesn't exist - EROFS
810 812 * - creating an existing dir read-only succeeds, otherwise EISDIR
811 813 * - exclusive creates fail - EEXIST
812 814 */
813 815 /*ARGSUSED2*/
814 816 static int
815 817 devzvol_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl,
816 818 int mode, struct vnode **vpp, struct cred *cred, int flag,
817 819 caller_context_t *ct, vsecattr_t *vsecp)
818 820 {
819 821 int error;
820 822 struct vnode *vp;
821 823
822 824 *vpp = NULL;
823 825
824 826 error = devzvol_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL,
825 827 NULL);
826 828 if (error == 0) {
827 829 if (excl == EXCL)
828 830 error = EEXIST;
829 831 else if (vp->v_type == VDIR && (mode & VWRITE))
830 832 error = EISDIR;
831 833 else
832 834 error = VOP_ACCESS(vp, mode, 0, cred, ct);
833 835
834 836 if (error) {
835 837 VN_RELE(vp);
836 838 } else
837 839 *vpp = vp;
838 840 } else if (error == ENOENT) {
839 841 error = EROFS;
840 842 }
841 843
842 844 return (error);
843 845 }
844 846
845 847 void sdev_iter_snapshots(struct vnode *dvp, char *name);
846 848
847 849 void
848 850 sdev_iter_datasets(struct vnode *dvp, int arg, char *name)
849 851 {
850 852 zfs_cmd_t *zc;
851 853 int rc;
852 854
853 855 sdcmn_err13(("iter name is '%s' (arg %x)", name, arg));
854 856 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
855 857 (void) strcpy(zc->zc_name, name);
856 858
857 859 while ((rc = devzvol_handle_ioctl(arg, zc, B_FALSE)) == 0) {
858 860 struct vnode *vpp;
859 861 char *ptr;
860 862
861 863 sdcmn_err13((" name %s", zc->zc_name));
862 864 if (strchr(zc->zc_name, '$') || strchr(zc->zc_name, '%'))
863 865 goto skip;
864 866 ptr = strrchr(zc->zc_name, '/') + 1;
865 867 rc = devzvol_lookup(dvp, ptr, &vpp, NULL, 0, NULL,
866 868 kcred, NULL, NULL, NULL);
867 869 if (rc == 0) {
868 870 VN_RELE(vpp);
869 871 } else if (rc == ENOENT) {
870 872 goto skip;
871 873 } else {
872 874 /*
873 875 * EBUSY == problem with zvols's dmu holds?
874 876 * EPERM when in a NGZ and traversing up and out.
875 877 */
876 878 goto skip;
877 879 }
878 880 if (arg == ZFS_IOC_DATASET_LIST_NEXT &&
879 881 zc->zc_objset_stats.dds_type == DMU_OST_ZVOL &&
880 882 devzvol_snaps_allowed)
881 883 sdev_iter_snapshots(dvp, zc->zc_name);
882 884 skip:
883 885 (void) strcpy(zc->zc_name, name);
884 886 }
885 887 kmem_free(zc, sizeof (zfs_cmd_t));
886 888 }
887 889
888 890 void
889 891 sdev_iter_snapshots(struct vnode *dvp, char *name)
890 892 {
891 893 sdev_iter_datasets(dvp, ZFS_IOC_SNAPSHOT_LIST_NEXT, name);
892 894 }
893 895
894 896 /*ARGSUSED4*/
895 897 static int
896 898 devzvol_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
897 899 int *eofp, caller_context_t *ct_unused, int flags_unused)
898 900 {
899 901 struct sdev_node *sdvp = VTOSDEV(dvp);
900 902 char *ptr;
901 903
902 904 sdcmn_err13(("zv readdir of '%s' %s'", sdvp->sdev_path,
903 905 sdvp->sdev_name));
904 906
905 907 if (strcmp(sdvp->sdev_path, ZVOL_DIR) == 0) {
906 908 struct vnode *vp;
907 909
908 910 rw_exit(&sdvp->sdev_contents);
909 911 (void) devname_lookup_func(sdvp, "dsk", &vp, cred,
910 912 devzvol_create_dir, SDEV_VATTR);
911 913 VN_RELE(vp);
912 914 (void) devname_lookup_func(sdvp, "rdsk", &vp, cred,
913 915 devzvol_create_dir, SDEV_VATTR);
914 916 VN_RELE(vp);
915 917 rw_enter(&sdvp->sdev_contents, RW_READER);
916 918 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
917 919 }
918 920 if (uiop->uio_offset == 0)
919 921 devzvol_prunedir(sdvp);
920 922 ptr = sdvp->sdev_path + strlen(ZVOL_DIR);
921 923 if ((strcmp(ptr, "/dsk") == 0) || (strcmp(ptr, "/rdsk") == 0)) {
922 924 rw_exit(&sdvp->sdev_contents);
923 925 devzvol_create_pool_dirs(dvp);
924 926 rw_enter(&sdvp->sdev_contents, RW_READER);
925 927 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
926 928 }
927 929
928 930 ptr = strchr(ptr + 1, '/');
929 931 if (ptr == NULL)
930 932 return (ENOENT);
931 933 ptr++;
932 934 rw_exit(&sdvp->sdev_contents);
933 935 sdev_iter_datasets(dvp, ZFS_IOC_DATASET_LIST_NEXT, ptr);
934 936 rw_enter(&sdvp->sdev_contents, RW_READER);
935 937 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
936 938 }
937 939
938 940 const fs_operation_def_t devzvol_vnodeops_tbl[] = {
939 941 VOPNAME_READDIR, { .vop_readdir = devzvol_readdir },
940 942 VOPNAME_LOOKUP, { .vop_lookup = devzvol_lookup },
941 943 VOPNAME_CREATE, { .vop_create = devzvol_create },
942 944 VOPNAME_RENAME, { .error = fs_nosys },
943 945 VOPNAME_MKDIR, { .error = fs_nosys },
944 946 VOPNAME_RMDIR, { .error = fs_nosys },
945 947 VOPNAME_REMOVE, { .error = fs_nosys },
946 948 VOPNAME_SYMLINK, { .error = fs_nosys },
947 949 NULL, NULL
948 950 };
|
↓ open down ↓ |
462 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX