Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/dev/sdev_zvolops.c
+++ new/usr/src/uts/common/fs/dev/sdev_zvolops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2013, 2016 Joyent, Inc. All rights reserved.
25 25 * Copyright (c) 2014 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /* vnode ops for the /dev/zvol directory */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/param.h>
32 32 #include <sys/sysmacros.h>
33 33 #include <sys/ddi.h>
34 34 #include <sys/sunndi.h>
35 35 #include <sys/sunldi.h>
36 36 #include <fs/fs_subr.h>
37 37 #include <sys/fs/dv_node.h>
38 38 #include <sys/fs/sdev_impl.h>
39 39 #include <sys/zfs_ioctl.h>
40 40 #include <sys/policy.h>
41 41 #include <sys/stat.h>
42 42 #include <sys/vfs_opreg.h>
43 43
44 44 struct vnodeops *devzvol_vnodeops;
45 45 static major_t devzvol_major;
46 46 static taskq_ent_t devzvol_zclist_task;
47 47
48 48 static kmutex_t devzvol_mtx;
49 49 /* Below are protected by devzvol_mtx */
50 50 static boolean_t devzvol_isopen;
51 51 static boolean_t devzvol_zclist_task_running = B_FALSE;
52 52 static uint64_t devzvol_gen = 0;
53 53 static uint64_t devzvol_zclist;
54 54 static size_t devzvol_zclist_size;
55 55 static ldi_ident_t devzvol_li;
56 56 static ldi_handle_t devzvol_lh;
57 57
58 58 /*
59 59 * we need to use ddi_mod* since fs/dev gets loaded early on in
60 60 * startup(), and linking fs/dev to fs/zfs would drag in a lot of
61 61 * other stuff (like drv/random) before the rest of the system is
62 62 * ready to go
63 63 */
64 64 ddi_modhandle_t zfs_mod;
65 65 int (*szcm)(char *);
66 66 int (*szn2m)(char *, minor_t *);
67 67
68 68
69 69 /*
70 70 * Enable/disable snapshots from being created in /dev/zvol. By default,
71 71 * they are enabled, preserving the historic behavior.
72 72 */
73 73 boolean_t devzvol_snaps_allowed = B_TRUE;
74 74
75 75 int
76 76 sdev_zvol_create_minor(char *dsname)
77 77 {
78 78 if (szcm == NULL)
79 79 return (-1);
80 80 return ((*szcm)(dsname));
81 81 }
82 82
83 83 int
84 84 sdev_zvol_name2minor(char *dsname, minor_t *minor)
85 85 {
86 86 if (szn2m == NULL)
87 87 return (-1);
88 88 return ((*szn2m)(dsname, minor));
89 89 }
90 90
91 91 int
92 92 devzvol_open_zfs()
93 93 {
94 94 int rc;
95 95 dev_t dv;
96 96
97 97 devzvol_li = ldi_ident_from_anon();
98 98 if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
99 99 &devzvol_lh, devzvol_li))
100 100 return (-1);
101 101 if (zfs_mod == NULL && ((zfs_mod = ddi_modopen("fs/zfs",
102 102 KRTLD_MODE_FIRST, &rc)) == NULL)) {
103 103 return (rc);
104 104 }
105 105 ASSERT(szcm == NULL && szn2m == NULL);
106 106 if ((szcm = (int (*)(char *))
107 107 ddi_modsym(zfs_mod, "zvol_create_minor", &rc)) == NULL) {
108 108 cmn_err(CE_WARN, "couldn't resolve zvol_create_minor");
109 109 return (rc);
110 110 }
111 111 if ((szn2m = (int(*)(char *, minor_t *))
112 112 ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) {
113 113 cmn_err(CE_WARN, "couldn't resolve zvol_name2minor");
114 114 return (rc);
115 115 }
116 116 if (ldi_get_dev(devzvol_lh, &dv))
117 117 return (-1);
118 118 devzvol_major = getmajor(dv);
119 119 return (0);
120 120 }
121 121
122 122 void
123 123 devzvol_close_zfs()
124 124 {
125 125 szcm = NULL;
126 126 szn2m = NULL;
127 127 (void) ldi_close(devzvol_lh, FREAD|FWRITE, kcred);
128 128 ldi_ident_release(devzvol_li);
129 129 if (zfs_mod != NULL) {
130 130 (void) ddi_modclose(zfs_mod);
131 131 zfs_mod = NULL;
132 132 }
133 133 }
134 134
135 135 int
136 136 devzvol_handle_ioctl(int cmd, zfs_cmd_t *zc, size_t *alloc_size)
137 137 {
138 138 uint64_t cookie;
139 139 int size = 8000;
140 140 int unused;
141 141 int rc;
142 142
143 143 if (cmd != ZFS_IOC_POOL_CONFIGS)
144 144 mutex_enter(&devzvol_mtx);
145 145 if (!devzvol_isopen) {
146 146 if ((rc = devzvol_open_zfs()) == 0) {
147 147 devzvol_isopen = B_TRUE;
148 148 } else {
149 149 if (cmd != ZFS_IOC_POOL_CONFIGS)
150 150 mutex_exit(&devzvol_mtx);
151 151 return (ENXIO);
152 152 }
153 153 }
154 154 cookie = zc->zc_cookie;
155 155 again:
156 156 zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size,
157 157 KM_SLEEP);
158 158 zc->zc_nvlist_dst_size = size;
159 159 rc = ldi_ioctl(devzvol_lh, cmd, (intptr_t)zc, FKIOCTL, kcred,
160 160 &unused);
161 161 if (rc == ENOMEM) {
162 162 int newsize;
163 163 newsize = zc->zc_nvlist_dst_size;
164 164 ASSERT(newsize > size);
165 165 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
166 166 size = newsize;
167 167 zc->zc_cookie = cookie;
168 168 goto again;
169 169 }
170 170 if (alloc_size == NULL)
171 171 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
172 172 else
173 173 *alloc_size = size;
174 174 if (cmd != ZFS_IOC_POOL_CONFIGS)
175 175 mutex_exit(&devzvol_mtx);
176 176 return (rc);
177 177 }
178 178
179 179 /* figures out if the objset exists and returns its type */
180 180 int
181 181 devzvol_objset_check(char *dsname, dmu_objset_type_t *type)
182 182 {
183 183 boolean_t ispool, is_snapshot;
184 184 zfs_cmd_t *zc;
185 185 int rc;
186 186 nvlist_t *nvl;
187 187 size_t nvsz;
188 188
189 189 ispool = (strchr(dsname, '/') == NULL);
190 190 is_snapshot = (strchr(dsname, '@') != NULL);
191 191
192 192 if (is_snapshot && !devzvol_snaps_allowed)
193 193 return (ENOTSUP);
194 194
195 195 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
196 196 (void) strlcpy(zc->zc_name, dsname, MAXPATHLEN);
197 197
198 198 nvl = fnvlist_alloc();
199 199 fnvlist_add_boolean_value(nvl, "cachedpropsonly", B_TRUE);
200 200 zc->zc_nvlist_src = (uintptr_t)fnvlist_pack(nvl, &nvsz);
201 201 zc->zc_nvlist_src_size = nvsz;
202 202 fnvlist_free(nvl);
203 203
204 204 rc = devzvol_handle_ioctl(ispool ? ZFS_IOC_POOL_STATS :
205 205 ZFS_IOC_OBJSET_STATS, zc, NULL);
206 206 if (type && rc == 0)
207 207 *type = (ispool) ? DMU_OST_ZFS :
208 208 zc->zc_objset_stats.dds_type;
209 209 fnvlist_pack_free((char *)(uintptr_t)zc->zc_nvlist_src, nvsz);
210 210 kmem_free(zc, sizeof (zfs_cmd_t));
211 211 return (rc);
212 212 }
213 213
214 214 /*
215 215 * Returns what the zfs dataset name should be, given the /dev/zvol
216 216 * path and an optional name (can be NULL).
217 217 *
218 218 * Note that if the name param is NULL, then path must be an
219 219 * actual dataset's directory and not one of the top-level
220 220 * /dev/zvol/{dsk,rdsk} dirs, as these do not correspond to a
221 221 * specific dataset.
222 222 */
223 223 char *
224 224 devzvol_make_dsname(const char *path, const char *name)
225 225 {
226 226 char *dsname;
227 227 const char *ptr;
228 228 int dslen;
229 229
230 230 if (strcmp(path, ZVOL_DIR) == 0)
231 231 return (NULL);
232 232 if (name && (strcmp(name, ".") == 0 || strcmp(name, "..") == 0))
233 233 return (NULL);
234 234 ptr = path + strlen(ZVOL_DIR);
235 235 if (strncmp(ptr, "/dsk", 4) == 0)
236 236 ptr += strlen("/dsk");
237 237 else if (strncmp(ptr, "/rdsk", 5) == 0)
238 238 ptr += strlen("/rdsk");
239 239 else
240 240 return (NULL);
241 241
242 242 if (*ptr == '/')
243 243 ptr++;
244 244 else if (name == NULL)
245 245 return (NULL);
246 246
247 247 dslen = strlen(ptr);
248 248 if (dslen)
249 249 dslen++; /* plus null */
250 250 if (name)
251 251 dslen += strlen(name) + 1; /* plus slash */
252 252 dsname = kmem_zalloc(dslen, KM_SLEEP);
253 253 if (*ptr) {
254 254 (void) strlcpy(dsname, ptr, dslen);
255 255 if (name)
256 256 (void) strlcat(dsname, "/", dslen);
257 257 }
258 258 if (name)
259 259 (void) strlcat(dsname, name, dslen);
260 260 return (dsname);
261 261 }
262 262
263 263 /*
264 264 * check if the zvol's sdev_node is still valid, which means make
265 265 * sure the zvol is still valid. zvol minors aren't proactively
266 266 * destroyed when the zvol is destroyed, so we use a validator to clean
267 267 * these up (in other words, when such nodes are encountered during
268 268 * subsequent lookup() and readdir() operations) so that only valid
269 269 * nodes are returned. The ordering between devname_lookup_func and
270 270 * devzvol_validate is a little inefficient in the case of invalid
271 271 * or stale nodes because devname_lookup_func calls
272 272 * devzvol_create_{dir, link}, then the validator says it's invalid,
273 273 * and then the node gets cleaned up.
274 274 */
275 275 int
276 276 devzvol_validate(struct sdev_node *dv)
277 277 {
278 278 vnode_t *vn = SDEVTOV(dv);
279 279 dmu_objset_type_t do_type;
280 280 char *dsname;
281 281 char *nm = dv->sdev_name;
282 282 int rc;
283 283
284 284 sdcmn_err13(("validating ('%s' '%s')", dv->sdev_path, nm));
285 285 /*
286 286 * validate only READY nodes; if someone is sitting on the
287 287 * directory of a dataset that just got destroyed we could
288 288 * get a zombie node which we just skip.
289 289 */
290 290 if (dv->sdev_state != SDEV_READY) {
291 291 sdcmn_err13(("skipping '%s'", nm));
292 292 return (SDEV_VTOR_SKIP);
293 293 }
294 294
295 295 if ((strcmp(dv->sdev_path, ZVOL_DIR "/dsk") == 0) ||
296 296 (strcmp(dv->sdev_path, ZVOL_DIR "/rdsk") == 0))
297 297 return (SDEV_VTOR_VALID);
298 298 dsname = devzvol_make_dsname(dv->sdev_path, NULL);
299 299 if (dsname == NULL)
300 300 return (SDEV_VTOR_INVALID);
301 301
302 302 /*
303 303 * Leave any nodes alone that have been explicitly created by
304 304 * sdev profiles.
305 305 */
306 306 if (!(dv->sdev_flags & SDEV_GLOBAL) && dv->sdev_origin != NULL) {
307 307 kmem_free(dsname, strlen(dsname) + 1);
308 308 return (SDEV_VTOR_VALID);
309 309 }
310 310
311 311 rc = devzvol_objset_check(dsname, &do_type);
312 312 sdcmn_err13((" '%s' rc %d", dsname, rc));
313 313 if (rc != 0) {
314 314 sdev_node_t *parent = dv->sdev_dotdot;
315 315 /*
316 316 * Explicitly passed-through zvols in our sdev profile can't
317 317 * be created as prof_* shadow nodes, because in the GZ they
318 318 * are symlinks, but in the NGZ they are actual device files.
319 319 *
320 320 * The objset_check will fail on these as they are outside
321 321 * any delegated dataset (zfs will not allow ioctl access to
322 322 * them from this zone). We still want them to work, though.
323 323 */
324 324 if (!(parent->sdev_flags & SDEV_GLOBAL) &&
325 325 parent->sdev_origin != NULL &&
326 326 !(dv->sdev_flags & SDEV_GLOBAL) &&
327 327 (vn->v_type == VBLK || vn->v_type == VCHR) &&
328 328 prof_name_matched(nm, parent)) {
329 329 do_type = DMU_OST_ZVOL;
330 330 } else {
331 331 kmem_free(dsname, strlen(dsname) + 1);
332 332 return (SDEV_VTOR_INVALID);
333 333 }
334 334 }
335 335
336 336 sdcmn_err13((" v_type %d do_type %d",
337 337 vn->v_type, do_type));
338 338 if ((vn->v_type == VLNK && do_type != DMU_OST_ZVOL) ||
339 339 ((vn->v_type == VBLK || vn->v_type == VCHR) &&
340 340 do_type != DMU_OST_ZVOL) ||
341 341 (vn->v_type == VDIR && do_type == DMU_OST_ZVOL)) {
342 342 kmem_free(dsname, strlen(dsname) + 1);
343 343 return (SDEV_VTOR_STALE);
344 344 }
345 345 if (vn->v_type == VLNK) {
346 346 char *ptr, *link;
347 347 long val = 0;
348 348 minor_t lminor, ominor;
349 349
350 350 rc = sdev_getlink(vn, &link);
351 351 ASSERT(rc == 0);
352 352
353 353 ptr = strrchr(link, ':') + 1;
354 354 rc = ddi_strtol(ptr, NULL, 10, &val);
355 355 kmem_free(link, strlen(link) + 1);
356 356 ASSERT(rc == 0 && val != 0);
357 357 lminor = (minor_t)val;
358 358 if (sdev_zvol_name2minor(dsname, &ominor) < 0 ||
359 359 ominor != lminor) {
360 360 kmem_free(dsname, strlen(dsname) + 1);
361 361 return (SDEV_VTOR_STALE);
362 362 }
363 363 }
364 364 kmem_free(dsname, strlen(dsname) + 1);
365 365 return (SDEV_VTOR_VALID);
366 366 }
367 367
368 368 /*
369 369 * Taskq callback to update the devzvol_zclist.
370 370 *
371 371 * We need to defer this to the taskq to avoid it running with a user
372 372 * context that might be associated with some non-global zone, and thus
373 373 * not being able to list all of the pools on the entire system.
374 374 */
375 375 /*ARGSUSED*/
376 376 static void
377 377 devzvol_update_zclist_cb(void *arg)
378 378 {
379 379 zfs_cmd_t *zc;
380 380 int rc;
381 381 size_t size;
382 382
383 383 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
384 384 mutex_enter(&devzvol_mtx);
385 385 zc->zc_cookie = devzvol_gen;
386 386
387 387 rc = devzvol_handle_ioctl(ZFS_IOC_POOL_CONFIGS, zc, &size);
388 388 switch (rc) {
389 389 case 0:
390 390 /* new generation */
391 391 ASSERT(devzvol_gen != zc->zc_cookie);
392 392 devzvol_gen = zc->zc_cookie;
393 393 if (devzvol_zclist)
394 394 kmem_free((void *)(uintptr_t)devzvol_zclist,
395 395 devzvol_zclist_size);
396 396 devzvol_zclist = zc->zc_nvlist_dst;
397 397 /* Keep the alloc'd size, not the nvlist size. */
398 398 devzvol_zclist_size = size;
399 399 break;
400 400 default:
401 401 /*
402 402 * Either there was no change in pool configuration
403 403 * since we last asked (rc == EEXIST) or we got a
404 404 * catastrophic error.
405 405 *
406 406 * Give up memory and exit.
407 407 */
408 408 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst,
409 409 size);
410 410 break;
411 411 }
412 412
413 413 VERIFY(devzvol_zclist_task_running == B_TRUE);
414 414 devzvol_zclist_task_running = B_FALSE;
415 415 mutex_exit(&devzvol_mtx);
416 416
417 417 kmem_free(zc, sizeof (zfs_cmd_t));
418 418 }
419 419
420 420 static void
421 421 devzvol_update_zclist(void)
422 422 {
423 423 mutex_enter(&devzvol_mtx);
424 424 if (devzvol_zclist_task_running == B_TRUE) {
425 425 mutex_exit(&devzvol_mtx);
426 426 goto wait;
427 427 }
428 428
429 429 devzvol_zclist_task_running = B_TRUE;
430 430
431 431 taskq_dispatch_ent(sdev_taskq, devzvol_update_zclist_cb, NULL, 0,
432 432 &devzvol_zclist_task);
433 433
434 434 mutex_exit(&devzvol_mtx);
435 435
436 436 wait:
437 437 taskq_wait(sdev_taskq);
438 438 }
439 439
440 440 /*
441 441 * Creates sub-directories for each zpool as needed in response to a
442 442 * readdir on one of the /dev/zvol/{dsk,rdsk} directories.
443 443 */
444 444 void
445 445 devzvol_create_pool_dirs(struct vnode *dvp)
446 446 {
447 447 nvlist_t *nv = NULL;
448 448 nvpair_t *elem = NULL;
449 449 int pools = 0;
450 450 int rc;
451 451
452 452 sdcmn_err13(("devzvol_create_pool_dirs"));
453 453
454 454 devzvol_update_zclist();
455 455
456 456 mutex_enter(&devzvol_mtx);
457 457
458 458 rc = nvlist_unpack((char *)(uintptr_t)devzvol_zclist,
459 459 devzvol_zclist_size, &nv, 0);
460 460 if (rc) {
461 461 ASSERT(rc == 0);
462 462 kmem_free((void *)(uintptr_t)devzvol_zclist,
463 463 devzvol_zclist_size);
464 464 devzvol_gen = 0;
465 465 devzvol_zclist = NULL;
466 466 devzvol_zclist_size = 0;
467 467 goto out;
468 468 }
469 469 mutex_exit(&devzvol_mtx);
470 470 while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
471 471 struct vnode *vp;
472 472 ASSERT(dvp->v_count > 0);
473 473 rc = VOP_LOOKUP(dvp, nvpair_name(elem), &vp, NULL, 0,
474 474 NULL, kcred, NULL, 0, NULL);
475 475 /*
476 476 * should either work or we should get an error if this should
477 477 * not be visible from the zone, or disallowed in the zone
478 478 */
479 479 if (rc == 0)
480 480 VN_RELE(vp);
481 481 pools++;
482 482 }
483 483 nvlist_free(nv);
484 484 mutex_enter(&devzvol_mtx);
485 485 if (devzvol_isopen && pools == 0) {
486 486 /* clean up so zfs can be unloaded */
487 487 devzvol_close_zfs();
488 488 devzvol_isopen = B_FALSE;
489 489 }
490 490 out:
491 491 mutex_exit(&devzvol_mtx);
492 492 }
493 493
494 494 /*ARGSUSED3*/
495 495 static int
496 496 devzvol_create_dir(struct sdev_node *ddv, char *nm, void **arg,
497 497 cred_t *cred, void *whatever, char *whichever)
498 498 {
499 499 timestruc_t now;
500 500 struct vattr *vap = (struct vattr *)arg;
501 501
502 502 sdcmn_err13(("create_dir (%s) (%s) '%s'", ddv->sdev_name,
503 503 ddv->sdev_path, nm));
504 504 ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR,
505 505 strlen(ZVOL_DIR)) == 0);
506 506 *vap = *sdev_getdefault_attr(VDIR);
507 507 gethrestime(&now);
508 508 vap->va_atime = now;
509 509 vap->va_mtime = now;
510 510 vap->va_ctime = now;
511 511 return (0);
512 512 }
513 513
514 514 /*ARGSUSED3*/
515 515 static int
516 516 devzvol_create_link(struct sdev_node *ddv, char *nm,
517 517 void **arg, cred_t *cred, void *whatever, char *whichever)
518 518 {
519 519 minor_t minor;
520 520 char *pathname = (char *)*arg;
521 521 int rc;
522 522 char *dsname;
523 523 char *x;
524 524 char str[MAXNAMELEN];
525 525 sdcmn_err13(("create_link (%s) (%s) '%s'", ddv->sdev_name,
526 526 ddv->sdev_path, nm));
527 527 dsname = devzvol_make_dsname(ddv->sdev_path, nm);
528 528 rc = sdev_zvol_create_minor(dsname);
529 529 if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
530 530 sdev_zvol_name2minor(dsname, &minor)) {
531 531 sdcmn_err13(("devzvol_create_link %d", rc));
532 532 kmem_free(dsname, strlen(dsname) + 1);
533 533 return (-1);
534 534 }
535 535 kmem_free(dsname, strlen(dsname) + 1);
536 536
537 537 /*
538 538 * This is a valid zvol; create a symlink that points to the
539 539 * minor which was created under /devices/pseudo/zfs@0
540 540 */
541 541 *pathname = '\0';
542 542 for (x = ddv->sdev_path; x = strchr(x, '/'); x++)
543 543 (void) strcat(pathname, "../");
544 544 (void) snprintf(str, sizeof (str), ZVOL_PSEUDO_DEV "%u", minor);
545 545 (void) strncat(pathname, str, MAXPATHLEN);
546 546 if (strncmp(ddv->sdev_path, ZVOL_FULL_RDEV_DIR,
547 547 strlen(ZVOL_FULL_RDEV_DIR)) == 0)
548 548 (void) strcat(pathname, ",raw");
549 549 return (0);
550 550 }
551 551
552 552 /* Clean zvol sdev_nodes that are no longer valid. */
553 553 static void
554 554 devzvol_prunedir(struct sdev_node *ddv)
555 555 {
556 556 struct sdev_node *dv;
557 557
558 558 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
559 559
560 560 sdcmn_err13(("prunedir '%s'", ddv->sdev_name));
561 561 ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
562 562 if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
563 563 rw_exit(&ddv->sdev_contents);
564 564 rw_enter(&ddv->sdev_contents, RW_WRITER);
565 565 }
566 566
567 567 dv = SDEV_FIRST_ENTRY(ddv);
568 568 while (dv) {
569 569 sdcmn_err13(("sdev_name '%s'", dv->sdev_name));
570 570
571 571 switch (devzvol_validate(dv)) {
572 572 case SDEV_VTOR_VALID:
573 573 case SDEV_VTOR_SKIP:
574 574 dv = SDEV_NEXT_ENTRY(ddv, dv);
575 575 continue;
576 576 case SDEV_VTOR_INVALID:
577 577 sdcmn_err7(("prunedir: destroy invalid "
578 578 "node: %s\n", dv->sdev_name));
579 579 break;
580 580 }
581 581
582 582 if ((SDEVTOV(dv)->v_type == VDIR) &&
583 583 (sdev_cleandir(dv, NULL, 0) != 0)) {
584 584 dv = SDEV_NEXT_ENTRY(ddv, dv);
585 585 continue;
586 586 }
587 587 SDEV_HOLD(dv);
588 588 /* remove the cache node */
589 589 sdev_cache_update(ddv, &dv, dv->sdev_name,
590 590 SDEV_CACHE_DELETE);
591 591 SDEV_RELE(dv);
592 592 dv = SDEV_FIRST_ENTRY(ddv);
593 593 }
594 594 rw_downgrade(&ddv->sdev_contents);
595 595 }
596 596
597 597 /*
598 598 * This function is used to create a dir or dev inside a zone's /dev when the
599 599 * zone has a zvol that is dynamically created within the zone (i.e. inside
600 600 * of a delegated dataset. Since there is no /devices tree within a zone,
601 601 * we create the chr/blk devices directly inside the zone's /dev instead of
602 602 * making symlinks.
603 603 */
604 604 static int
605 605 devzvol_mk_ngz_node(struct sdev_node *parent, char *nm)
606 606 {
607 607 struct vattr vattr;
608 608 timestruc_t now;
609 609 enum vtype expected_type = VDIR;
610 610 dmu_objset_type_t do_type;
611 611 struct sdev_node *dv = NULL;
612 612 int res;
613 613 char *dsname;
614 614
615 615 bzero(&vattr, sizeof (vattr));
616 616 gethrestime(&now);
617 617 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
618 618 vattr.va_uid = SDEV_UID_DEFAULT;
619 619 vattr.va_gid = SDEV_GID_DEFAULT;
620 620 vattr.va_type = VNON;
621 621 vattr.va_atime = now;
622 622 vattr.va_mtime = now;
623 623 vattr.va_ctime = now;
624 624
625 625 if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL)
626 626 return (ENOENT);
627 627
628 628 if (devzvol_objset_check(dsname, &do_type) != 0) {
629 629 /*
630 630 * objset_check will succeed on any valid objset in the global
631 631 * zone, and any valid delegated dataset. It will fail, however,
632 632 * in non-global zones on explicitly whitelisted zvol devices
633 633 * that are outside any delegated dataset.
634 634 *
635 635 * The directories leading up to the zvol device itself will be
636 636 * created by prof for us in advance (and will always validate
637 637 * because of the matching check in devzvol_validate). The zvol
638 638 * device itself can't be created by prof though because in the
639 639 * GZ it's a symlink, and in the NGZ it is not. So, we create
640 640 * such zvol device files here.
641 641 */
642 642 if (!(parent->sdev_flags & SDEV_GLOBAL) &&
643 643 parent->sdev_origin != NULL &&
644 644 prof_name_matched(nm, parent)) {
645 645 do_type = DMU_OST_ZVOL;
646 646 } else {
647 647 kmem_free(dsname, strlen(dsname) + 1);
648 648 return (ENOENT);
649 649 }
650 650 }
651 651
652 652 if (do_type == DMU_OST_ZVOL)
653 653 expected_type = VBLK;
654 654
655 655 if (expected_type == VDIR) {
656 656 vattr.va_type = VDIR;
657 657 vattr.va_mode = SDEV_DIRMODE_DEFAULT;
658 658 } else {
659 659 minor_t minor;
660 660 dev_t devnum;
661 661 int rc;
662 662
663 663 rc = sdev_zvol_create_minor(dsname);
664 664 if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
665 665 sdev_zvol_name2minor(dsname, &minor)) {
666 666 kmem_free(dsname, strlen(dsname) + 1);
667 667 return (ENOENT);
668 668 }
669 669
670 670 devnum = makedevice(devzvol_major, minor);
671 671 vattr.va_rdev = devnum;
672 672
673 673 if (strstr(parent->sdev_path, "/rdsk/") != NULL)
674 674 vattr.va_type = VCHR;
675 675 else
676 676 vattr.va_type = VBLK;
677 677 vattr.va_mode = SDEV_DEVMODE_DEFAULT;
678 678 }
679 679 kmem_free(dsname, strlen(dsname) + 1);
680 680
681 681 rw_enter(&parent->sdev_contents, RW_WRITER);
682 682
683 683 res = sdev_mknode(parent, nm, &dv, &vattr,
684 684 NULL, NULL, kcred, SDEV_READY);
685 685 rw_exit(&parent->sdev_contents);
686 686 if (res != 0)
687 687 return (ENOENT);
688 688
689 689 SDEV_RELE(dv);
690 690 return (0);
691 691 }
692 692
693 693 /*ARGSUSED*/
694 694 static int
695 695 devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
696 696 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
697 697 caller_context_t *ct, int *direntflags, pathname_t *realpnp)
698 698 {
699 699 enum vtype expected_type = VDIR;
700 700 struct sdev_node *parent = VTOSDEV(dvp);
701 701 char *dsname;
702 702 dmu_objset_type_t do_type;
703 703 int error;
704 704
705 705 sdcmn_err13(("devzvol_lookup '%s' '%s'", parent->sdev_path, nm));
706 706 *vpp = NULL;
707 707 /* execute access is required to search the directory */
708 708 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
709 709 return (error);
710 710
711 711 rw_enter(&parent->sdev_contents, RW_READER);
712 712 if (!SDEV_IS_GLOBAL(parent)) {
713 713 int res;
714 714
715 715 rw_exit(&parent->sdev_contents);
716 716
717 717 /*
718 718 * If we're in the global zone and reach down into a non-global
719 719 * zone's /dev/zvol then this action could trigger the creation
720 720 * of all of the zvol devices for every zone into the non-global
721 721 * zone's /dev tree. This could be a big security hole. To
722 722 * prevent this, disallow the global zone from looking inside
723 723 * a non-global zones /dev/zvol. This behavior is similar to
724 724 * delegated datasets, which cannot be used by the global zone.
725 725 */
726 726 if (getzoneid() == GLOBAL_ZONEID)
727 727 return (EPERM);
728 728
729 729 res = prof_lookup(dvp, nm, vpp, cred);
730 730
731 731 /*
732 732 * We won't find a zvol that was dynamically created inside
733 733 * a NGZ, within a delegated dataset, in the zone's dev profile
734 734 * but prof_lookup will also find it via sdev_cache_lookup.
735 735 */
736 736 if (res == ENOENT) {
737 737 /*
738 738 * We have to create the sdev node for the dymamically
739 739 * created zvol.
740 740 */
741 741 if (devzvol_mk_ngz_node(parent, nm) != 0)
742 742 return (ENOENT);
743 743 res = prof_lookup(dvp, nm, vpp, cred);
744 744 }
745 745
746 746 return (res);
747 747 }
748 748
749 749 /*
750 750 * Don't let the global-zone style lookup succeed here when we're not
751 751 * running in the global zone. This can happen because prof calls into
752 752 * us (in prof_filldir) trying to create an explicitly passed-through
753 753 * zvol device outside any delegated dataset.
754 754 *
755 755 * We have to stop this here or else we will create prof shadows of
756 756 * the global zone symlink, which will make no sense at all in the
757 757 * non-global zone (it has no /devices for the symlink to point at).
758 758 *
759 759 * These zvols will be created later (at access time) by mk_ngz_node
760 760 * instead. The dirs leading up to them will be created by prof
761 761 * internally.
762 762 *
763 763 * We have to return EPERM here, because ENOENT is given special
764 764 * meaning by prof in this context.
765 765 */
766 766 if (getzoneid() != GLOBAL_ZONEID) {
767 767 rw_exit(&parent->sdev_contents);
768 768 return (EPERM);
769 769 }
770 770
771 771 dsname = devzvol_make_dsname(parent->sdev_path, nm);
772 772 rw_exit(&parent->sdev_contents);
773 773 sdcmn_err13(("rvp dsname %s", dsname ? dsname : "(null)"));
774 774 if (dsname) {
775 775 error = devzvol_objset_check(dsname, &do_type);
776 776 if (error != 0) {
777 777 error = ENOENT;
778 778 goto out;
779 779 }
780 780 if (do_type == DMU_OST_ZVOL)
781 781 expected_type = VLNK;
782 782 }
783 783 /*
784 784 * the callbacks expect:
785 785 *
786 786 * parent->sdev_path nm
787 787 * /dev/zvol {r}dsk
788 788 * /dev/zvol/{r}dsk <pool name>
789 789 * /dev/zvol/{r}dsk/<dataset name> <last ds component>
790 790 *
791 791 * sdev_name is always last path component of sdev_path
792 792 */
793 793 if (expected_type == VDIR) {
794 794 error = devname_lookup_func(parent, nm, vpp, cred,
795 795 devzvol_create_dir, SDEV_VATTR);
796 796 } else {
797 797 error = devname_lookup_func(parent, nm, vpp, cred,
798 798 devzvol_create_link, SDEV_VLINK);
799 799 }
800 800 sdcmn_err13(("devzvol_lookup %d %d", expected_type, error));
801 801 ASSERT(error || ((*vpp)->v_type == expected_type));
802 802 out:
803 803 if (dsname)
804 804 kmem_free(dsname, strlen(dsname) + 1);
805 805 sdcmn_err13(("devzvol_lookup %d", error));
806 806 return (error);
807 807 }
808 808
809 809 /*
810 810 * We allow create to find existing nodes
811 811 * - if the node doesn't exist - EROFS
812 812 * - creating an existing dir read-only succeeds, otherwise EISDIR
813 813 * - exclusive creates fail - EEXIST
814 814 */
815 815 /*ARGSUSED2*/
816 816 static int
817 817 devzvol_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl,
818 818 int mode, struct vnode **vpp, struct cred *cred, int flag,
819 819 caller_context_t *ct, vsecattr_t *vsecp)
820 820 {
821 821 int error;
822 822 struct vnode *vp;
823 823
824 824 *vpp = NULL;
825 825
826 826 error = devzvol_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL,
827 827 NULL);
828 828 if (error == 0) {
829 829 if (excl == EXCL)
830 830 error = EEXIST;
831 831 else if (vp->v_type == VDIR && (mode & VWRITE))
832 832 error = EISDIR;
833 833 else
834 834 error = VOP_ACCESS(vp, mode, 0, cred, ct);
835 835
836 836 if (error) {
837 837 VN_RELE(vp);
838 838 } else
839 839 *vpp = vp;
840 840 } else if (error == ENOENT) {
841 841 error = EROFS;
842 842 }
843 843
844 844 return (error);
845 845 }
846 846
847 847 void sdev_iter_snapshots(struct vnode *dvp, char *name);
848 848
849 849 void
850 850 sdev_iter_datasets(struct vnode *dvp, int arg, char *name)
851 851 {
852 852 zfs_cmd_t *zc;
853 853 int rc;
854 854
855 855 sdcmn_err13(("iter name is '%s' (arg %x)", name, arg));
856 856 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
857 857 (void) strcpy(zc->zc_name, name);
858 858
859 859 while ((rc = devzvol_handle_ioctl(arg, zc, B_FALSE)) == 0) {
860 860 struct vnode *vpp;
861 861 char *ptr;
862 862
863 863 sdcmn_err13((" name %s", zc->zc_name));
864 864 if (strchr(zc->zc_name, '$') || strchr(zc->zc_name, '%'))
865 865 goto skip;
866 866 ptr = strrchr(zc->zc_name, '/') + 1;
867 867 rc = devzvol_lookup(dvp, ptr, &vpp, NULL, 0, NULL,
868 868 kcred, NULL, NULL, NULL);
869 869 if (rc == 0) {
870 870 VN_RELE(vpp);
871 871 } else if (rc == ENOENT) {
872 872 goto skip;
873 873 } else {
874 874 /*
875 875 * EBUSY == problem with zvols's dmu holds?
876 876 * EPERM when in a NGZ and traversing up and out.
877 877 */
878 878 goto skip;
879 879 }
880 880 if (arg == ZFS_IOC_DATASET_LIST_NEXT &&
881 881 zc->zc_objset_stats.dds_type == DMU_OST_ZVOL &&
882 882 devzvol_snaps_allowed)
883 883 sdev_iter_snapshots(dvp, zc->zc_name);
884 884 skip:
885 885 (void) strcpy(zc->zc_name, name);
886 886 }
887 887 kmem_free(zc, sizeof (zfs_cmd_t));
888 888 }
889 889
890 890 void
891 891 sdev_iter_snapshots(struct vnode *dvp, char *name)
892 892 {
893 893 sdev_iter_datasets(dvp, ZFS_IOC_SNAPSHOT_LIST_NEXT, name);
894 894 }
895 895
896 896 /*ARGSUSED4*/
897 897 static int
898 898 devzvol_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
899 899 int *eofp, caller_context_t *ct_unused, int flags_unused)
900 900 {
901 901 struct sdev_node *sdvp = VTOSDEV(dvp);
902 902 char *ptr;
903 903
904 904 sdcmn_err13(("zv readdir of '%s' %s'", sdvp->sdev_path,
905 905 sdvp->sdev_name));
906 906
907 907 if (strcmp(sdvp->sdev_path, ZVOL_DIR) == 0) {
908 908 struct vnode *vp;
909 909
910 910 rw_exit(&sdvp->sdev_contents);
911 911 (void) devname_lookup_func(sdvp, "dsk", &vp, cred,
912 912 devzvol_create_dir, SDEV_VATTR);
913 913 VN_RELE(vp);
914 914 (void) devname_lookup_func(sdvp, "rdsk", &vp, cred,
915 915 devzvol_create_dir, SDEV_VATTR);
916 916 VN_RELE(vp);
917 917 rw_enter(&sdvp->sdev_contents, RW_READER);
918 918 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
919 919 }
920 920 if (uiop->uio_offset == 0)
921 921 devzvol_prunedir(sdvp);
922 922 ptr = sdvp->sdev_path + strlen(ZVOL_DIR);
923 923 if ((strcmp(ptr, "/dsk") == 0) || (strcmp(ptr, "/rdsk") == 0)) {
924 924 rw_exit(&sdvp->sdev_contents);
925 925 devzvol_create_pool_dirs(dvp);
926 926 rw_enter(&sdvp->sdev_contents, RW_READER);
927 927 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
928 928 }
929 929
930 930 ptr = strchr(ptr + 1, '/');
931 931 if (ptr == NULL)
932 932 return (ENOENT);
933 933 ptr++;
934 934 rw_exit(&sdvp->sdev_contents);
935 935 sdev_iter_datasets(dvp, ZFS_IOC_DATASET_LIST_NEXT, ptr);
936 936 rw_enter(&sdvp->sdev_contents, RW_READER);
937 937 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
938 938 }
939 939
940 940 const fs_operation_def_t devzvol_vnodeops_tbl[] = {
941 941 VOPNAME_READDIR, { .vop_readdir = devzvol_readdir },
942 942 VOPNAME_LOOKUP, { .vop_lookup = devzvol_lookup },
943 943 VOPNAME_CREATE, { .vop_create = devzvol_create },
944 944 VOPNAME_RENAME, { .error = fs_nosys },
945 945 VOPNAME_MKDIR, { .error = fs_nosys },
946 946 VOPNAME_RMDIR, { .error = fs_nosys },
947 947 VOPNAME_REMOVE, { .error = fs_nosys },
948 948 VOPNAME_SYMLINK, { .error = fs_nosys },
949 949 NULL, NULL
950 950 };
|
↓ open down ↓ |
950 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX