Print this page
11679 vn_rele() and friends should VERIFY after mutex
Reviewed by: Dan McDonald <danmcd@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/vnode.c
+++ new/usr/src/uts/common/fs/vnode.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2020 Joyent, Inc.
25 + * Copyright 2022 Spencer Evans-Cole.
25 26 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
26 27 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
27 28 */
28 29
29 30 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
30 31 /* All Rights Reserved */
31 32
32 33 /*
33 34 * University Copyright- Copyright (c) 1982, 1986, 1988
34 35 * The Regents of the University of California
35 36 * All Rights Reserved
36 37 *
37 38 * University Acknowledgment- Portions of this document are derived from
38 39 * software developed by the University of California, Berkeley, and its
39 40 * contributors.
40 41 */
41 42
42 43 #include <sys/types.h>
43 44 #include <sys/param.h>
44 45 #include <sys/t_lock.h>
45 46 #include <sys/errno.h>
46 47 #include <sys/cred.h>
47 48 #include <sys/user.h>
48 49 #include <sys/uio.h>
49 50 #include <sys/file.h>
50 51 #include <sys/pathname.h>
51 52 #include <sys/vfs.h>
52 53 #include <sys/vfs_opreg.h>
53 54 #include <sys/vnode.h>
54 55 #include <sys/filio.h>
55 56 #include <sys/rwstlock.h>
56 57 #include <sys/fem.h>
57 58 #include <sys/stat.h>
58 59 #include <sys/mode.h>
59 60 #include <sys/conf.h>
60 61 #include <sys/sysmacros.h>
61 62 #include <sys/cmn_err.h>
62 63 #include <sys/systm.h>
63 64 #include <sys/kmem.h>
64 65 #include <sys/debug.h>
65 66 #include <c2/audit.h>
66 67 #include <sys/acl.h>
67 68 #include <sys/nbmlock.h>
68 69 #include <sys/fcntl.h>
69 70 #include <fs/fs_subr.h>
70 71 #include <sys/taskq.h>
71 72 #include <fs/fs_reparse.h>
72 73 #include <sys/time.h>
73 74 #include <sys/sdt.h>
74 75
75 76 /* Determine if this vnode is a file that is read-only */
76 77 #define ISROFILE(vp) \
77 78 ((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
78 79 (vp)->v_type != VFIFO && vn_is_readonly(vp))
79 80
80 81 /* Tunable via /etc/system; used only by admin/install */
81 82 int nfs_global_client_only;
82 83
83 84 /*
84 85 * Array of vopstats_t for per-FS-type vopstats. This array has the same
85 86 * number of entries as and parallel to the vfssw table. (Arguably, it could
86 87 * be part of the vfssw table.) Once it's initialized, it's accessed using
87 88 * the same fstype index that is used to index into the vfssw table.
88 89 */
89 90 vopstats_t **vopstats_fstype;
90 91
91 92 /* vopstats initialization template used for fast initialization via bcopy() */
92 93 static vopstats_t *vs_templatep;
93 94
94 95 /* Kmem cache handle for vsk_anchor_t allocations */
95 96 kmem_cache_t *vsk_anchor_cache;
96 97
97 98 /* file events cleanup routine */
98 99 extern void free_fopdata(vnode_t *);
99 100
100 101 /*
101 102 * Root of AVL tree for the kstats associated with vopstats. Lock protects
102 103 * updates to vsktat_tree.
103 104 */
104 105 avl_tree_t vskstat_tree;
105 106 kmutex_t vskstat_tree_lock;
106 107
107 108 /* Global variable which enables/disables the vopstats collection */
108 109 int vopstats_enabled = 1;
109 110
110 111 /* Global used for empty/invalid v_path */
111 112 char *vn_vpath_empty = "";
112 113
113 114 /*
114 115 * forward declarations for internal vnode specific data (vsd)
115 116 */
116 117 static void *vsd_realloc(void *, size_t, size_t);
117 118
118 119 /*
119 120 * forward declarations for reparse point functions
120 121 */
121 122 static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
122 123
123 124 /*
124 125 * VSD -- VNODE SPECIFIC DATA
125 126 * The v_data pointer is typically used by a file system to store a
126 127 * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
127 128 * However, there are times when additional project private data needs
128 129 * to be stored separately from the data (node) pointed to by v_data.
129 130 * This additional data could be stored by the file system itself or
130 131 * by a completely different kernel entity. VSD provides a way for
131 132 * callers to obtain a key and store a pointer to private data associated
132 133 * with a vnode.
133 134 *
134 135 * Callers are responsible for protecting the vsd by holding v_vsd_lock
135 136 * for calls to vsd_set() and vsd_get().
136 137 */
137 138
138 139 /*
139 140 * vsd_lock protects:
140 141 * vsd_nkeys - creation and deletion of vsd keys
141 142 * vsd_list - insertion and deletion of vsd_node in the vsd_list
142 143 * vsd_destructor - adding and removing destructors to the list
143 144 */
144 145 static kmutex_t vsd_lock;
145 146 static uint_t vsd_nkeys; /* size of destructor array */
146 147 /* list of vsd_node's */
147 148 static list_t *vsd_list = NULL;
148 149 /* per-key destructor funcs */
149 150 static void (**vsd_destructor)(void *);
150 151
151 152 /*
152 153 * The following is the common set of actions needed to update the
153 154 * vopstats structure from a vnode op. Both VOPSTATS_UPDATE() and
154 155 * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
155 156 * recording of the bytes transferred. Since the code is similar
156 157 * but small, it is nearly a duplicate. Consequently any changes
157 158 * to one may need to be reflected in the other.
158 159 * Rundown of the variables:
159 160 * vp - Pointer to the vnode
160 161 * counter - Partial name structure member to update in vopstats for counts
161 162 * bytecounter - Partial name structure member to update in vopstats for bytes
162 163 * bytesval - Value to update in vopstats for bytes
163 164 * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
164 165 * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
165 166 */
166 167
167 168 #define VOPSTATS_UPDATE(vp, counter) { \
168 169 vfs_t *vfsp = (vp)->v_vfsp; \
169 170 if (vfsp && vfsp->vfs_implp && \
170 171 (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) { \
171 172 vopstats_t *vsp = &vfsp->vfs_vopstats; \
172 173 uint64_t *stataddr = &(vsp->n##counter.value.ui64); \
173 174 extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
174 175 size_t, uint64_t *); \
175 176 __dtrace_probe___fsinfo_##counter(vp, 0, stataddr); \
176 177 (*stataddr)++; \
177 178 if ((vsp = vfsp->vfs_fstypevsp) != NULL) { \
178 179 vsp->n##counter.value.ui64++; \
179 180 } \
180 181 } \
181 182 }
182 183
183 184 #define VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) { \
184 185 vfs_t *vfsp = (vp)->v_vfsp; \
185 186 if (vfsp && vfsp->vfs_implp && \
186 187 (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) { \
187 188 vopstats_t *vsp = &vfsp->vfs_vopstats; \
188 189 uint64_t *stataddr = &(vsp->n##counter.value.ui64); \
189 190 extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
190 191 size_t, uint64_t *); \
191 192 __dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
192 193 (*stataddr)++; \
193 194 vsp->bytecounter.value.ui64 += bytesval; \
194 195 if ((vsp = vfsp->vfs_fstypevsp) != NULL) { \
195 196 vsp->n##counter.value.ui64++; \
196 197 vsp->bytecounter.value.ui64 += bytesval; \
197 198 } \
198 199 } \
199 200 }
200 201
201 202 /*
202 203 * If the filesystem does not support XIDs map credential
203 204 * If the vfsp is NULL, perhaps we should also map?
204 205 */
205 206 #define VOPXID_MAP_CR(vp, cr) { \
206 207 vfs_t *vfsp = (vp)->v_vfsp; \
207 208 if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0) \
208 209 cr = crgetmapped(cr); \
209 210 }
210 211
211 212 /*
212 213 * Convert stat(2) formats to vnode types and vice versa. (Knows about
213 214 * numerical order of S_IFMT and vnode types.)
214 215 */
215 216 enum vtype iftovt_tab[] = {
216 217 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
217 218 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
218 219 };
219 220
220 221 ushort_t vttoif_tab[] = {
221 222 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
222 223 S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
223 224 };
224 225
225 226 /*
226 227 * The system vnode cache.
227 228 */
228 229
229 230 kmem_cache_t *vn_cache;
230 231
231 232
232 233 /*
233 234 * Vnode operations vector.
234 235 */
235 236
236 237 static const fs_operation_trans_def_t vn_ops_table[] = {
237 238 VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
238 239 fs_nosys, fs_nosys,
239 240
240 241 VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
241 242 fs_nosys, fs_nosys,
242 243
243 244 VOPNAME_READ, offsetof(struct vnodeops, vop_read),
244 245 fs_nosys, fs_nosys,
245 246
246 247 VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
247 248 fs_nosys, fs_nosys,
248 249
249 250 VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
250 251 fs_nosys, fs_nosys,
251 252
252 253 VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
253 254 fs_setfl, fs_nosys,
254 255
255 256 VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
256 257 fs_nosys, fs_nosys,
257 258
258 259 VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
259 260 fs_nosys, fs_nosys,
260 261
261 262 VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
262 263 fs_nosys, fs_nosys,
263 264
264 265 VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
265 266 fs_nosys, fs_nosys,
266 267
267 268 VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
268 269 fs_nosys, fs_nosys,
269 270
270 271 VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
271 272 fs_nosys, fs_nosys,
272 273
273 274 VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
274 275 fs_nosys, fs_nosys,
275 276
276 277 VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
277 278 fs_nosys, fs_nosys,
278 279
279 280 VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
280 281 fs_nosys, fs_nosys,
281 282
282 283 VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
283 284 fs_nosys, fs_nosys,
284 285
285 286 VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
286 287 fs_nosys, fs_nosys,
287 288
288 289 VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
289 290 fs_nosys, fs_nosys,
290 291
291 292 VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
292 293 fs_nosys, fs_nosys,
293 294
294 295 VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
295 296 fs_nosys, fs_nosys,
296 297
297 298 VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
298 299 fs_nosys, fs_nosys,
299 300
300 301 VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
301 302 fs_nosys, fs_nosys,
302 303
303 304 VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
304 305 fs_rwlock, fs_rwlock,
305 306
306 307 VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
307 308 (fs_generic_func_p)(uintptr_t)fs_rwunlock,
308 309 (fs_generic_func_p)(uintptr_t)fs_rwunlock, /* no errors allowed */
309 310
310 311 VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
311 312 fs_nosys, fs_nosys,
312 313
313 314 VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
314 315 fs_cmp, fs_cmp, /* no errors allowed */
315 316
316 317 VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
317 318 fs_frlock, fs_nosys,
318 319
319 320 VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
320 321 fs_nosys, fs_nosys,
321 322
322 323 VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
323 324 fs_nosys, fs_nosys,
324 325
325 326 VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
326 327 fs_nosys, fs_nosys,
327 328
328 329 VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
329 330 fs_nosys, fs_nosys,
330 331
331 332 VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
332 333 (fs_generic_func_p) fs_nosys_map,
333 334 (fs_generic_func_p) fs_nosys_map,
334 335
335 336 VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
336 337 (fs_generic_func_p) fs_nosys_addmap,
337 338 (fs_generic_func_p) fs_nosys_addmap,
338 339
339 340 VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
340 341 fs_nosys, fs_nosys,
341 342
342 343 VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
343 344 (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
344 345
345 346 VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
346 347 fs_nosys, fs_nosys,
347 348
348 349 VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
349 350 fs_pathconf, fs_nosys,
350 351
351 352 VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
352 353 fs_nosys, fs_nosys,
353 354
354 355 VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
355 356 fs_nosys, fs_nosys,
356 357
357 358 VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
358 359 (fs_generic_func_p)(uintptr_t)fs_dispose,
359 360 (fs_generic_func_p)(uintptr_t)fs_nodispose,
360 361
361 362 VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
362 363 fs_nosys, fs_nosys,
363 364
364 365 VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
365 366 fs_fab_acl, fs_nosys,
366 367
367 368 VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
368 369 fs_shrlock, fs_nosys,
369 370
370 371 VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
371 372 (fs_generic_func_p) fs_vnevent_nosupport,
372 373 (fs_generic_func_p) fs_vnevent_nosupport,
373 374
374 375 VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
375 376 fs_nosys, fs_nosys,
376 377
377 378 VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
378 379 fs_nosys, fs_nosys,
379 380
380 381 NULL, 0, NULL, NULL
381 382 };
382 383
383 384 /* Extensible attribute (xva) routines. */
384 385
385 386 /*
386 387 * Zero out the structure, set the size of the requested/returned bitmaps,
387 388 * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
388 389 * to the returned attributes array.
389 390 */
390 391 void
391 392 xva_init(xvattr_t *xvap)
392 393 {
393 394 bzero(xvap, sizeof (xvattr_t));
394 395 xvap->xva_mapsize = XVA_MAPSIZE;
395 396 xvap->xva_magic = XVA_MAGIC;
396 397 xvap->xva_vattr.va_mask = AT_XVATTR;
397 398 xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
398 399 }
399 400
400 401 /*
401 402 * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
402 403 * structure. Otherwise, returns NULL.
403 404 */
404 405 xoptattr_t *
405 406 xva_getxoptattr(xvattr_t *xvap)
406 407 {
407 408 xoptattr_t *xoap = NULL;
408 409 if (xvap->xva_vattr.va_mask & AT_XVATTR)
409 410 xoap = &xvap->xva_xoptattrs;
410 411 return (xoap);
411 412 }
412 413
413 414 /*
414 415 * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
415 416 * We use the f_fsid reported by VFS_STATVFS() since we use that for the
416 417 * kstat name.
417 418 */
418 419 static int
419 420 vska_compar(const void *n1, const void *n2)
420 421 {
421 422 int ret;
422 423 ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
423 424 ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;
424 425
425 426 if (p1 < p2) {
426 427 ret = -1;
427 428 } else if (p1 > p2) {
428 429 ret = 1;
429 430 } else {
430 431 ret = 0;
431 432 }
432 433
433 434 return (ret);
434 435 }
435 436
436 437 /*
437 438 * Used to create a single template which will be bcopy()ed to a newly
438 439 * allocated vsanchor_combo_t structure in new_vsanchor(), below.
439 440 */
/*
 * Allocate and populate the single vopstats template; callers bcopy()
 * it into freshly allocated vopstats so the ~50 kstat_named_init()
 * calls below happen only once.  Returns the allocated template
 * (KM_SLEEP: blocks until memory is available, never returns NULL).
 */
static vopstats_t *
create_vopstats_template()
{
	vopstats_t *vsp;

	vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
	bzero(vsp, sizeof (*vsp));	/* Start fresh */

	/* VOP_OPEN */
	kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
	/* VOP_CLOSE */
	kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
	/* VOP_READ I/O */
	kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
	/* VOP_WRITE I/O */
	kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
	/* VOP_IOCTL */
	kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
	/* VOP_SETFL */
	kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
	/* VOP_GETATTR */
	kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
	/* VOP_SETATTR */
	kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
	/* VOP_ACCESS */
	kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
	/* VOP_LOOKUP */
	kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
	/* VOP_CREATE */
	kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
	/* VOP_REMOVE */
	kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
	/* VOP_LINK */
	kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
	/* VOP_RENAME */
	kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
	/* VOP_MKDIR */
	kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
	/* VOP_RMDIR */
	kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
	/* VOP_READDIR I/O */
	kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
	    KSTAT_DATA_UINT64);
	/* VOP_SYMLINK */
	kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
	/* VOP_READLINK */
	kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
	/* VOP_FSYNC */
	kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
	/* VOP_INACTIVE */
	kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
	/* VOP_FID */
	kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
	/* VOP_RWLOCK */
	kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
	/* VOP_RWUNLOCK */
	kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
	/* VOP_SEEK */
	kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
	/* VOP_CMP */
	kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
	/* VOP_FRLOCK */
	kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
	/* VOP_SPACE */
	kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
	/* VOP_REALVP */
	kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
	/* VOP_GETPAGE */
	kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
	/* VOP_PUTPAGE */
	kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
	/* VOP_MAP */
	kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
	/* VOP_ADDMAP */
	kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
	/* VOP_DELMAP */
	kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
	/* VOP_POLL */
	kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
	/* VOP_DUMP */
	kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
	/* VOP_PATHCONF */
	kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
	/* VOP_PAGEIO */
	kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
	/* VOP_DUMPCTL */
	kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
	/* VOP_DISPOSE */
	kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
	/* VOP_SETSECATTR */
	kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
	/* VOP_GETSECATTR */
	kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
	/* VOP_SHRLOCK */
	kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
	/* VOP_VNEVENT */
	kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
	/* VOP_REQZCBUF */
	kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
	/* VOP_RETZCBUF */
	kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);

	return (vsp);
}
547 548
548 549 /*
549 550 * Creates a kstat structure associated with a vopstats structure.
550 551 */
551 552 kstat_t *
552 553 new_vskstat(char *ksname, vopstats_t *vsp)
553 554 {
554 555 kstat_t *ksp;
555 556
556 557 if (!vopstats_enabled) {
557 558 return (NULL);
558 559 }
559 560
560 561 ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
561 562 sizeof (vopstats_t)/sizeof (kstat_named_t),
562 563 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
563 564 if (ksp) {
564 565 ksp->ks_data = vsp;
565 566 kstat_install(ksp);
566 567 }
567 568
568 569 return (ksp);
569 570 }
570 571
571 572 /*
572 573 * Called from vfsinit() to initialize the support mechanisms for vopstats
573 574 */
574 575 void
575 576 vopstats_startup()
576 577 {
577 578 if (!vopstats_enabled)
578 579 return;
579 580
580 581 /*
581 582 * Creates the AVL tree which holds per-vfs vopstat anchors. This
582 583 * is necessary since we need to check if a kstat exists before we
583 584 * attempt to create it. Also, initialize its lock.
584 585 */
585 586 avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
586 587 offsetof(vsk_anchor_t, vsk_node));
587 588 mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);
588 589
589 590 vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
590 591 sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
591 592 NULL, NULL, 0);
592 593
593 594 /*
594 595 * Set up the array of pointers for the vopstats-by-FS-type.
595 596 * The entries will be allocated/initialized as each file system
596 597 * goes through modload/mod_installfs.
597 598 */
598 599 vopstats_fstype = (vopstats_t **)kmem_zalloc(
599 600 (sizeof (vopstats_t *) * nfstype), KM_SLEEP);
600 601
601 602 /* Set up the global vopstats initialization template */
602 603 vs_templatep = create_vopstats_template();
603 604 }
604 605
605 606 /*
606 607 * We need to have the all of the counters zeroed.
607 608 * The initialization of the vopstats_t includes on the order of
608 609 * 50 calls to kstat_named_init(). Rather that do that on every call,
609 610 * we do it once in a template (vs_templatep) then bcopy it over.
610 611 */
611 612 void
612 613 initialize_vopstats(vopstats_t *vsp)
613 614 {
614 615 if (vsp == NULL)
615 616 return;
616 617
617 618 bcopy(vs_templatep, vsp, sizeof (vopstats_t));
618 619 }
619 620
620 621 /*
621 622 * If possible, determine which vopstats by fstype to use and
622 623 * return a pointer to the caller.
623 624 */
624 625 vopstats_t *
625 626 get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
626 627 {
627 628 int fstype = 0; /* Index into vfssw[] */
628 629 vopstats_t *vsp = NULL;
629 630
630 631 if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
631 632 !vopstats_enabled)
632 633 return (NULL);
633 634 /*
634 635 * Set up the fstype. We go to so much trouble because all versions
635 636 * of NFS use the same fstype in their vfs even though they have
636 637 * distinct entries in the vfssw[] table.
637 638 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
638 639 */
639 640 if (vswp) {
640 641 fstype = vswp - vfssw; /* Gets us the index */
641 642 } else {
642 643 fstype = vfsp->vfs_fstype;
643 644 }
644 645
645 646 /*
646 647 * Point to the per-fstype vopstats. The only valid values are
647 648 * non-zero positive values less than the number of vfssw[] table
648 649 * entries.
649 650 */
650 651 if (fstype > 0 && fstype < nfstype) {
651 652 vsp = vopstats_fstype[fstype];
652 653 }
653 654
654 655 return (vsp);
655 656 }
656 657
657 658 /*
658 659 * Generate a kstat name, create the kstat structure, and allocate a
659 660 * vsk_anchor_t to hold it together. Return the pointer to the vsk_anchor_t
660 661 * to the caller. This must only be called from a mount.
661 662 */
/*
 * Build a kstat name from the vfs's fsid, insert a new vsk_anchor_t into
 * the global AVL tree, and create the associated kstat.  Returns the
 * anchor, or NULL if stats are disabled, VFS_STATVFS() fails, or an
 * anchor for this fsid already exists.  Must only be called from a mount.
 */
vsk_anchor_t *
get_vskstat_anchor(vfs_t *vfsp)
{
	char kstatstr[KSTAT_STRLEN];	/* kstat name for vopstats */
	statvfs64_t statvfsbuf;		/* Needed to find f_fsid */
	vsk_anchor_t *vskp = NULL;	/* vfs <--> kstat anchor */
	kstat_t *ksp;			/* Ptr to new kstat */
	avl_index_t where;		/* Location in the AVL tree */

	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
		return (NULL);

	/* Need to get the fsid to build a kstat name */
	if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
		/* Create a name for our kstats based on fsid */
		(void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
		    VOPSTATS_STR, statvfsbuf.f_fsid);

		/* Allocate and initialize the vsk_anchor_t */
		vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
		bzero(vskp, sizeof (*vskp));
		vskp->vsk_fsid = statvfsbuf.f_fsid;

		mutex_enter(&vskstat_tree_lock);
		if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
			avl_insert(&vskstat_tree, vskp, where);
			/*
			 * NOTE(review): the tree lock is dropped before
			 * kstat creation — presumably because
			 * new_vskstat()/kstat_create() may block; confirm
			 * before reordering.
			 */
			mutex_exit(&vskstat_tree_lock);

			/*
			 * Now that we've got the anchor in the AVL
			 * tree, we can create the kstat.
			 */
			ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
			if (ksp) {
				vskp->vsk_ksp = ksp;
			}
		} else {
			/* Oops, found one! Release memory and lock. */
			mutex_exit(&vskstat_tree_lock);
			kmem_cache_free(vsk_anchor_cache, vskp);
			vskp = NULL;
		}
	}
	return (vskp);
}
708 709
709 710 /*
710 711 * We're in the process of tearing down the vfs and need to cleanup
711 712 * the data structures associated with the vopstats. Must only be called
712 713 * from dounmount().
713 714 */
714 715 void
715 716 teardown_vopstats(vfs_t *vfsp)
716 717 {
717 718 vsk_anchor_t *vskap;
718 719 avl_index_t where;
719 720
720 721 if (vfsp == NULL || vfsp->vfs_implp == NULL ||
721 722 (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
722 723 return;
723 724
724 725 /* This is a safe check since VFS_STATS must be set (see above) */
725 726 if ((vskap = vfsp->vfs_vskap) == NULL)
726 727 return;
727 728
728 729 /* Whack the pointer right away */
729 730 vfsp->vfs_vskap = NULL;
730 731
731 732 /* Lock the tree, remove the node, and delete the kstat */
732 733 mutex_enter(&vskstat_tree_lock);
733 734 if (avl_find(&vskstat_tree, vskap, &where)) {
734 735 avl_remove(&vskstat_tree, vskap);
735 736 }
736 737
737 738 if (vskap->vsk_ksp) {
738 739 kstat_delete(vskap->vsk_ksp);
739 740 }
740 741 mutex_exit(&vskstat_tree_lock);
741 742
742 743 kmem_cache_free(vsk_anchor_cache, vskap);
743 744 }
744 745
745 746 /*
746 747 * Read or write a vnode. Called from kernel code.
747 748 */
/*
 * Perform a kernel read or write of [offset, offset+len) on vp using a
 * single-iovec uio.  Returns 0 or an errno; EROFS for writes to
 * read-only files, EIO for negative lengths or (when residp is NULL)
 * short transfers.  If residp is non-NULL the untransferred byte count
 * is stored there instead of being treated as an error.
 */
int
vn_rdwr(
	enum uio_rw rw,
	struct vnode *vp,
	caddr_t base,
	ssize_t len,
	offset_t offset,
	enum uio_seg seg,
	int ioflag,
	rlim64_t ulimit,	/* meaningful only if rw is UIO_WRITE */
	cred_t *cr,
	ssize_t *residp)
{
	struct uio uio;
	struct iovec iov;
	int error;
	int in_crit = 0;	/* nonzero once nbl_start_crit() is held */

	if (rw == UIO_WRITE && ISROFILE(vp))
		return (EROFS);

	if (len < 0)
		return (EIO);

	/* Map the credential if the filesystem does not support XIDs. */
	VOPXID_MAP_CR(vp, cr);

	iov.iov_base = base;
	iov.iov_len = len;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_loffset = offset;
	uio.uio_segflg = (short)seg;
	uio.uio_resid = len;
	uio.uio_llimit = ulimit;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, cr, &svmand);
		if (error != 0)
			goto done;
		/* NBMAND conflict with the transfer range => access denied */
		if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
		    uio.uio_offset, uio.uio_resid, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	(void) VOP_RWLOCK(vp,
	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
	if (rw == UIO_WRITE) {
		uio.uio_fmode = FWRITE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
	} else {
		uio.uio_fmode = FREAD;
		uio.uio_extflg = UIO_COPY_CACHED;
		error = VOP_READ(vp, &uio, ioflag, cr, NULL);
	}
	VOP_RWUNLOCK(vp,
	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
	/* Report residual to the caller, or fail short transfers with EIO. */
	if (residp)
		*residp = uio.uio_resid;
	else if (uio.uio_resid)
		error = EIO;

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}
825 826
826 827 /*
827 828 * Release a vnode. Call VOP_INACTIVE on last reference or
828 829 * decrement reference count.
829 830 *
|
↓ open down ↓ |
795 lines elided |
↑ open up ↑ |
830 831 * To avoid race conditions, the v_count is left at 1 for
831 832 * the call to VOP_INACTIVE. This prevents another thread
832 833 * from reclaiming and releasing the vnode *before* the
833 834 * VOP_INACTIVE routine has a chance to destroy the vnode.
834 835 * We can't have more than 1 thread calling VOP_INACTIVE
835 836 * on a vnode.
836 837 */
837 838 void
838 839 vn_rele(vnode_t *vp)
839 840 {
840 - VERIFY(vp->v_count > 0);
841 841 mutex_enter(&vp->v_lock);
842 842 if (vp->v_count == 1) {
843 843 mutex_exit(&vp->v_lock);
844 844 VOP_INACTIVE(vp, CRED(), NULL);
845 845 return;
846 + } else {
847 + VERIFY(vp->v_count > 0);
846 848 }
847 849 VN_RELE_LOCKED(vp);
848 850 mutex_exit(&vp->v_lock);
849 851 }
850 852
851 853 /*
852 854 * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
853 855 * as a single reference, so v_count is not decremented until the last DNLC hold
854 856 * is released. This makes it possible to distinguish vnodes that are referenced
855 857 * only by the DNLC.
856 858 */
857 859 void
858 860 vn_rele_dnlc(vnode_t *vp)
859 861 {
860 - VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
861 862 mutex_enter(&vp->v_lock);
863 + VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
862 864 if (--vp->v_count_dnlc == 0) {
863 865 if (vp->v_count == 1) {
864 866 mutex_exit(&vp->v_lock);
865 867 VOP_INACTIVE(vp, CRED(), NULL);
866 868 return;
867 869 }
868 870 VN_RELE_LOCKED(vp);
869 871 }
870 872 mutex_exit(&vp->v_lock);
871 873 }
872 874
|
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
873 875 /*
874 876 * Like vn_rele() except that it clears v_stream under v_lock.
875 877 * This is used by sockfs when it dismantles the association between
876 878 * the sockfs node and the vnode in the underlying file system.
877 879 * v_lock has to be held to prevent a thread coming through the lookupname
878 880 * path from accessing a stream head that is going away.
879 881 */
880 882 void
881 883 vn_rele_stream(vnode_t *vp)
882 884 {
883 - VERIFY(vp->v_count > 0);
884 885 mutex_enter(&vp->v_lock);
885 886 vp->v_stream = NULL;
886 887 if (vp->v_count == 1) {
887 888 mutex_exit(&vp->v_lock);
888 889 VOP_INACTIVE(vp, CRED(), NULL);
889 890 return;
891 + } else {
892 + VERIFY(vp->v_count > 0);
890 893 }
891 894 VN_RELE_LOCKED(vp);
892 895 mutex_exit(&vp->v_lock);
893 896 }
894 897
/*
 * Taskq callback used by vn_rele_async(): performs the deferred
 * VOP_INACTIVE on a vnode whose last reference was released.
 */
static void
vn_rele_inactive(vnode_t *vp)
{
	VOP_INACTIVE(vp, CRED(), NULL);
}
900 903
/*
 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
 * the file system as a result of releasing the vnode. Note, file systems
 * already have to handle the race where the vnode is incremented before the
 * inactive routine is called and does its locking.
 *
 * Warning: Excessive use of this routine can lead to performance problems.
 * This is because taskqs throttle back allocation if too many are created.
 */
911 914 void
912 915 vn_rele_async(vnode_t *vp, taskq_t *taskq)
913 916 {
914 - VERIFY(vp->v_count > 0);
915 917 mutex_enter(&vp->v_lock);
916 918 if (vp->v_count == 1) {
917 919 mutex_exit(&vp->v_lock);
918 920 VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
919 921 vp, TQ_SLEEP) != TASKQID_INVALID);
920 922 return;
923 + } else {
924 + VERIFY(vp->v_count > 0);
921 925 }
922 926 VN_RELE_LOCKED(vp);
923 927 mutex_exit(&vp->v_lock);
924 928 }
925 929
int
vn_open(
	char *pnamep,
	enum uio_seg seg,
	int filemode,
	int createmode,
	struct vnode **vpp,
	enum create crwhy,
	mode_t umask)
{
	/*
	 * Convenience wrapper around vn_openat(): resolve relative to the
	 * current directory (NULL startvp) with no caller fd (-1).
	 */
	return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
	    umask, NULL, -1));
}
939 943
940 944
941 945 /*
942 946 * Open/create a vnode.
943 947 * This may be callable by the kernel, the only known use
944 948 * of user context being that the current user credentials
945 949 * are used for permissions. crwhy is defined iff filemode & FCREAT.
946 950 */
int
vn_openat(
	char *pnamep,
	enum uio_seg seg,
	int filemode,
	int createmode,
	struct vnode **vpp,
	enum create crwhy,
	mode_t umask,
	struct vnode *startvp,
	int fd)
{
	struct vnode *vp;
	int mode;
	int accessflags;
	int error;
	int in_crit = 0;		/* inside an nbmand critical region */
	int open_done = 0;		/* VOP_OPEN succeeded; undo on error */
	int shrlock_done = 0;		/* share reservation taken; undo */
	struct vattr vattr;
	enum symfollow follow;
	int estale_retry = 0;
	struct shrlock shr;
	struct shr_locowner shr_own;
	boolean_t create;

	mode = 0;
	accessflags = 0;
	/* Translate open(2)-style flags into a VOP_ACCESS permission mask. */
	if (filemode & FREAD)
		mode |= VREAD;
	if (filemode & (FWRITE|FTRUNC))
		mode |= VWRITE;
	if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
		mode |= VEXEC;

	/* symlink interpretation */
	if (filemode & FNOFOLLOW)
		follow = NO_FOLLOW;
	else
		follow = FOLLOW;

	if (filemode & FAPPEND)
		accessflags |= V_APPEND;

	/*
	 * We need to handle the case of FCREAT | FDIRECTORY and the case of
	 * FEXCL. If all three are specified, then we always fail because we
	 * cannot create a directory through this interface and FEXCL says we
	 * need to fail the request if we can't create it. If, however, only
	 * FCREAT | FDIRECTORY are specified, then we can treat this as the case
	 * of opening a file that already exists. If it exists, we can do
	 * something and if not, we fail. Effectively FCREAT | FDIRECTORY is
	 * treated as FDIRECTORY.
	 */
	if ((filemode & (FCREAT | FDIRECTORY | FEXCL)) ==
	    (FCREAT | FDIRECTORY | FEXCL)) {
		return (EINVAL);
	}

	if ((filemode & (FCREAT | FDIRECTORY)) == (FCREAT | FDIRECTORY)) {
		create = B_FALSE;
	} else if ((filemode & FCREAT) != 0) {
		create = B_TRUE;
	} else {
		create = B_FALSE;
	}

top:
	if (create) {
		enum vcexcl excl;

		/*
		 * Wish to create a file.
		 */
		vattr.va_type = VREG;
		vattr.va_mode = createmode;
		vattr.va_mask = AT_TYPE|AT_MODE;
		if (filemode & FTRUNC) {
			vattr.va_size = 0;
			vattr.va_mask |= AT_SIZE;
		}
		if (filemode & FEXCL)
			excl = EXCL;
		else
			excl = NONEXCL;

		if (error =
		    vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
		    (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
			return (error);
	} else {
		/*
		 * Wish to open a file. Just look it up.
		 */
		if (error = lookupnameat(pnamep, seg, follow,
		    NULLVPP, &vp, startvp)) {
			if ((error == ESTALE) &&
			    fs_need_estale_retry(estale_retry++))
				goto top;
			return (error);
		}

		/*
		 * Get the attributes to check whether file is large.
		 * We do this only if the FOFFMAX flag is not set and
		 * only for regular files.
		 */

		if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0,
			    CRED(), NULL))) {
				goto out;
			}
			if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
				/*
				 * Large File API - regular open fails
				 * if FOFFMAX flag is set in file mode
				 */
				error = EOVERFLOW;
				goto out;
			}
		}
		/*
		 * Can't write directories, active texts, or
		 * read-only filesystems. Can't truncate files
		 * on which mandatory locking is in effect.
		 */
		if (filemode & (FWRITE|FTRUNC)) {
			/*
			 * Allow writable directory if VDIROPEN flag is set.
			 */
			if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
				error = EISDIR;
				goto out;
			}
			if (ISROFILE(vp)) {
				error = EROFS;
				goto out;
			}
			/*
			 * Can't truncate files on which
			 * sysv mandatory locking is in effect.
			 */
			if (filemode & FTRUNC) {
				vnode_t *rvp;

				/* Check locks on the real vnode (e.g. lofs). */
				if (VOP_REALVP(vp, &rvp, NULL) != 0)
					rvp = vp;
				if (rvp->v_filocks != NULL) {
					vattr.va_mask = AT_MODE;
					if ((error = VOP_GETATTR(vp,
					    &vattr, 0, CRED(), NULL)) == 0 &&
					    MANDLOCK(vp, vattr.va_mode))
						error = EAGAIN;
				}
			}
			if (error)
				goto out;
		}
		/*
		 * Check permissions.
		 */
		if (error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL))
			goto out;

		/*
		 * Require FSEARCH and FDIRECTORY to return a directory. Require
		 * FEXEC to return a regular file.
		 */
		if ((filemode & (FSEARCH|FDIRECTORY)) != 0 &&
		    vp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		}
		if ((filemode & FEXEC) && vp->v_type != VREG) {
			error = ENOEXEC;	/* XXX: error code? */
			goto out;
		}
	}

	/*
	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
	 */
	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
		error = ELOOP;
		goto out;
	}
	if (filemode & FNOLINKS) {
		vattr.va_mask = AT_NLINK;
		if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))) {
			goto out;
		}
		if (vattr.va_nlink != 1) {
			error = EMLINK;
			goto out;
		}
	}

	/*
	 * Opening a socket corresponding to the AF_UNIX pathname
	 * in the filesystem name space is not supported.
	 * However, VSOCK nodes in namefs are supported in order
	 * to make fattach work for sockets.
	 *
	 * XXX This uses VOP_REALVP to distinguish between
	 * an unopened namefs node (where VOP_REALVP returns a
	 * different VSOCK vnode) and a VSOCK created by vn_create
	 * in some file system (where VOP_REALVP would never return
	 * a different vnode).
	 */
	if (vp->v_type == VSOCK) {
		struct vnode *nvp;

		error = VOP_REALVP(vp, &nvp, NULL);
		if (error != 0 || nvp == NULL || nvp == vp ||
		    nvp->v_type != VSOCK) {
			error = EOPNOTSUPP;
			goto out;
		}
	}

	if ((vp->v_type == VREG) && nbl_need_check(vp)) {
		/* get share reservation */
		shr.s_access = 0;
		if (filemode & FWRITE)
			shr.s_access |= F_WRACC;
		if (filemode & FREAD)
			shr.s_access |= F_RDACC;
		shr.s_deny = 0;
		shr.s_sysid = 0;
		shr.s_pid = ttoproc(curthread)->p_pid;
		shr_own.sl_pid = shr.s_pid;
		shr_own.sl_id = fd;
		shr.s_own_len = sizeof (shr_own);
		shr.s_owner = (caddr_t)&shr_own;
		error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
		    NULL);
		if (error)
			goto out;
		shrlock_done = 1;

		/* nbmand conflict check if truncating file */
		if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;

			vattr.va_mask = AT_SIZE;
			if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
				goto out;
			if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
			    NULL)) {
				error = EACCES;
				goto out;
			}
		}
	}

	/*
	 * Do opening protocol.
	 */
	error = VOP_OPEN(&vp, filemode, CRED(), NULL);
	if (error)
		goto out;
	open_done = 1;

	/*
	 * Truncate if required.
	 */
	if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
		vattr.va_size = 0;
		vattr.va_mask = AT_SIZE;
		if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
			goto out;
	}

	/*
	 * Turn on directio, if requested.
	 */
	if (filemode & FDIRECT) {
		if ((error = VOP_IOCTL(vp, _FIODIRECTIO, DIRECTIO_ON, 0,
		    CRED(), NULL, NULL)) != 0) {
			/*
			 * On Linux, O_DIRECT returns EINVAL when the file
			 * system does not support directio, so we'll do the
			 * same.
			 */
			error = EINVAL;
			goto out;
		}
	}
out:
	/* The lookup/create above returned a held vnode. */
	ASSERT(vp->v_count > 0);

	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (error) {
		/*
		 * Unwind in reverse order: close undoes both the open and
		 * (implicitly) the share reservation; otherwise release the
		 * reservation explicitly.
		 */
		if (open_done) {
			(void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
			    NULL);
			open_done = 0;
			shrlock_done = 0;
		}
		if (shrlock_done) {
			(void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
			    NULL);
			shrlock_done = 0;
		}

		/*
		 * The following clause was added to handle a problem
		 * with NFS consistency. It is possible that a lookup
		 * of the file to be opened succeeded, but the file
		 * itself doesn't actually exist on the server. This
		 * is chiefly due to the DNLC containing an entry for
		 * the file which has been removed on the server. In
		 * this case, we just start over. If there was some
		 * other cause for the ESTALE error, then the lookup
		 * of the file will fail and the error will be returned
		 * above instead of looping around from here.
		 */
		VN_RELE(vp);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
	} else
		*vpp = vp;
	return (error);
}
1277 1281
1278 1282 /*
1279 1283 * The following two accessor functions are for the NFSv4 server. Since there
1280 1284 * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
1281 1285 * vnode open counts correct when a client "upgrades" an open or does an
1282 1286 * open_downgrade. In NFS, an upgrade or downgrade can not only change the
1283 1287 * open mode (add or subtract read or write), but also change the share/deny
1284 1288 * modes. However, share reservations are not integrated with OPEN, yet, so
1285 1289 * we need to handle each separately. These functions are cleaner than having
1286 1290 * the NFS server manipulate the counts directly, however, nobody else should
1287 1291 * use these functions.
1288 1292 */
1289 1293 void
1290 1294 vn_open_upgrade(
1291 1295 vnode_t *vp,
1292 1296 int filemode)
1293 1297 {
1294 1298 ASSERT(vp->v_type == VREG);
1295 1299
1296 1300 if (filemode & FREAD)
1297 1301 atomic_inc_32(&vp->v_rdcnt);
1298 1302 if (filemode & FWRITE)
1299 1303 atomic_inc_32(&vp->v_wrcnt);
1300 1304
1301 1305 }
1302 1306
1303 1307 void
1304 1308 vn_open_downgrade(
1305 1309 vnode_t *vp,
1306 1310 int filemode)
1307 1311 {
1308 1312 ASSERT(vp->v_type == VREG);
1309 1313
1310 1314 if (filemode & FREAD) {
1311 1315 ASSERT(vp->v_rdcnt > 0);
1312 1316 atomic_dec_32(&vp->v_rdcnt);
1313 1317 }
1314 1318 if (filemode & FWRITE) {
1315 1319 ASSERT(vp->v_wrcnt > 0);
1316 1320 atomic_dec_32(&vp->v_wrcnt);
1317 1321 }
1318 1322
1319 1323 }
1320 1324
int
vn_create(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask)
{
	/*
	 * Convenience wrapper around vn_createat(): resolve the path
	 * relative to the current directory (NULL startvp).
	 */
	return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
	    umask, NULL));
}
1336 1340
1337 1341 /*
1338 1342 * Create a vnode (makenode).
1339 1343 */
int
vn_createat(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask,
	struct vnode *startvp)
{
	struct vnode *dvp;	/* ptr to parent dir vnode */
	struct vnode *vp = NULL;
	struct pathname pn;
	int error;
	int in_crit = 0;	/* inside an nbmand critical region */
	struct vattr vattr;
	enum symfollow follow;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/* symlink interpretation */
	if ((flag & FNOFOLLOW) || excl == EXCL)
		follow = NO_FOLLOW;
	else
		follow = FOLLOW;
	flag &= ~(FNOFOLLOW|FNOLINKS);

top:
	/*
	 * Lookup directory.
	 * If new object is a file, call lower level to create it.
	 * Note that it is up to the lower level to enforce exclusive
	 * creation, if the file is already there.
	 * This allows the lower level to do whatever
	 * locking or protocol that is needed to prevent races.
	 * If the new object is directory call lower level to make
	 * the new directory, with "." and "..".
	 */
	if (error = pn_get(pnamep, seg, &pn))
		return (error);
	if (auditing)
		audit_vncreate_start();
	dvp = NULL;
	*vpp = NULL;
	/*
	 * lookup will find the parent directory for the vnode.
	 * When it is done the pn holds the name of the entry
	 * in the directory.
	 * If this is a non-exclusive create we also find the node itself.
	 */
	error = lookuppnat(&pn, NULL, follow, &dvp,
	    (excl == EXCL) ? NULLVPP : vpp, startvp);
	if (error) {
		pn_free(&pn);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		if (why == CRMKDIR && error == EINVAL)
			error = EEXIST;		/* SVID */
		return (error);
	}

	if (why != CRMKNOD)
		vap->va_mode &= ~VSVTX;

	/*
	 * If default ACLs are defined for the directory don't apply the
	 * umask if umask is passed.
	 */

	if (umask) {

		vsecattr_t vsec;

		vsec.vsa_aclcnt = 0;
		vsec.vsa_aclentp = NULL;
		vsec.vsa_dfaclcnt = 0;
		vsec.vsa_dfaclentp = NULL;
		vsec.vsa_mask = VSA_DFACLCNT;
		error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
		/*
		 * If error is ENOSYS then treat it as no error
		 * Don't want to force all file systems to support
		 * aclent_t style of ACL's.
		 */
		if (error == ENOSYS)
			error = 0;
		if (error) {
			if (*vpp != NULL)
				VN_RELE(*vpp);
			goto out;
		} else {
			/*
			 * Apply the umask if no default ACLs.
			 */
			if (vsec.vsa_dfaclcnt == 0)
				vap->va_mode &= ~umask;

			/*
			 * VOP_GETSECATTR() may have allocated memory for
			 * ACLs we didn't request, so double-check and
			 * free it if necessary.
			 */
			if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_aclentp,
				    vsec.vsa_aclcnt * sizeof (aclent_t));
			if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_dfaclentp,
				    vsec.vsa_dfaclcnt * sizeof (aclent_t));
		}
	}

	/*
	 * In general we want to generate EROFS if the file system is
	 * readonly. However, POSIX (IEEE Std. 1003.1) section 5.3.1
	 * documents the open system call, and it says that O_CREAT has no
	 * effect if the file already exists. Bug 1119649 states
	 * that open(path, O_CREAT, ...) fails when attempting to open an
	 * existing file on a read only file system. Thus, the first part
	 * of the following if statement has 3 checks:
	 *	if the file exists &&
	 *		it is being open with write access &&
	 *		the file system is read only
	 *	then generate EROFS
	 */
	if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
	    (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
		if (*vpp)
			VN_RELE(*vpp);
		error = EROFS;
	} else if (excl == NONEXCL && *vpp != NULL) {
		vnode_t *rvp;

		/*
		 * File already exists. If a mandatory lock has been
		 * applied, return error.
		 */
		vp = *vpp;
		if (VOP_REALVP(vp, &rvp, NULL) != 0)
			rvp = vp;
		if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}
		if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
			vattr.va_mask = AT_MODE|AT_SIZE;
			if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
				goto out;
			}
			if (MANDLOCK(vp, vattr.va_mode)) {
				error = EAGAIN;
				goto out;
			}
			/*
			 * File cannot be truncated if non-blocking mandatory
			 * locks are currently on the file.
			 */
			if ((vap->va_mask & AT_SIZE) && in_crit) {
				u_offset_t offset;
				ssize_t length;

				/*
				 * The conflict range is the region between
				 * the old and new sizes.
				 */
				offset = vap->va_size > vattr.va_size ?
				    vattr.va_size : vap->va_size;
				length = vap->va_size > vattr.va_size ?
				    vap->va_size - vattr.va_size :
				    vattr.va_size - vap->va_size;
				if (nbl_conflict(vp, NBL_WRITE, offset,
				    length, 0, NULL)) {
					error = EACCES;
					goto out;
				}
			}
		}

		/*
		 * If the file is the root of a VFS, we've crossed a
		 * mount point and the "containing" directory that we
		 * acquired above (dvp) is irrelevant because it's in
		 * a different file system. We apply VOP_CREATE to the
		 * target itself instead of to the containing directory
		 * and supply a null path name to indicate (conventionally)
		 * the node itself as the "component" of interest.
		 *
		 * The call to VOP_CREATE() is necessary to ensure
		 * that the appropriate permission checks are made,
		 * i.e. EISDIR, EACCES, etc. We already know that vpp
		 * exists since we are in the else condition where this
		 * was checked.
		 */
		if (vp->v_flag & VROOT) {
			ASSERT(why != CRMKDIR);
			error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
			    CRED(), flag, NULL, NULL);
			/*
			 * If the create succeeded, it will have created a
			 * new reference on a new vnode (*vpp) in the child
			 * file system, so we want to drop our reference on
			 * the old (vp) upon exit.
			 */
			goto out;
		}

		/*
		 * Large File API - non-large open (FOFFMAX flag not set)
		 * of regular file fails if the file size exceeds MAXOFF32_T.
		 */
		if (why != CRMKDIR &&
		    !(flag & FOFFMAX) &&
		    (vp->v_type == VREG)) {
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0,
			    CRED(), NULL))) {
				goto out;
			}
			if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
				error = EOVERFLOW;
				goto out;
			}
		}
	}

	if (error == 0) {
		/*
		 * Call mkdir() if specified, otherwise create().
		 */
		int must_be_dir = pn_fixslash(&pn);	/* trailing '/'? */

		if (why == CRMKDIR)
			/*
			 * N.B., if vn_createat() ever requests
			 * case-insensitive behavior then it will need
			 * to be passed to VOP_MKDIR(). VOP_CREATE()
			 * will already get it via "flag"
			 */
			error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
			    NULL, 0, NULL);
		else if (!must_be_dir)
			error = VOP_CREATE(dvp, pn.pn_path, vap,
			    excl, mode, vpp, CRED(), flag, NULL, NULL);
		else
			error = ENOTDIR;
	}

out:

	if (auditing)
		audit_vncreate_finish(*vpp, error);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL) {
		VN_RELE(vp);
		vp = NULL;
	}
	pn_free(&pn);
	VN_RELE(dvp);
	/*
	 * The following clause was added to handle a problem
	 * with NFS consistency. It is possible that a lookup
	 * of the file to be created succeeded, but the file
	 * itself doesn't actually exist on the server. This
	 * is chiefly due to the DNLC containing an entry for
	 * the file which has been removed on the server. In
	 * this case, we just start over. If there was some
	 * other cause for the ESTALE error, then the lookup
	 * of the file will fail and the error will be returned
	 * above instead of looping around from here.
	 */
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
1617 1621
int
vn_link(char *from, char *to, enum uio_seg seg)
{
	/*
	 * Convenience wrapper: hard-link "from" to "to", both resolved
	 * relative to the current directory, without following a trailing
	 * symlink on the source.
	 */
	return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
}
1623 1627
1624 1628 int
1625 1629 vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
1626 1630 vnode_t *tstartvp, char *to, enum uio_seg seg)
1627 1631 {
1628 1632 struct vnode *fvp; /* from vnode ptr */
1629 1633 struct vnode *tdvp; /* to directory vnode ptr */
1630 1634 struct pathname pn;
1631 1635 int error;
1632 1636 struct vattr vattr;
1633 1637 dev_t fsid;
1634 1638 int estale_retry = 0;
1635 1639 uint32_t auditing = AU_AUDITING();
1636 1640
1637 1641 top:
1638 1642 fvp = tdvp = NULL;
1639 1643 if (error = pn_get(to, seg, &pn))
1640 1644 return (error);
1641 1645 if (auditing && fstartvp != NULL)
1642 1646 audit_setfsat_path(1);
1643 1647 if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
1644 1648 goto out;
1645 1649 if (auditing && tstartvp != NULL)
1646 1650 audit_setfsat_path(3);
1647 1651 if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
1648 1652 goto out;
1649 1653 /*
1650 1654 * Make sure both source vnode and target directory vnode are
1651 1655 * in the same vfs and that it is writeable.
1652 1656 */
1653 1657 vattr.va_mask = AT_FSID;
1654 1658 if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
1655 1659 goto out;
1656 1660 fsid = vattr.va_fsid;
1657 1661 vattr.va_mask = AT_FSID;
1658 1662 if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
1659 1663 goto out;
1660 1664 if (fsid != vattr.va_fsid) {
1661 1665 error = EXDEV;
1662 1666 goto out;
1663 1667 }
1664 1668 if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
1665 1669 error = EROFS;
1666 1670 goto out;
1667 1671 }
1668 1672 /*
1669 1673 * Do the link.
1670 1674 */
1671 1675 (void) pn_fixslash(&pn);
1672 1676 error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
1673 1677 out:
1674 1678 pn_free(&pn);
1675 1679 if (fvp)
1676 1680 VN_RELE(fvp);
1677 1681 if (tdvp)
1678 1682 VN_RELE(tdvp);
1679 1683 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1680 1684 goto top;
1681 1685 return (error);
1682 1686 }
1683 1687
int
vn_rename(char *from, char *to, enum uio_seg seg)
{
	/*
	 * Convenience wrapper: rename with both paths resolved relative
	 * to the current directory.
	 */
	return (vn_renameat(NULL, from, NULL, to, seg));
}
1689 1693
int
vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
    char *tname, enum uio_seg seg)
{
	int error;
	struct vattr vattr;
	struct pathname fpn;		/* from pathname */
	struct pathname tpn;		/* to pathname */
	dev_t fsid;
	int in_crit_src, in_crit_targ;	/* nbmand critical regions held */
	vnode_t *fromvp, *fvp;		/* source dir / source entry */
	vnode_t *tovp, *targvp;		/* target dir / existing target */
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

top:
	fvp = fromvp = tovp = targvp = NULL;
	in_crit_src = in_crit_targ = 0;
	/*
	 * Get to and from pathnames.
	 */
	if (error = pn_get(fname, seg, &fpn))
		return (error);
	if (error = pn_get(tname, seg, &tpn)) {
		pn_free(&fpn);
		return (error);
	}

	/*
	 * First we need to resolve the correct directories
	 * The passed in directories may only be a starting point,
	 * but we need the real directories the file(s) live in.
	 * For example the fname may be something like usr/lib/sparc
	 * and we were passed in the / directory, but we need to
	 * use the lib directory for the rename.
	 */

	if (auditing && fdvp != NULL)
		audit_setfsat_path(1);
	/*
	 * Lookup to and from directories.
	 */
	if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
		goto out;
	}

	/*
	 * Make sure there is an entry.
	 */
	if (fvp == NULL) {
		error = ENOENT;
		goto out;
	}

	if (auditing && tdvp != NULL)
		audit_setfsat_path(3);
	if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
		goto out;
	}

	/*
	 * Make sure both the from vnode directory and the to directory
	 * are in the same vfs and the to directory is writable.
	 * We check fsid's, not vfs pointers, so loopback fs works.
	 */
	if (fromvp != tovp) {
		vattr.va_mask = AT_FSID;
		if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
			goto out;
		fsid = vattr.va_fsid;
		vattr.va_mask = AT_FSID;
		if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
			goto out;
		if (fsid != vattr.va_fsid) {
			error = EXDEV;
			goto out;
		}
	}

	if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	/*
	 * Make sure "from" vp is not a mount point.
	 * Note, lookup did traverse() already, so
	 * we'll be looking at the mounted FS root.
	 * (but allow files like mnttab)
	 */
	if ((fvp->v_flag & VROOT) != 0 && fvp->v_type == VDIR) {
		error = EBUSY;
		goto out;
	}

	/* nbmand: an existing target must not conflict with its removal. */
	if (targvp && (fvp != targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit_targ = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	/* nbmand: the source must not conflict with the rename itself. */
	if (nbl_need_check(fvp)) {
		nbl_start_crit(fvp, RW_READER);
		in_crit_src = 1;
		if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	/*
	 * Do the rename.
	 */
	(void) pn_fixslash(&tpn);
	error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
	    NULL, 0);

out:
	pn_free(&fpn);
	pn_free(&tpn);
	if (in_crit_src)
		nbl_end_crit(fvp);
	if (in_crit_targ)
		nbl_end_crit(targvp);
	if (fromvp)
		VN_RELE(fromvp);
	if (tovp)
		VN_RELE(tovp);
	if (targvp)
		VN_RELE(targvp);
	if (fvp)
		VN_RELE(fvp);
	/* Start over on a transient NFS ESTALE (stale DNLC entry). */
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
1829 1833
1830 1834 /*
1831 1835 * Remove a file or directory.
1832 1836 */
int
vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	/*
	 * Convenience wrapper: remove relative to the current directory
	 * (NULL startvp); see vn_removeat() for the real work.
	 */
	return (vn_removeat(NULL, fnamep, seg, dirflag));
}
1838 1842
int
vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	struct vnode *vp;		/* entry vnode */
	struct vnode *dvp;		/* ptr to parent dir vnode */
	struct vnode *coveredvp;
	struct pathname pn;		/* name of entry */
	enum vtype vtype;
	int error;
	struct vfs *vfsp;
	struct vfs *dvfsp;	/* ptr to parent dir vfs */
	int in_crit = 0;
	int estale_retry = 0;

top:
	if (error = pn_get(fnamep, seg, &pn))
		return (error);
	dvp = vp = NULL;
	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
		pn_free(&pn);
		/*
		 * Retry (a bounded number of times) on a stale file
		 * handle, e.g. from NFS.
		 */
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		return (error);
	}

	/*
	 * Make sure there is an entry.
	 */
	if (vp == NULL) {
		error = ENOENT;
		goto out;
	}

	vfsp = vp->v_vfsp;
	dvfsp = dvp->v_vfsp;

	/*
	 * If the named file is the root of a mounted filesystem, fail,
	 * unless it's marked unlinkable.  In that case, unmount the
	 * filesystem and proceed to unlink the covered vnode.  (If the
	 * covered vnode is a directory, use rmdir instead of unlink,
	 * to avoid file system corruption.)
	 */
	if (vp->v_flag & VROOT) {
		if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
			error = EBUSY;
			goto out;
		}

		/*
		 * Namefs specific code starts here.
		 */

		if (dirflag == RMDIRECTORY) {
			/*
			 * User called rmdir(2) on a file that has
			 * been namefs mounted on top of.  Since
			 * namefs doesn't allow directories to
			 * be mounted on other files we know
			 * vp is not of type VDIR so fail the operation.
			 */
			error = ENOTDIR;
			goto out;
		}

		/*
		 * If VROOT is still set after grabbing vp->v_lock,
		 * no one has finished nm_unmount so far and coveredvp
		 * is valid.
		 * If we manage to grab vn_vfswlock(coveredvp) before releasing
		 * vp->v_lock, any race window is eliminated.
		 */

		mutex_enter(&vp->v_lock);
		if ((vp->v_flag & VROOT) == 0) {
			/* Someone beat us to the unmount */
			mutex_exit(&vp->v_lock);
			error = EBUSY;
			goto out;
		}
		vfsp = vp->v_vfsp;
		coveredvp = vfsp->vfs_vnodecovered;
		ASSERT(coveredvp);
		/*
		 * Note: Implementation of vn_vfswlock shows that ordering of
		 * v_lock / vn_vfswlock is not an issue here.
		 */
		error = vn_vfswlock(coveredvp);
		mutex_exit(&vp->v_lock);

		if (error)
			goto out;

		VN_HOLD(coveredvp);
		VN_RELE(vp);
		error = dounmount(vfsp, 0, CRED());

		/*
		 * Unmounted the namefs file system; now get
		 * the object it was mounted over.
		 */
		vp = coveredvp;
		/*
		 * If namefs was mounted over a directory, then
		 * we want to use rmdir() instead of unlink().
		 */
		if (vp->v_type == VDIR)
			dirflag = RMDIRECTORY;

		if (error)
			goto out;
	}

	/*
	 * Make sure filesystem is writeable.
	 * We check the parent directory's vfs in case this is an lofs vnode.
	 */
	if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	/* Cache the type now: vp may be released before the removal below. */
	vtype = vp->v_type;

	/*
	 * If there is the possibility of an nbmand share reservation, make
	 * sure it's okay to remove the file.  Keep a reference to the
	 * vnode, so that we can exit the nbl critical region after
	 * calling VOP_REMOVE.
	 * If there is no possibility of an nbmand share reservation,
	 * release the vnode reference now.  Filesystems like NFS may
	 * behave differently if there is an extra reference, so get rid of
	 * this one.  Fortunately, we can't have nbmand mounts on NFS
	 * filesystems.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	} else {
		VN_RELE(vp);
		vp = NULL;
	}

	if (dirflag == RMDIRECTORY) {
		/*
		 * Caller is using rmdir(2), which can only be applied to
		 * directories.
		 */
		if (vtype != VDIR) {
			error = ENOTDIR;
		} else {
			vnode_t *cwd;
			proc_t *pp = curproc;

			/* Hold the cwd so VOP_RMDIR can compare against it. */
			mutex_enter(&pp->p_lock);
			cwd = PTOU(pp)->u_cdir;
			VN_HOLD(cwd);
			mutex_exit(&pp->p_lock);
			error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
			    NULL, 0);
			VN_RELE(cwd);
		}
	} else {
		/*
		 * Unlink(2) can be applied to anything.
		 */
		error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
	}

out:
	pn_free(&pn);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL)
		VN_RELE(vp);
	if (dvp != NULL)
		VN_RELE(dvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
2026 2030
2027 2031 /*
2028 2032 * Utility function to compare equality of vnodes.
2029 2033 * Compare the underlying real vnodes, if there are underlying vnodes.
2030 2034 * This is a more thorough comparison than the VN_CMP() macro provides.
2031 2035 */
2032 2036 int
2033 2037 vn_compare(vnode_t *vp1, vnode_t *vp2)
2034 2038 {
2035 2039 vnode_t *realvp;
2036 2040
2037 2041 if (vp1 != NULL && VOP_REALVP(vp1, &realvp, NULL) == 0)
2038 2042 vp1 = realvp;
2039 2043 if (vp2 != NULL && VOP_REALVP(vp2, &realvp, NULL) == 0)
2040 2044 vp2 = realvp;
2041 2045 return (VN_CMP(vp1, vp2));
2042 2046 }
2043 2047
2044 2048 /*
2045 2049 * The number of locks to hash into. This value must be a power
2046 2050 * of 2 minus 1 and should probably also be prime.
2047 2051 */
#define	NUM_BUCKETS	1023

struct vn_vfslocks_bucket {
	kmutex_t vb_lock;		/* protects vb_list */
	vn_vfslocks_entry_t *vb_list;	/* singly-linked chain of entries */
	/* pad each bucket to a 64-byte line to avoid false sharing */
	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
};

/*
 * Total number of buckets will be NUM_BUCKETS + 1 .
 */

#pragma	align	64(vn_vfslocks_buckets)
static	struct	vn_vfslocks_bucket	vn_vfslocks_buckets[NUM_BUCKETS + 1];

#define	VN_VFSLOCKS_SHIFT	9

/* Hash a vfs/vnode pointer to a bucket index (drops low alignment bits). */
#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
2067 2071
2068 2072 /*
2069 2073 * vn_vfslocks_getlock() uses an HASH scheme to generate
2070 2074 * rwstlock using vfs/vnode pointer passed to it.
2071 2075 *
2072 2076 * vn_vfslocks_rele() releases a reference in the
2073 2077 * HASH table which allows the entry allocated by
2074 2078 * vn_vfslocks_getlock() to be freed at a later
2075 2079 * stage when the refcount drops to zero.
2076 2080 */
2077 2081
/*
 * Look up (or create) the hash entry for the given vfs/vnode pointer,
 * taking a reference on it.  The caller must eventually drop that
 * reference with vn_vfslocks_rele().
 */
vn_vfslocks_entry_t *
vn_vfslocks_getlock(void *vfsvpptr)
{
	struct vn_vfslocks_bucket *bp;
	vn_vfslocks_entry_t *vep;
	vn_vfslocks_entry_t *tvep;

	ASSERT(vfsvpptr != NULL);
	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];

	mutex_enter(&bp->vb_lock);
	for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
		if (vep->ve_vpvfs == vfsvpptr) {
			vep->ve_refcnt++;
			mutex_exit(&bp->vb_lock);
			return (vep);
		}
	}
	mutex_exit(&bp->vb_lock);
	/*
	 * Not found: allocate outside the bucket lock (KM_SLEEP may
	 * block), then re-scan in case another thread raced us in.
	 */
	vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
	rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
	vep->ve_vpvfs = (char *)vfsvpptr;
	vep->ve_refcnt = 1;
	mutex_enter(&bp->vb_lock);
	for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
		if (tvep->ve_vpvfs == vfsvpptr) {
			tvep->ve_refcnt++;
			mutex_exit(&bp->vb_lock);

			/*
			 * There is already an entry in the hash
			 * destroy what we just allocated.
			 */
			rwst_destroy(&vep->ve_lock);
			kmem_free(vep, sizeof (*vep));
			return (tvep);
		}
	}
	vep->ve_next = bp->vb_list;
	bp->vb_list = vep;
	mutex_exit(&bp->vb_lock);
	return (vep);
}
2121 2125
/*
 * Drop a reference on an entry obtained via vn_vfslocks_getlock().
 * When the last reference goes away, the entry is unlinked from its
 * bucket and freed.
 */
void
vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
{
	struct vn_vfslocks_bucket *bp;
	vn_vfslocks_entry_t *vep;
	vn_vfslocks_entry_t *pvep;

	ASSERT(vepent != NULL);
	ASSERT(vepent->ve_vpvfs != NULL);

	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];

	mutex_enter(&bp->vb_lock);
	vepent->ve_refcnt--;

	if ((int32_t)vepent->ve_refcnt < 0)
		cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");

	pvep = NULL;
	if (vepent->ve_refcnt == 0) {
		/* Last reference: unlink from the bucket chain and free. */
		for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
			if (vep->ve_vpvfs == vepent->ve_vpvfs) {
				if (pvep == NULL)
					bp->vb_list = vep->ve_next;
				else {
					pvep->ve_next = vep->ve_next;
				}
				mutex_exit(&bp->vb_lock);
				rwst_destroy(&vep->ve_lock);
				kmem_free(vep, sizeof (*vep));
				return;
			}
			pvep = vep;
		}
		/* Entry must be in its bucket; anything else is corruption. */
		cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
	}
	mutex_exit(&bp->vb_lock);
}
2160 2164
2161 2165 /*
2162 2166 * vn_vfswlock_wait is used to implement a lock which is logically a writers
2163 2167 * lock protecting the v_vfsmountedhere field.
2164 2168 * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
2165 2169 * except that it blocks to acquire the lock VVFSLOCK.
2166 2170 *
2167 2171 * traverse() and routines re-implementing part of traverse (e.g. autofs)
2168 2172 * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2169 2173 * need the non-blocking version of the writers lock i.e. vn_vfswlock
2170 2174 */
2171 2175 int
2172 2176 vn_vfswlock_wait(vnode_t *vp)
2173 2177 {
2174 2178 int retval;
2175 2179 vn_vfslocks_entry_t *vpvfsentry;
2176 2180 ASSERT(vp != NULL);
2177 2181
2178 2182 vpvfsentry = vn_vfslocks_getlock(vp);
2179 2183 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
2180 2184
2181 2185 if (retval == EINTR) {
2182 2186 vn_vfslocks_rele(vpvfsentry);
2183 2187 return (EINTR);
2184 2188 }
2185 2189 return (retval);
2186 2190 }
2187 2191
2188 2192 int
2189 2193 vn_vfsrlock_wait(vnode_t *vp)
2190 2194 {
2191 2195 int retval;
2192 2196 vn_vfslocks_entry_t *vpvfsentry;
2193 2197 ASSERT(vp != NULL);
2194 2198
2195 2199 vpvfsentry = vn_vfslocks_getlock(vp);
2196 2200 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
2197 2201
2198 2202 if (retval == EINTR) {
2199 2203 vn_vfslocks_rele(vpvfsentry);
2200 2204 return (EINTR);
2201 2205 }
2202 2206
2203 2207 return (retval);
2204 2208 }
2205 2209
2206 2210
2207 2211 /*
2208 2212 * vn_vfswlock is used to implement a lock which is logically a writers lock
2209 2213 * protecting the v_vfsmountedhere field.
2210 2214 */
2211 2215 int
2212 2216 vn_vfswlock(vnode_t *vp)
2213 2217 {
2214 2218 vn_vfslocks_entry_t *vpvfsentry;
2215 2219
2216 2220 /*
2217 2221 * If vp is NULL then somebody is trying to lock the covered vnode
2218 2222 * of /. (vfs_vnodecovered is NULL for /). This situation will
2219 2223 * only happen when unmounting /. Since that operation will fail
2220 2224 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2221 2225 */
2222 2226 if (vp == NULL)
2223 2227 return (EBUSY);
2224 2228
2225 2229 vpvfsentry = vn_vfslocks_getlock(vp);
2226 2230
2227 2231 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
2228 2232 return (0);
2229 2233
2230 2234 vn_vfslocks_rele(vpvfsentry);
2231 2235 return (EBUSY);
2232 2236 }
2233 2237
2234 2238 int
2235 2239 vn_vfsrlock(vnode_t *vp)
2236 2240 {
2237 2241 vn_vfslocks_entry_t *vpvfsentry;
2238 2242
2239 2243 /*
2240 2244 * If vp is NULL then somebody is trying to lock the covered vnode
2241 2245 * of /. (vfs_vnodecovered is NULL for /). This situation will
2242 2246 * only happen when unmounting /. Since that operation will fail
2243 2247 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2244 2248 */
2245 2249 if (vp == NULL)
2246 2250 return (EBUSY);
2247 2251
2248 2252 vpvfsentry = vn_vfslocks_getlock(vp);
2249 2253
2250 2254 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
2251 2255 return (0);
2252 2256
2253 2257 vn_vfslocks_rele(vpvfsentry);
2254 2258 return (EBUSY);
2255 2259 }
2256 2260
void
vn_vfsunlock(vnode_t *vp)
{
	vn_vfslocks_entry_t *vpvfsentry;

	/*
	 * ve_refcnt needs to be decremented twice.
	 * 1. To release reference after a call to vn_vfslocks_getlock()
	 * 2. To release the reference from the locking routines like
	 * vn_vfsrlock/vn_vfswlock etc,.
	 */
	vpvfsentry = vn_vfslocks_getlock(vp);
	vn_vfslocks_rele(vpvfsentry);

	rwst_exit(&vpvfsentry->ve_lock);
	vn_vfslocks_rele(vpvfsentry);
}
2274 2278
2275 2279 int
2276 2280 vn_vfswlock_held(vnode_t *vp)
2277 2281 {
2278 2282 int held;
2279 2283 vn_vfslocks_entry_t *vpvfsentry;
2280 2284
2281 2285 ASSERT(vp != NULL);
2282 2286
2283 2287 vpvfsentry = vn_vfslocks_getlock(vp);
2284 2288 held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
2285 2289
2286 2290 vn_vfslocks_rele(vpvfsentry);
2287 2291 return (held);
2288 2292 }
2289 2293
2290 2294
2291 2295 int
2292 2296 vn_make_ops(
2293 2297 const char *name, /* Name of file system */
2294 2298 const fs_operation_def_t *templ, /* Operation specification */
2295 2299 vnodeops_t **actual) /* Return the vnodeops */
2296 2300 {
2297 2301 int unused_ops;
2298 2302 int error;
2299 2303
2300 2304 *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
2301 2305
2302 2306 (*actual)->vnop_name = name;
2303 2307
2304 2308 error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
2305 2309 if (error) {
2306 2310 kmem_free(*actual, sizeof (vnodeops_t));
2307 2311 }
2308 2312
2309 2313 #if DEBUG
2310 2314 if (unused_ops != 0)
2311 2315 cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
2312 2316 "but not used", name, unused_ops);
2313 2317 #endif
2314 2318
2315 2319 return (error);
2316 2320 }
2317 2321
2318 2322 /*
2319 2323 * Free the vnodeops created as a result of vn_make_ops()
2320 2324 */
void
vn_freevnodeops(vnodeops_t *vnops)
{
	/* vnops was allocated by vn_make_ops() with KM_SLEEP. */
	kmem_free(vnops, sizeof (vnodeops_t));
}
2326 2330
2327 2331 /*
2328 2332 * Vnode cache.
2329 2333 */
2330 2334
/* ARGSUSED */
static int
vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct vnode *vp;

	vp = buf;

	/* Initialize the embedded synchronization objects. */
	mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
	rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
	vp->v_femhead = NULL;	/* Must be done before vn_reinit() */
	/* v_path starts as the shared empty-path sentinel, never NULL. */
	vp->v_path = vn_vpath_empty;
	vp->v_path_stamp = 0;
	vp->v_mpssdata = NULL;
	vp->v_vsd = NULL;
	vp->v_fopdata = NULL;

	return (0);
}
2352 2356
/* ARGSUSED */
static void
vn_cache_destructor(void *buf, void *cdrarg)
{
	struct vnode *vp;

	vp = buf;

	/* Tear down the synchronization objects set up by the constructor. */
	rw_destroy(&vp->v_nbllock);
	cv_destroy(&vp->v_cv);
	mutex_destroy(&vp->v_vsd_lock);
	mutex_destroy(&vp->v_lock);
}
2366 2370
/* Create the global vnode kmem cache. */
void
vn_create_cache(void)
{
	/* LINTED */
	ASSERT((1 << VNODE_ALIGN_LOG2) ==
	    P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN));
	vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode),
	    VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL,
	    NULL, 0);
}
2377 2381
/* Destroy the global vnode kmem cache created by vn_create_cache(). */
void
vn_destroy_cache(void)
{
	kmem_cache_destroy(vn_cache);
}
2383 2387
2384 2388 /*
2385 2389 * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2386 2390 * cached by the file system and vnodes remain associated.
2387 2391 */
void
vn_recycle(vnode_t *vp)
{
	ASSERT(vp->v_pages == NULL);
	VERIFY(vp->v_path != NULL);

	/*
	 * XXX - This really belongs in vn_reinit(), but we have some issues
	 * with the counts.  Best to have it here for clean initialization.
	 */
	vp->v_rdcnt = 0;
	vp->v_wrcnt = 0;
	vp->v_mmap_read = 0;
	vp->v_mmap_write = 0;

	/*
	 * If FEM was in use, make sure everything gets cleaned up
	 * NOTE: vp->v_femhead is initialized to NULL in the vnode
	 * constructor.
	 */
	if (vp->v_femhead) {
		/* XXX - There should be a free_femhead() that does all this */
		ASSERT(vp->v_femhead->femh_list == NULL);
		mutex_destroy(&vp->v_femhead->femh_lock);
		kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
		vp->v_femhead = NULL;
	}
	/* Release any cached path; reset to the shared empty sentinel. */
	if (vp->v_path != vn_vpath_empty) {
		kmem_free(vp->v_path, strlen(vp->v_path) + 1);
		vp->v_path = vn_vpath_empty;
	}
	vp->v_path_stamp = 0;

	if (vp->v_fopdata != NULL) {
		free_fopdata(vp);
	}
	vp->v_mpssdata = NULL;
	vsd_free(vp);
}
2427 2431
2428 2432 /*
2429 2433 * Used to reset the vnode fields including those that are directly accessible
2430 2434 * as well as those which require an accessor function.
2431 2435 *
2432 2436 * Does not initialize:
2433 2437 * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2434 2438 * v_data (since FS-nodes and vnodes point to each other and should
2435 2439 * be updated simultaneously)
2436 2440 * v_op (in case someone needs to make a VOP call on this object)
2437 2441 */
void
vn_reinit(vnode_t *vp)
{
	/* A freshly reset vnode starts with a single reference. */
	vp->v_count = 1;
	vp->v_count_dnlc = 0;
	vp->v_vfsp = NULL;
	vp->v_stream = NULL;
	vp->v_vfsmountedhere = NULL;
	vp->v_flag = 0;
	vp->v_type = VNON;
	vp->v_rdev = NODEV;

	vp->v_filocks = NULL;
	vp->v_shrlocks = NULL;
	vp->v_pages = NULL;

	vp->v_locality = NULL;
	vp->v_xattrdir = NULL;

	/*
	 * In a few specific instances, vn_reinit() is used to initialize
	 * locally defined vnode_t instances.  Lacking the construction offered
	 * by vn_alloc(), these vnodes require v_path initialization.
	 */
	if (vp->v_path == NULL) {
		vp->v_path = vn_vpath_empty;
	}

	/* Handles v_femhead, v_path, and the r/w/map counts */
	vn_recycle(vp);
}
2469 2473
2470 2474 vnode_t *
2471 2475 vn_alloc(int kmflag)
2472 2476 {
2473 2477 vnode_t *vp;
2474 2478
2475 2479 vp = kmem_cache_alloc(vn_cache, kmflag);
2476 2480
2477 2481 if (vp != NULL) {
2478 2482 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2479 2483 vp->v_fopdata = NULL;
2480 2484 vn_reinit(vp);
2481 2485 }
2482 2486
2483 2487 return (vp);
2484 2488 }
2485 2489
/*
 * Return a vnode to the vnode cache, releasing any residual path, FEM,
 * and VSD state.  The vnode must have no remaining locks or pages.
 */
void
vn_free(vnode_t *vp)
{
	ASSERT(vp->v_shrlocks == NULL);
	ASSERT(vp->v_filocks == NULL);

	/*
	 * Some file systems call vn_free() with v_count of zero,
	 * some with v_count of 1.  In any case, the value should
	 * never be anything else.
	 */
	ASSERT((vp->v_count == 0) || (vp->v_count == 1));
	ASSERT(vp->v_count_dnlc == 0);
	VERIFY(vp->v_path != NULL);
	if (vp->v_path != vn_vpath_empty) {
		kmem_free(vp->v_path, strlen(vp->v_path) + 1);
		vp->v_path = vn_vpath_empty;
	}

	/* If FEM was in use, make sure everything gets cleaned up */
	if (vp->v_femhead) {
		/* XXX - There should be a free_femhead() that does all this */
		ASSERT(vp->v_femhead->femh_list == NULL);
		mutex_destroy(&vp->v_femhead->femh_lock);
		kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
		vp->v_femhead = NULL;
	}

	if (vp->v_fopdata != NULL) {
		free_fopdata(vp);
	}
	vp->v_mpssdata = NULL;
	vsd_free(vp);
	kmem_cache_free(vn_cache, vp);
}
2521 2525
2522 2526 /*
2523 2527 * vnode status changes, should define better states than 1, 0.
2524 2528 */
2525 2529 void
2526 2530 vn_reclaim(vnode_t *vp)
2527 2531 {
2528 2532 vfs_t *vfsp = vp->v_vfsp;
2529 2533
2530 2534 if (vfsp == NULL ||
2531 2535 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2532 2536 return;
2533 2537 }
2534 2538 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
2535 2539 }
2536 2540
2537 2541 void
2538 2542 vn_idle(vnode_t *vp)
2539 2543 {
2540 2544 vfs_t *vfsp = vp->v_vfsp;
2541 2545
2542 2546 if (vfsp == NULL ||
2543 2547 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2544 2548 return;
2545 2549 }
2546 2550 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
2547 2551 }
2548 2552 void
2549 2553 vn_exists(vnode_t *vp)
2550 2554 {
2551 2555 vfs_t *vfsp = vp->v_vfsp;
2552 2556
2553 2557 if (vfsp == NULL ||
2554 2558 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2555 2559 return;
2556 2560 }
2557 2561 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
2558 2562 }
2559 2563
2560 2564 void
2561 2565 vn_invalid(vnode_t *vp)
2562 2566 {
2563 2567 vfs_t *vfsp = vp->v_vfsp;
2564 2568
2565 2569 if (vfsp == NULL ||
2566 2570 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2567 2571 return;
2568 2572 }
2569 2573 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
2570 2574 }
2571 2575
2572 2576 /* Vnode event notification */
2573 2577
2574 2578 int
2575 2579 vnevent_support(vnode_t *vp, caller_context_t *ct)
2576 2580 {
2577 2581 if (vp == NULL)
2578 2582 return (EINVAL);
2579 2583
2580 2584 return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct));
2581 2585 }
2582 2586
2583 2587 void
2584 2588 vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2585 2589 {
2586 2590 if (vp == NULL || vp->v_femhead == NULL) {
2587 2591 return;
2588 2592 }
2589 2593 (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
2590 2594 }
2591 2595
2592 2596 void
2593 2597 vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2594 2598 caller_context_t *ct)
2595 2599 {
2596 2600 if (vp == NULL || vp->v_femhead == NULL) {
2597 2601 return;
2598 2602 }
2599 2603 (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct);
2600 2604 }
2601 2605
2602 2606 void
2603 2607 vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
2604 2608 {
2605 2609 if (vp == NULL || vp->v_femhead == NULL) {
2606 2610 return;
2607 2611 }
2608 2612 (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
2609 2613 }
2610 2614
2611 2615 void
2612 2616 vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2613 2617 {
2614 2618 if (vp == NULL || vp->v_femhead == NULL) {
2615 2619 return;
2616 2620 }
2617 2621 (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct);
2618 2622 }
2619 2623
2620 2624 void
2621 2625 vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2622 2626 {
2623 2627 if (vp == NULL || vp->v_femhead == NULL) {
2624 2628 return;
2625 2629 }
2626 2630 (void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name, ct);
2627 2631 }
2628 2632
2629 2633 void
2630 2634 vnevent_pre_rename_src(vnode_t *vp, vnode_t *dvp, char *name,
2631 2635 caller_context_t *ct)
2632 2636 {
2633 2637 if (vp == NULL || vp->v_femhead == NULL) {
2634 2638 return;
2635 2639 }
2636 2640 (void) VOP_VNEVENT(vp, VE_PRE_RENAME_SRC, dvp, name, ct);
2637 2641 }
2638 2642
2639 2643 void
2640 2644 vnevent_pre_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2641 2645 caller_context_t *ct)
2642 2646 {
2643 2647 if (vp == NULL || vp->v_femhead == NULL) {
2644 2648 return;
2645 2649 }
2646 2650 (void) VOP_VNEVENT(vp, VE_PRE_RENAME_DEST, dvp, name, ct);
2647 2651 }
2648 2652
2649 2653 void
2650 2654 vnevent_pre_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
2651 2655 caller_context_t *ct)
2652 2656 {
2653 2657 if (vp == NULL || vp->v_femhead == NULL) {
2654 2658 return;
2655 2659 }
2656 2660 (void) VOP_VNEVENT(vp, VE_PRE_RENAME_DEST_DIR, nvp, name, ct);
2657 2661 }
2658 2662
2659 2663 void
2660 2664 vnevent_create(vnode_t *vp, caller_context_t *ct)
2661 2665 {
2662 2666 if (vp == NULL || vp->v_femhead == NULL) {
2663 2667 return;
2664 2668 }
2665 2669 (void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL, ct);
2666 2670 }
2667 2671
2668 2672 void
2669 2673 vnevent_link(vnode_t *vp, caller_context_t *ct)
2670 2674 {
2671 2675 if (vp == NULL || vp->v_femhead == NULL) {
2672 2676 return;
2673 2677 }
2674 2678 (void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL, ct);
2675 2679 }
2676 2680
2677 2681 void
2678 2682 vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
2679 2683 {
2680 2684 if (vp == NULL || vp->v_femhead == NULL) {
2681 2685 return;
2682 2686 }
2683 2687 (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
2684 2688 }
2685 2689
2686 2690 void
2687 2691 vnevent_truncate(vnode_t *vp, caller_context_t *ct)
2688 2692 {
2689 2693 if (vp == NULL || vp->v_femhead == NULL) {
2690 2694 return;
2691 2695 }
2692 2696 (void) VOP_VNEVENT(vp, VE_TRUNCATE, NULL, NULL, ct);
2693 2697 }
2694 2698
2695 2699 /*
2696 2700 * Vnode accessors.
2697 2701 */
2698 2702
/*
 * Return non-zero if vp resides on a read-only mounted filesystem.
 * Note: returns the raw VFS_RDONLY bit, not a normalized 0/1.
 */
int
vn_is_readonly(vnode_t *vp)
{
	return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
}
2704 2708
/* Return non-zero if the vnode has any active file locks. */
int
vn_has_flocks(vnode_t *vp)
{
	return (vp->v_filocks != NULL);
}
2710 2714
/*
 * Return non-zero if the vnode has file locks and its mode bits call
 * for mandatory locking (see the MANDLOCK() macro).
 */
int
vn_has_mandatory_locks(vnode_t *vp, int mode)
{
	return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
}
2716 2720
/* Return non-zero if the vnode has pages cached in memory. */
int
vn_has_cached_data(vnode_t *vp)
{
	return (vp->v_pages != NULL);
}
2722 2726
2723 2727 /*
2724 2728 * Return 0 if the vnode in question shouldn't be permitted into a zone via
2725 2729 * zone_enter(2).
2726 2730 */
2727 2731 int
2728 2732 vn_can_change_zones(vnode_t *vp)
2729 2733 {
2730 2734 struct vfssw *vswp;
2731 2735 int allow = 1;
2732 2736 vnode_t *rvp;
2733 2737
2734 2738 if (nfs_global_client_only != 0)
2735 2739 return (1);
2736 2740
2737 2741 /*
2738 2742 * We always want to look at the underlying vnode if there is one.
2739 2743 */
2740 2744 if (VOP_REALVP(vp, &rvp, NULL) != 0)
2741 2745 rvp = vp;
2742 2746 /*
2743 2747 * Some pseudo filesystems (including doorfs) don't actually register
2744 2748 * their vfsops_t, so the following may return NULL; we happily let
2745 2749 * such vnodes switch zones.
2746 2750 */
2747 2751 vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
2748 2752 if (vswp != NULL) {
2749 2753 if (vswp->vsw_flag & VSW_NOTZONESAFE)
2750 2754 allow = 0;
2751 2755 vfs_unrefvfssw(vswp);
2752 2756 }
2753 2757 return (allow);
2754 2758 }
2755 2759
2756 2760 /*
2757 2761 * Return nonzero if the vnode is a mount point, zero if not.
2758 2762 */
int
vn_ismntpt(vnode_t *vp)
{
	/* Non-NULL v_vfsmountedhere means a filesystem is mounted here. */
	return (vp->v_vfsmountedhere != NULL);
}
2764 2768
2765 2769 /* Retrieve the vfs (if any) mounted on this vnode */
vfs_t *
vn_mountedvfs(vnode_t *vp)
{
	/* NULL when nothing is mounted on this vnode. */
	return (vp->v_vfsmountedhere);
}
2771 2775
2772 2776 /*
2773 2777 * Return nonzero if the vnode is referenced by the dnlc, zero if not.
2774 2778 */
int
vn_in_dnlc(vnode_t *vp)
{
	/* v_count_dnlc counts references held by the DNLC alone. */
	return (vp->v_count_dnlc > 0);
}
2780 2784
2781 2785 /*
2782 2786 * vn_has_other_opens() checks whether a particular file is opened by more than
2783 2787 * just the caller and whether the open is for read and/or write.
2784 2788 * This routine is for calling after the caller has already called VOP_OPEN()
2785 2789 * and the caller wishes to know if they are the only one with it open for
2786 2790 * the mode(s) specified.
2787 2791 *
2788 2792 * Vnode counts are only kept on regular files (v_type=VREG).
2789 2793 */
2790 2794 int
2791 2795 vn_has_other_opens(
2792 2796 vnode_t *vp,
2793 2797 v_mode_t mode)
2794 2798 {
2795 2799
2796 2800 ASSERT(vp != NULL);
2797 2801
2798 2802 switch (mode) {
2799 2803 case V_WRITE:
2800 2804 if (vp->v_wrcnt > 1)
2801 2805 return (V_TRUE);
2802 2806 break;
2803 2807 case V_RDORWR:
2804 2808 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
2805 2809 return (V_TRUE);
2806 2810 break;
2807 2811 case V_RDANDWR:
2808 2812 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
2809 2813 return (V_TRUE);
2810 2814 break;
2811 2815 case V_READ:
2812 2816 if (vp->v_rdcnt > 1)
2813 2817 return (V_TRUE);
2814 2818 break;
2815 2819 }
2816 2820
2817 2821 return (V_FALSE);
2818 2822 }
2819 2823
2820 2824 /*
2821 2825 * vn_is_opened() checks whether a particular file is opened and
2822 2826 * whether the open is for read and/or write.
2823 2827 *
2824 2828 * Vnode counts are only kept on regular files (v_type=VREG).
2825 2829 */
2826 2830 int
2827 2831 vn_is_opened(
2828 2832 vnode_t *vp,
2829 2833 v_mode_t mode)
2830 2834 {
2831 2835
2832 2836 ASSERT(vp != NULL);
2833 2837
2834 2838 switch (mode) {
2835 2839 case V_WRITE:
2836 2840 if (vp->v_wrcnt)
2837 2841 return (V_TRUE);
2838 2842 break;
2839 2843 case V_RDANDWR:
2840 2844 if (vp->v_rdcnt && vp->v_wrcnt)
2841 2845 return (V_TRUE);
2842 2846 break;
2843 2847 case V_RDORWR:
2844 2848 if (vp->v_rdcnt || vp->v_wrcnt)
2845 2849 return (V_TRUE);
2846 2850 break;
2847 2851 case V_READ:
2848 2852 if (vp->v_rdcnt)
2849 2853 return (V_TRUE);
2850 2854 break;
2851 2855 }
2852 2856
2853 2857 return (V_FALSE);
2854 2858 }
2855 2859
2856 2860 /*
2857 2861 * vn_is_mapped() checks whether a particular file is mapped and whether
2858 2862 * the file is mapped read and/or write.
2859 2863 */
int
vn_is_mapped(
	vnode_t *vp,
	v_mode_t mode)
{

	ASSERT(vp != NULL);

#if !defined(_LP64)
	switch (mode) {
	/*
	 * The atomic_add_64_nv functions force atomicity in the
	 * case of 32 bit architectures.  Otherwise the 64 bit values
	 * require two fetches.  The value of the fields may be
	 * (potentially) changed between the first fetch and the
	 * second
	 */
	case V_WRITE:
		if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
			return (V_TRUE);
		break;
	case V_RDANDWR:
		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
			return (V_TRUE);
		break;
	case V_RDORWR:
		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
			return (V_TRUE);
		break;
	case V_READ:
		if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
			return (V_TRUE);
		break;
	}
#else
	/* On 64-bit kernels a plain load of the 64-bit counters suffices. */
	switch (mode) {
	case V_WRITE:
		if (vp->v_mmap_write)
			return (V_TRUE);
		break;
	case V_RDANDWR:
		if (vp->v_mmap_read && vp->v_mmap_write)
			return (V_TRUE);
		break;
	case V_RDORWR:
		if (vp->v_mmap_read || vp->v_mmap_write)
			return (V_TRUE);
		break;
	case V_READ:
		if (vp->v_mmap_read)
			return (V_TRUE);
		break;
	}
#endif

	return (V_FALSE);
}
2919 2923
2920 2924 /*
2921 2925 * Set the operations vector for a vnode.
2922 2926 *
2923 2927 * FEM ensures that the v_femhead pointer is filled in before the
2924 2928 * v_op pointer is changed. This means that if the v_femhead pointer
2925 2929 * is NULL, and the v_op field hasn't changed since before which checked
2926 2930 * the v_femhead pointer; then our update is ok - we are not racing with
2927 2931 * FEM.
2928 2932 */
void
vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
{
	vnodeops_t *op;

	ASSERT(vp != NULL);
	ASSERT(vnodeops != NULL);

	/*
	 * Snapshot v_op first; the membar pairs with FEM's ordering (see the
	 * block comment above) so a NULL v_femhead read afterward means the
	 * snapshot was taken while FEM was not installed.
	 */
	op = vp->v_op;
	membar_consumer();
	/*
	 * If vp->v_femhead == NULL, then we'll call atomic_cas_ptr() to do
	 * the compare-and-swap on vp->v_op.  If either fails, then FEM is
	 * in effect on the vnode and we need to have FEM deal with it.
	 */
	if (vp->v_femhead != NULL || atomic_cas_ptr(&vp->v_op, op, vnodeops) !=
	    op) {
		fem_setvnops(vp, vnodeops);
	}
}
2949 2953
2950 2954 /*
2951 2955 * Retrieve the operations vector for a vnode
2952 2956 * As with vn_setops(above); make sure we aren't racing with FEM.
2953 2957 * FEM sets the v_op to a special, internal, vnodeops that wouldn't
2954 2958 * make sense to the callers of this routine.
2955 2959 */
vnodeops_t *
vn_getops(vnode_t *vp)
{
	vnodeops_t *op;

	ASSERT(vp != NULL);

	/*
	 * Snapshot v_op, then (after the barrier) verify both that no FEM
	 * monitor is installed and that v_op did not change underneath us.
	 * Only then is the snapshot the filesystem's real ops vector;
	 * otherwise FEM has interposed and must supply the underlying one.
	 */
	op = vp->v_op;
	membar_consumer();
	if (vp->v_femhead == NULL && op == vp->v_op) {
		return (op);
	} else {
		return (fem_getvnops(vp));
	}
}
2971 2975
2972 2976 /*
2973 2977 * Returns non-zero (1) if the vnodeops matches that of the vnode.
2974 2978 * Returns zero (0) if not.
2975 2979 */
2976 2980 int
2977 2981 vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2978 2982 {
2979 2983 return (vn_getops(vp) == vnodeops);
2980 2984 }
2981 2985
2982 2986 /*
2983 2987 * Returns non-zero (1) if the specified operation matches the
2984 2988 * corresponding operation for that the vnode.
2985 2989 * Returns zero (0) if not.
2986 2990 */
2987 2991
2988 2992 #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2989 2993
2990 2994 int
2991 2995 vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2992 2996 {
2993 2997 const fs_operation_trans_def_t *otdp;
2994 2998 fs_generic_func_p *loc = NULL;
2995 2999 vnodeops_t *vop = vn_getops(vp);
2996 3000
2997 3001 ASSERT(vopname != NULL);
2998 3002
2999 3003 for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
3000 3004 if (MATCHNAME(otdp->name, vopname)) {
3001 3005 loc = (fs_generic_func_p *)
3002 3006 ((char *)(vop) + otdp->offset);
3003 3007 break;
3004 3008 }
3005 3009 }
3006 3010
3007 3011 return ((loc != NULL) && (*loc == funcp));
3008 3012 }
3009 3013
3010 3014 /*
3011 3015 * fs_new_caller_id() needs to return a unique ID on a given local system.
3012 3016 * The IDs do not need to survive across reboots. These are primarily
3013 3017 * used so that (FEM) monitors can detect particular callers (such as
3014 3018 * the NFS server) to a given vnode/vfs operation.
3015 3019 */
3016 3020 u_longlong_t
3017 3021 fs_new_caller_id()
3018 3022 {
3019 3023 static uint64_t next_caller_id = 0LL; /* First call returns 1 */
3020 3024
3021 3025 return ((u_longlong_t)atomic_inc_64_nv(&next_caller_id));
3022 3026 }
3023 3027
3024 3028 /*
3025 3029 * The value stored in v_path is relative to rootdir, located in the global
3026 3030 * zone. Zones or chroot environments which reside deeper inside the VFS
3027 3031 * hierarchy will have a relative view of MAXPATHLEN since they are unaware of
3028 3032 * what lies below their perceived root. In order to keep v_path usable for
3029 3033 * these child environments, its allocations are allowed to exceed MAXPATHLEN.
3030 3034 *
3031 3035 * An upper bound of max_vnode_path is placed upon v_path allocations to
3032 3036 * prevent the system from going too wild at the behest of pathological
3033 3037 * behavior from the operator.
3034 3038 */
size_t max_vnode_path = 4 * MAXPATHLEN;	/* tunable cap on v_path allocations */
3036 3040
3037 3041
/*
 * Reset a vnode's cached v_path to the empty sentinel.  A non-zero
 * compare_stamp makes the clear conditional: it proceeds only if
 * v_path_stamp still matches that snapshot, i.e. no one has updated
 * the path since the caller sampled the stamp.
 */
void
vn_clearpath(vnode_t *vp, hrtime_t compare_stamp)
{
	char *buf;

	mutex_enter(&vp->v_lock);
	/*
	 * If the snapshot of v_path_stamp passed in via compare_stamp does not
	 * match the present value on the vnode, it indicates that subsequent
	 * changes have occurred.  The v_path value is not cleared in this case
	 * since the new value may be valid.
	 */
	if (compare_stamp != 0 && vp->v_path_stamp != compare_stamp) {
		mutex_exit(&vp->v_lock);
		return;
	}
	buf = vp->v_path;
	vp->v_path = vn_vpath_empty;
	vp->v_path_stamp = 0;
	mutex_exit(&vp->v_lock);
	/* Free the old buffer outside the lock; the sentinel is never freed. */
	if (buf != vn_vpath_empty) {
		kmem_free(buf, strlen(buf) + 1);
	}
}
3062 3066
3063 3067 static void
3064 3068 vn_setpath_common(vnode_t *pvp, vnode_t *vp, const char *name, size_t len,
3065 3069 boolean_t is_rename)
3066 3070 {
3067 3071 char *buf, *oldbuf;
3068 3072 hrtime_t pstamp;
3069 3073 size_t baselen, buflen = 0;
3070 3074
3071 3075 /* Handle the vn_setpath_str case. */
3072 3076 if (pvp == NULL) {
3073 3077 if (len + 1 > max_vnode_path) {
3074 3078 DTRACE_PROBE4(vn__setpath__too__long, vnode_t *, pvp,
3075 3079 vnode_t *, vp, char *, name, size_t, len + 1);
3076 3080 return;
3077 3081 }
3078 3082 buf = kmem_alloc(len + 1, KM_SLEEP);
3079 3083 bcopy(name, buf, len);
3080 3084 buf[len] = '\0';
3081 3085
3082 3086 mutex_enter(&vp->v_lock);
3083 3087 oldbuf = vp->v_path;
3084 3088 vp->v_path = buf;
3085 3089 vp->v_path_stamp = gethrtime();
3086 3090 mutex_exit(&vp->v_lock);
3087 3091 if (oldbuf != vn_vpath_empty) {
3088 3092 kmem_free(oldbuf, strlen(oldbuf) + 1);
3089 3093 }
3090 3094 return;
3091 3095 }
3092 3096
3093 3097 /* Take snapshot of parent dir */
3094 3098 mutex_enter(&pvp->v_lock);
3095 3099
3096 3100 if ((pvp->v_flag & VTRAVERSE) != 0) {
3097 3101 /*
3098 3102 * When the parent vnode has VTRAVERSE set in its flags, normal
3099 3103 * assumptions about v_path calculation no longer apply. The
3100 3104 * primary situation where this occurs is via the VFS tricks
3101 3105 * which procfs plays in order to allow /proc/PID/(root|cwd) to
3102 3106 * yield meaningful results.
3103 3107 *
3104 3108 * When this flag is set, v_path on the child must not be
3105 3109 * updated since the calculated value is likely to be
3106 3110 * incorrect, given the current context.
3107 3111 */
3108 3112 mutex_exit(&pvp->v_lock);
3109 3113 return;
3110 3114 }
3111 3115
3112 3116 retrybuf:
3113 3117 if (pvp->v_path == vn_vpath_empty) {
3114 3118 /*
3115 3119 * Without v_path from the parent directory, generating a child
3116 3120 * path from the name is impossible.
3117 3121 */
3118 3122 if (len > 0) {
3119 3123 pstamp = pvp->v_path_stamp;
3120 3124 mutex_exit(&pvp->v_lock);
3121 3125 vn_clearpath(vp, pstamp);
3122 3126 return;
3123 3127 }
3124 3128
3125 3129 /*
3126 3130 * The only feasible case here is where a NUL lookup is being
3127 3131 * performed on rootdir prior to its v_path being populated.
3128 3132 */
3129 3133 ASSERT(pvp->v_path_stamp == 0);
3130 3134 baselen = 0;
3131 3135 pstamp = 0;
3132 3136 } else {
3133 3137 pstamp = pvp->v_path_stamp;
3134 3138 baselen = strlen(pvp->v_path);
3135 3139 /* ignore a trailing slash if present */
3136 3140 if (pvp->v_path[baselen - 1] == '/') {
3137 3141 /* This should only the be case for rootdir */
3138 3142 ASSERT(baselen == 1 && pvp == rootdir);
3139 3143 baselen--;
3140 3144 }
3141 3145 }
3142 3146 mutex_exit(&pvp->v_lock);
3143 3147
3144 3148 if (buflen != 0) {
3145 3149 /* Free the existing (mis-sized) buffer in case of retry */
3146 3150 kmem_free(buf, buflen);
3147 3151 }
3148 3152 /* base, '/', name and trailing NUL */
3149 3153 buflen = baselen + len + 2;
3150 3154 if (buflen > max_vnode_path) {
3151 3155 DTRACE_PROBE4(vn__setpath_too__long, vnode_t *, pvp,
3152 3156 vnode_t *, vp, char *, name, size_t, buflen);
3153 3157 return;
3154 3158 }
3155 3159 buf = kmem_alloc(buflen, KM_SLEEP);
3156 3160
3157 3161 mutex_enter(&pvp->v_lock);
3158 3162 if (pvp->v_path_stamp != pstamp) {
3159 3163 size_t vlen;
3160 3164
3161 3165 /*
3162 3166 * Since v_path_stamp changed on the parent, it is likely that
3163 3167 * v_path has been altered as well. If the length does not
3164 3168 * exactly match what was previously measured, the buffer
3165 3169 * allocation must be repeated for proper sizing.
3166 3170 */
3167 3171 if (pvp->v_path == vn_vpath_empty) {
3168 3172 /* Give up if parent lack v_path */
3169 3173 mutex_exit(&pvp->v_lock);
3170 3174 kmem_free(buf, buflen);
3171 3175 return;
3172 3176 }
3173 3177 vlen = strlen(pvp->v_path);
3174 3178 if (pvp->v_path[vlen - 1] == '/') {
3175 3179 vlen--;
3176 3180 }
3177 3181 if (vlen != baselen) {
3178 3182 goto retrybuf;
3179 3183 }
3180 3184 }
3181 3185 bcopy(pvp->v_path, buf, baselen);
3182 3186 mutex_exit(&pvp->v_lock);
3183 3187
3184 3188 buf[baselen] = '/';
3185 3189 baselen++;
3186 3190 bcopy(name, &buf[baselen], len + 1);
3187 3191
3188 3192 mutex_enter(&vp->v_lock);
3189 3193 if (vp->v_path_stamp == 0) {
3190 3194 /* never-visited vnode can inherit stamp from parent */
3191 3195 ASSERT(vp->v_path == vn_vpath_empty);
3192 3196 vp->v_path_stamp = pstamp;
3193 3197 vp->v_path = buf;
3194 3198 mutex_exit(&vp->v_lock);
3195 3199 } else if (vp->v_path_stamp < pstamp || is_rename) {
3196 3200 /*
3197 3201 * Install the updated path and stamp, ensuring that the v_path
3198 3202 * pointer is valid at all times for dtrace.
3199 3203 */
3200 3204 oldbuf = vp->v_path;
3201 3205 vp->v_path = buf;
3202 3206 vp->v_path_stamp = gethrtime();
3203 3207 mutex_exit(&vp->v_lock);
3204 3208 kmem_free(oldbuf, strlen(oldbuf) + 1);
3205 3209 } else {
3206 3210 /*
3207 3211 * If the timestamp matches or is greater, it means another
3208 3212 * thread performed the update first while locks were dropped
3209 3213 * here to make the allocation. We defer to the newer value.
3210 3214 */
3211 3215 mutex_exit(&vp->v_lock);
3212 3216 kmem_free(buf, buflen);
3213 3217 }
3214 3218 ASSERT(MUTEX_NOT_HELD(&vp->v_lock));
3215 3219 }
3216 3220
/*
 * Opportunistically refresh vp's cached v_path from its parent's newer
 * v_path plus the looked-up entry name, when doing so is meaningful.
 */
void
vn_updatepath(vnode_t *pvp, vnode_t *vp, const char *name)
{
	size_t len;

	/*
	 * If the parent is older or empty, there's nothing further to do.
	 */
	if (pvp->v_path == vn_vpath_empty ||
	    pvp->v_path_stamp <= vp->v_path_stamp) {
		return;
	}

	/*
	 * Given the lack of appropriate context, meaningful updates to v_path
	 * cannot be made during lookups for the '.' or '..' entries.
	 */
	len = strlen(name);
	if (len == 0 || (len == 1 && name[0] == '.') ||
	    (len == 2 && name[0] == '.' && name[1] == '.')) {
		return;
	}

	vn_setpath_common(pvp, vp, name, len, B_FALSE);
}
3242 3246
3243 3247 /*
3244 3248 * Given a starting vnode and a path, updates the path in the target vnode in
3245 3249 * a safe manner. If the vnode already has path information embedded, then the
3246 3250 * cached path is left untouched.
3247 3251 */
/* ARGSUSED */
void
vn_setpath(vnode_t *rootvp, vnode_t *pvp, vnode_t *vp, const char *name,
    size_t len)
{
	/* rootvp is unused; it remains in the signature for compatibility. */
	vn_setpath_common(pvp, vp, name, len, B_FALSE);
}
3255 3259
3256 3260 /*
3257 3261 * Sets the path to the vnode to be the given string, regardless of current
3258 3262 * context. The string must be a complete path from rootdir. This is only used
3259 3263 * by fsop_root() for setting the path based on the mountpoint.
3260 3264 */
void
vn_setpath_str(vnode_t *vp, const char *str, size_t len)
{
	/* A NULL parent selects the complete-path case in the helper. */
	vn_setpath_common(NULL, vp, str, len, B_FALSE);
}
3266 3270
3267 3271 /*
3268 3272 * Called from within filesystem's vop_rename() to handle renames once the
3269 3273 * target vnode is available.
3270 3274 */
void
vn_renamepath(vnode_t *pvp, vnode_t *vp, const char *name, size_t len)
{
	/* B_TRUE (is_rename) lets the update replace a newer cached path. */
	vn_setpath_common(pvp, vp, name, len, B_TRUE);
}
3276 3280
3277 3281 /*
3278 3282 * Similar to vn_setpath_str(), this function sets the path of the destination
3279 3283 * vnode to the be the same as the source vnode.
3280 3284 */
void
vn_copypath(struct vnode *src, struct vnode *dst)
{
	char *buf;
	hrtime_t stamp;
	size_t buflen;

	/* Measure the source path under its lock. */
	mutex_enter(&src->v_lock);
	if (src->v_path == vn_vpath_empty) {
		mutex_exit(&src->v_lock);
		return;
	}
	buflen = strlen(src->v_path) + 1;
	mutex_exit(&src->v_lock);

	/* Allocate unlocked: KM_SLEEP may block. */
	buf = kmem_alloc(buflen, KM_SLEEP);

	/*
	 * Re-take the lock and re-verify the measurement; bail if the path
	 * was cleared or resized while the lock was dropped.
	 */
	mutex_enter(&src->v_lock);
	if (src->v_path == vn_vpath_empty ||
	    strlen(src->v_path) + 1 != buflen) {
		mutex_exit(&src->v_lock);
		kmem_free(buf, buflen);
		return;
	}
	bcopy(src->v_path, buf, buflen);
	stamp = src->v_path_stamp;
	mutex_exit(&src->v_lock);

	/* Install the copy only if dst does not already have a path. */
	mutex_enter(&dst->v_lock);
	if (dst->v_path != vn_vpath_empty) {
		mutex_exit(&dst->v_lock);
		kmem_free(buf, buflen);
		return;
	}
	dst->v_path = buf;
	dst->v_path_stamp = stamp;
	mutex_exit(&dst->v_lock);
}
3319 3323
3320 3324
3321 3325 /*
3322 3326 * XXX Private interface for segvn routines that handle vnode
3323 3327 * large page segments.
3324 3328 *
3325 3329 * return 1 if vp's file system VOP_PAGEIO() implementation
3326 3330 * can be safely used instead of VOP_GETPAGE() for handling
3327 3331 * pagefaults against regular non swap files. VOP_PAGEIO()
3328 3332 * interface is considered safe here if its implementation
3329 3333 * is very close to VOP_GETPAGE() implementation.
3330 3334 * e.g. It zero's out the part of the page beyond EOF. Doesn't
3331 3335 * panic if there're file holes but instead returns an error.
3332 3336 * Doesn't assume file won't be changed by user writes, etc.
3333 3337 *
3334 3338 * return 0 otherwise.
3335 3339 *
3336 3340 * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs.
3337 3341 */
3338 3342 int
3339 3343 vn_vmpss_usepageio(vnode_t *vp)
3340 3344 {
3341 3345 vfs_t *vfsp = vp->v_vfsp;
3342 3346 char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
3343 3347 char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
3344 3348 char **fsok = pageio_ok_fss;
3345 3349
3346 3350 if (fsname == NULL) {
3347 3351 return (0);
3348 3352 }
3349 3353
3350 3354 for (; *fsok; fsok++) {
3351 3355 if (strcmp(*fsok, fsname) == 0) {
3352 3356 return (1);
3353 3357 }
3354 3358 }
3355 3359 return (0);
3356 3360 }
3357 3361
3358 3362 /* VOP_XXX() macros call the corresponding fop_xxx() function */
3359 3363
int
fop_open(
	vnode_t **vpp,
	int mode,
	cred_t *cr,
	caller_context_t *ct)
{
	int ret;
	vnode_t *vp = *vpp;	/* saved in case vop_open() replaces *vpp */

	VN_HOLD(vp);
	/*
	 * Adding to the vnode counts before calling open
	 * avoids the need for a mutex. It circumvents a race
	 * condition where a query made on the vnode counts results in a
	 * false negative. The inquirer goes away believing the file is
	 * not open when there is an open on the file already under way.
	 *
	 * The counts are meant to prevent NFS from granting a delegation
	 * when it would be dangerous to do so.
	 *
	 * The vnode counts are only kept on regular files
	 */
	if ((*vpp)->v_type == VREG) {
		if (mode & FREAD)
			atomic_inc_32(&(*vpp)->v_rdcnt);
		if (mode & FWRITE)
			atomic_inc_32(&(*vpp)->v_wrcnt);
	}

	VOPXID_MAP_CR(vp, cr);

	ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr, ct);

	if (ret) {
		/*
		 * Use the saved vp just in case the vnode ptr got trashed
		 * by the error.
		 */
		VOPSTATS_UPDATE(vp, open);
		/* Roll back the optimistic count increments from above. */
		if ((vp->v_type == VREG) && (mode & FREAD))
			atomic_dec_32(&vp->v_rdcnt);
		if ((vp->v_type == VREG) && (mode & FWRITE))
			atomic_dec_32(&vp->v_wrcnt);
	} else {
		/*
		 * Some filesystems will return a different vnode,
		 * but the same path was still used to open it.
		 * So if we do change the vnode and need to
		 * copy over the path, do so here, rather than special
		 * casing each filesystem. Adjust the vnode counts to
		 * reflect the vnode switch.
		 */
		VOPSTATS_UPDATE(*vpp, open);
		if (*vpp != vp) {
			/* Move each count from the old vnode to the new. */
			vn_copypath(vp, *vpp);
			if (((*vpp)->v_type == VREG) && (mode & FREAD))
				atomic_inc_32(&(*vpp)->v_rdcnt);
			if ((vp->v_type == VREG) && (mode & FREAD))
				atomic_dec_32(&vp->v_rdcnt);
			if (((*vpp)->v_type == VREG) && (mode & FWRITE))
				atomic_inc_32(&(*vpp)->v_wrcnt);
			if ((vp->v_type == VREG) && (mode & FWRITE))
				atomic_dec_32(&vp->v_wrcnt);
		}
	}
	/* Drop the hold taken at entry; vp may be freed here on error. */
	VN_RELE(vp);
	return (ret);
}
3429 3433
3430 3434 int
3431 3435 fop_close(
3432 3436 vnode_t *vp,
3433 3437 int flag,
3434 3438 int count,
3435 3439 offset_t offset,
3436 3440 cred_t *cr,
3437 3441 caller_context_t *ct)
3438 3442 {
3439 3443 int err;
3440 3444
3441 3445 VOPXID_MAP_CR(vp, cr);
3442 3446
3443 3447 err = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr, ct);
3444 3448 VOPSTATS_UPDATE(vp, close);
3445 3449 /*
3446 3450 * Check passed in count to handle possible dups. Vnode counts are only
3447 3451 * kept on regular files
3448 3452 */
3449 3453 if ((vp->v_type == VREG) && (count == 1)) {
3450 3454 if (flag & FREAD) {
3451 3455 ASSERT(vp->v_rdcnt > 0);
3452 3456 atomic_dec_32(&vp->v_rdcnt);
3453 3457 }
3454 3458 if (flag & FWRITE) {
3455 3459 ASSERT(vp->v_wrcnt > 0);
3456 3460 atomic_dec_32(&vp->v_wrcnt);
3457 3461 }
3458 3462 }
3459 3463 return (err);
3460 3464 }
3461 3465
3462 3466 int
3463 3467 fop_read(
3464 3468 vnode_t *vp,
3465 3469 uio_t *uiop,
3466 3470 int ioflag,
3467 3471 cred_t *cr,
3468 3472 caller_context_t *ct)
3469 3473 {
3470 3474 int err;
3471 3475 ssize_t resid_start = uiop->uio_resid;
3472 3476
3473 3477 VOPXID_MAP_CR(vp, cr);
3474 3478
3475 3479 err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
3476 3480 VOPSTATS_UPDATE_IO(vp, read,
3477 3481 read_bytes, (resid_start - uiop->uio_resid));
3478 3482 return (err);
3479 3483 }
3480 3484
3481 3485 int
3482 3486 fop_write(
3483 3487 vnode_t *vp,
3484 3488 uio_t *uiop,
3485 3489 int ioflag,
3486 3490 cred_t *cr,
3487 3491 caller_context_t *ct)
3488 3492 {
3489 3493 int err;
3490 3494 ssize_t resid_start = uiop->uio_resid;
3491 3495
3492 3496 VOPXID_MAP_CR(vp, cr);
3493 3497
3494 3498 err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
3495 3499 VOPSTATS_UPDATE_IO(vp, write,
3496 3500 write_bytes, (resid_start - uiop->uio_resid));
3497 3501 return (err);
3498 3502 }
3499 3503
3500 3504 int
3501 3505 fop_ioctl(
3502 3506 vnode_t *vp,
3503 3507 int cmd,
3504 3508 intptr_t arg,
3505 3509 int flag,
3506 3510 cred_t *cr,
3507 3511 int *rvalp,
3508 3512 caller_context_t *ct)
3509 3513 {
3510 3514 int err;
3511 3515
3512 3516 VOPXID_MAP_CR(vp, cr);
3513 3517
3514 3518 err = (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp, ct);
3515 3519 VOPSTATS_UPDATE(vp, ioctl);
3516 3520 return (err);
3517 3521 }
3518 3522
3519 3523 int
3520 3524 fop_setfl(
3521 3525 vnode_t *vp,
3522 3526 int oflags,
3523 3527 int nflags,
3524 3528 cred_t *cr,
3525 3529 caller_context_t *ct)
3526 3530 {
3527 3531 int err;
3528 3532
3529 3533 VOPXID_MAP_CR(vp, cr);
3530 3534
3531 3535 err = (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr, ct);
3532 3536 VOPSTATS_UPDATE(vp, setfl);
3533 3537 return (err);
3534 3538 }
3535 3539
3536 3540 int
3537 3541 fop_getattr(
3538 3542 vnode_t *vp,
3539 3543 vattr_t *vap,
3540 3544 int flags,
3541 3545 cred_t *cr,
3542 3546 caller_context_t *ct)
3543 3547 {
3544 3548 int err;
3545 3549
3546 3550 VOPXID_MAP_CR(vp, cr);
3547 3551
3548 3552 /*
3549 3553 * If this file system doesn't understand the xvattr extensions
3550 3554 * then turn off the xvattr bit.
3551 3555 */
3552 3556 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3553 3557 vap->va_mask &= ~AT_XVATTR;
3554 3558 }
3555 3559
3556 3560 /*
3557 3561 * We're only allowed to skip the ACL check iff we used a 32 bit
3558 3562 * ACE mask with VOP_ACCESS() to determine permissions.
3559 3563 */
3560 3564 if ((flags & ATTR_NOACLCHECK) &&
3561 3565 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3562 3566 return (EINVAL);
3563 3567 }
3564 3568 err = (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr, ct);
3565 3569 VOPSTATS_UPDATE(vp, getattr);
3566 3570 return (err);
3567 3571 }
3568 3572
3569 3573 int
3570 3574 fop_setattr(
3571 3575 vnode_t *vp,
3572 3576 vattr_t *vap,
3573 3577 int flags,
3574 3578 cred_t *cr,
3575 3579 caller_context_t *ct)
3576 3580 {
3577 3581 int err;
3578 3582
3579 3583 VOPXID_MAP_CR(vp, cr);
3580 3584
3581 3585 /*
3582 3586 * If this file system doesn't understand the xvattr extensions
3583 3587 * then turn off the xvattr bit.
3584 3588 */
3585 3589 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3586 3590 vap->va_mask &= ~AT_XVATTR;
3587 3591 }
3588 3592
3589 3593 /*
3590 3594 * We're only allowed to skip the ACL check iff we used a 32 bit
3591 3595 * ACE mask with VOP_ACCESS() to determine permissions.
3592 3596 */
3593 3597 if ((flags & ATTR_NOACLCHECK) &&
3594 3598 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3595 3599 return (EINVAL);
3596 3600 }
3597 3601 err = (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct);
3598 3602 VOPSTATS_UPDATE(vp, setattr);
3599 3603 return (err);
3600 3604 }
3601 3605
3602 3606 int
3603 3607 fop_access(
3604 3608 vnode_t *vp,
3605 3609 int mode,
3606 3610 int flags,
3607 3611 cred_t *cr,
3608 3612 caller_context_t *ct)
3609 3613 {
3610 3614 int err;
3611 3615
3612 3616 if ((flags & V_ACE_MASK) &&
3613 3617 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3614 3618 return (EINVAL);
3615 3619 }
3616 3620
3617 3621 VOPXID_MAP_CR(vp, cr);
3618 3622
3619 3623 err = (*(vp)->v_op->vop_access)(vp, mode, flags, cr, ct);
3620 3624 VOPSTATS_UPDATE(vp, access);
3621 3625 return (err);
3622 3626 }
3623 3627
3624 3628 int
3625 3629 fop_lookup(
3626 3630 vnode_t *dvp,
3627 3631 char *nm,
3628 3632 vnode_t **vpp,
3629 3633 pathname_t *pnp,
3630 3634 int flags,
3631 3635 vnode_t *rdir,
3632 3636 cred_t *cr,
3633 3637 caller_context_t *ct,
3634 3638 int *deflags, /* Returned per-dirent flags */
3635 3639 pathname_t *ppnp) /* Returned case-preserved name in directory */
3636 3640 {
3637 3641 int ret;
3638 3642
3639 3643 /*
3640 3644 * If this file system doesn't support case-insensitive access
3641 3645 * and said access is requested, fail quickly. It is required
3642 3646 * that if the vfs supports case-insensitive lookup, it also
3643 3647 * supports extended dirent flags.
3644 3648 */
3645 3649 if (flags & FIGNORECASE &&
3646 3650 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3647 3651 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3648 3652 return (EINVAL);
3649 3653
3650 3654 VOPXID_MAP_CR(dvp, cr);
3651 3655
3652 3656 if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) {
3653 3657 ret = xattr_dir_lookup(dvp, vpp, flags, cr);
3654 3658 } else {
3655 3659 ret = (*(dvp)->v_op->vop_lookup)
3656 3660 (dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp);
3657 3661 }
3658 3662 if (ret == 0 && *vpp) {
3659 3663 VOPSTATS_UPDATE(*vpp, lookup);
3660 3664 vn_updatepath(dvp, *vpp, nm);
3661 3665 }
3662 3666
3663 3667 return (ret);
3664 3668 }
3665 3669
3666 3670 int
3667 3671 fop_create(
3668 3672 vnode_t *dvp,
3669 3673 char *name,
3670 3674 vattr_t *vap,
3671 3675 vcexcl_t excl,
3672 3676 int mode,
3673 3677 vnode_t **vpp,
3674 3678 cred_t *cr,
3675 3679 int flags,
3676 3680 caller_context_t *ct,
3677 3681 vsecattr_t *vsecp) /* ACL to set during create */
3678 3682 {
3679 3683 int ret;
3680 3684
3681 3685 if (vsecp != NULL &&
3682 3686 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3683 3687 return (EINVAL);
3684 3688 }
3685 3689 /*
3686 3690 * If this file system doesn't support case-insensitive access
3687 3691 * and said access is requested, fail quickly.
3688 3692 */
3689 3693 if (flags & FIGNORECASE &&
3690 3694 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3691 3695 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3692 3696 return (EINVAL);
3693 3697
3694 3698 VOPXID_MAP_CR(dvp, cr);
3695 3699
3696 3700 ret = (*(dvp)->v_op->vop_create)
3697 3701 (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
3698 3702 if (ret == 0 && *vpp) {
3699 3703 VOPSTATS_UPDATE(*vpp, create);
3700 3704 vn_updatepath(dvp, *vpp, name);
3701 3705 }
3702 3706
3703 3707 return (ret);
3704 3708 }
3705 3709
3706 3710 int
3707 3711 fop_remove(
3708 3712 vnode_t *dvp,
3709 3713 char *nm,
3710 3714 cred_t *cr,
3711 3715 caller_context_t *ct,
3712 3716 int flags)
3713 3717 {
3714 3718 int err;
3715 3719
3716 3720 /*
3717 3721 * If this file system doesn't support case-insensitive access
3718 3722 * and said access is requested, fail quickly.
3719 3723 */
3720 3724 if (flags & FIGNORECASE &&
3721 3725 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3722 3726 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3723 3727 return (EINVAL);
3724 3728
3725 3729 VOPXID_MAP_CR(dvp, cr);
3726 3730
3727 3731 err = (*(dvp)->v_op->vop_remove)(dvp, nm, cr, ct, flags);
3728 3732 VOPSTATS_UPDATE(dvp, remove);
3729 3733 return (err);
3730 3734 }
3731 3735
3732 3736 int
3733 3737 fop_link(
3734 3738 vnode_t *tdvp,
3735 3739 vnode_t *svp,
3736 3740 char *tnm,
3737 3741 cred_t *cr,
3738 3742 caller_context_t *ct,
3739 3743 int flags)
3740 3744 {
3741 3745 int err;
3742 3746
3743 3747 /*
3744 3748 * If the target file system doesn't support case-insensitive access
3745 3749 * and said access is requested, fail quickly.
3746 3750 */
3747 3751 if (flags & FIGNORECASE &&
3748 3752 (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3749 3753 vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3750 3754 return (EINVAL);
3751 3755
3752 3756 VOPXID_MAP_CR(tdvp, cr);
3753 3757
3754 3758 err = (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr, ct, flags);
3755 3759 VOPSTATS_UPDATE(tdvp, link);
3756 3760 return (err);
3757 3761 }
3758 3762
3759 3763 int
3760 3764 fop_rename(
3761 3765 vnode_t *sdvp,
3762 3766 char *snm,
3763 3767 vnode_t *tdvp,
3764 3768 char *tnm,
3765 3769 cred_t *cr,
3766 3770 caller_context_t *ct,
3767 3771 int flags)
3768 3772 {
3769 3773 int err;
3770 3774
3771 3775 /*
3772 3776 * If the file system involved does not support
3773 3777 * case-insensitive access and said access is requested, fail
3774 3778 * quickly.
3775 3779 */
3776 3780 if (flags & FIGNORECASE &&
3777 3781 ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3778 3782 vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)))
3779 3783 return (EINVAL);
3780 3784
3781 3785 VOPXID_MAP_CR(tdvp, cr);
3782 3786
3783 3787 err = (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr, ct, flags);
3784 3788 VOPSTATS_UPDATE(sdvp, rename);
3785 3789 return (err);
3786 3790 }
3787 3791
3788 3792 int
3789 3793 fop_mkdir(
3790 3794 vnode_t *dvp,
3791 3795 char *dirname,
3792 3796 vattr_t *vap,
3793 3797 vnode_t **vpp,
3794 3798 cred_t *cr,
3795 3799 caller_context_t *ct,
3796 3800 int flags,
3797 3801 vsecattr_t *vsecp) /* ACL to set during create */
3798 3802 {
3799 3803 int ret;
3800 3804
3801 3805 if (vsecp != NULL &&
3802 3806 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3803 3807 return (EINVAL);
3804 3808 }
3805 3809 /*
3806 3810 * If this file system doesn't support case-insensitive access
3807 3811 * and said access is requested, fail quickly.
3808 3812 */
3809 3813 if (flags & FIGNORECASE &&
3810 3814 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3811 3815 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3812 3816 return (EINVAL);
3813 3817
3814 3818 VOPXID_MAP_CR(dvp, cr);
3815 3819
3816 3820 ret = (*(dvp)->v_op->vop_mkdir)
3817 3821 (dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
3818 3822 if (ret == 0 && *vpp) {
3819 3823 VOPSTATS_UPDATE(*vpp, mkdir);
3820 3824 vn_updatepath(dvp, *vpp, dirname);
3821 3825 }
3822 3826
3823 3827 return (ret);
3824 3828 }
3825 3829
3826 3830 int
3827 3831 fop_rmdir(
3828 3832 vnode_t *dvp,
3829 3833 char *nm,
3830 3834 vnode_t *cdir,
3831 3835 cred_t *cr,
3832 3836 caller_context_t *ct,
3833 3837 int flags)
3834 3838 {
3835 3839 int err;
3836 3840
3837 3841 /*
3838 3842 * If this file system doesn't support case-insensitive access
3839 3843 * and said access is requested, fail quickly.
3840 3844 */
3841 3845 if (flags & FIGNORECASE &&
3842 3846 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3843 3847 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3844 3848 return (EINVAL);
3845 3849
3846 3850 VOPXID_MAP_CR(dvp, cr);
3847 3851
3848 3852 err = (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr, ct, flags);
3849 3853 VOPSTATS_UPDATE(dvp, rmdir);
3850 3854 return (err);
3851 3855 }
3852 3856
3853 3857 int
3854 3858 fop_readdir(
3855 3859 vnode_t *vp,
3856 3860 uio_t *uiop,
3857 3861 cred_t *cr,
3858 3862 int *eofp,
3859 3863 caller_context_t *ct,
3860 3864 int flags)
3861 3865 {
3862 3866 int err;
3863 3867 ssize_t resid_start = uiop->uio_resid;
3864 3868
3865 3869 /*
3866 3870 * If this file system doesn't support retrieving directory
3867 3871 * entry flags and said access is requested, fail quickly.
3868 3872 */
3869 3873 if (flags & V_RDDIR_ENTFLAGS &&
3870 3874 vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0)
3871 3875 return (EINVAL);
3872 3876
3873 3877 VOPXID_MAP_CR(vp, cr);
3874 3878
3875 3879 err = (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp, ct, flags);
3876 3880 VOPSTATS_UPDATE_IO(vp, readdir,
3877 3881 readdir_bytes, (resid_start - uiop->uio_resid));
3878 3882 return (err);
3879 3883 }
3880 3884
3881 3885 int
3882 3886 fop_symlink(
3883 3887 vnode_t *dvp,
3884 3888 char *linkname,
3885 3889 vattr_t *vap,
3886 3890 char *target,
3887 3891 cred_t *cr,
3888 3892 caller_context_t *ct,
3889 3893 int flags)
3890 3894 {
3891 3895 int err;
3892 3896 xvattr_t xvattr;
3893 3897
3894 3898 /*
3895 3899 * If this file system doesn't support case-insensitive access
3896 3900 * and said access is requested, fail quickly.
3897 3901 */
3898 3902 if (flags & FIGNORECASE &&
3899 3903 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3900 3904 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3901 3905 return (EINVAL);
3902 3906
3903 3907 VOPXID_MAP_CR(dvp, cr);
3904 3908
3905 3909 /* check for reparse point */
3906 3910 if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) &&
3907 3911 (strncmp(target, FS_REPARSE_TAG_STR,
3908 3912 strlen(FS_REPARSE_TAG_STR)) == 0)) {
3909 3913 if (!fs_reparse_mark(target, vap, &xvattr))
3910 3914 vap = (vattr_t *)&xvattr;
3911 3915 }
3912 3916
3913 3917 err = (*(dvp)->v_op->vop_symlink)
3914 3918 (dvp, linkname, vap, target, cr, ct, flags);
3915 3919 VOPSTATS_UPDATE(dvp, symlink);
3916 3920 return (err);
3917 3921 }
3918 3922
3919 3923 int
3920 3924 fop_readlink(
3921 3925 vnode_t *vp,
3922 3926 uio_t *uiop,
3923 3927 cred_t *cr,
3924 3928 caller_context_t *ct)
3925 3929 {
3926 3930 int err;
3927 3931
3928 3932 VOPXID_MAP_CR(vp, cr);
3929 3933
3930 3934 err = (*(vp)->v_op->vop_readlink)(vp, uiop, cr, ct);
3931 3935 VOPSTATS_UPDATE(vp, readlink);
3932 3936 return (err);
3933 3937 }
3934 3938
3935 3939 int
3936 3940 fop_fsync(
3937 3941 vnode_t *vp,
3938 3942 int syncflag,
3939 3943 cred_t *cr,
3940 3944 caller_context_t *ct)
3941 3945 {
3942 3946 int err;
3943 3947
3944 3948 VOPXID_MAP_CR(vp, cr);
3945 3949
3946 3950 err = (*(vp)->v_op->vop_fsync)(vp, syncflag, cr, ct);
3947 3951 VOPSTATS_UPDATE(vp, fsync);
3948 3952 return (err);
3949 3953 }
3950 3954
3951 3955 void
3952 3956 fop_inactive(
3953 3957 vnode_t *vp,
3954 3958 cred_t *cr,
3955 3959 caller_context_t *ct)
3956 3960 {
3957 3961 /* Need to update stats before vop call since we may lose the vnode */
3958 3962 VOPSTATS_UPDATE(vp, inactive);
3959 3963
3960 3964 VOPXID_MAP_CR(vp, cr);
3961 3965
3962 3966 (*(vp)->v_op->vop_inactive)(vp, cr, ct);
3963 3967 }
3964 3968
3965 3969 int
3966 3970 fop_fid(
3967 3971 vnode_t *vp,
3968 3972 fid_t *fidp,
3969 3973 caller_context_t *ct)
3970 3974 {
3971 3975 int err;
3972 3976
3973 3977 err = (*(vp)->v_op->vop_fid)(vp, fidp, ct);
3974 3978 VOPSTATS_UPDATE(vp, fid);
3975 3979 return (err);
3976 3980 }
3977 3981
3978 3982 int
3979 3983 fop_rwlock(
3980 3984 vnode_t *vp,
3981 3985 int write_lock,
3982 3986 caller_context_t *ct)
3983 3987 {
3984 3988 int ret;
3985 3989
3986 3990 ret = ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct));
3987 3991 VOPSTATS_UPDATE(vp, rwlock);
3988 3992 return (ret);
3989 3993 }
3990 3994
3991 3995 void
3992 3996 fop_rwunlock(
3993 3997 vnode_t *vp,
3994 3998 int write_lock,
3995 3999 caller_context_t *ct)
3996 4000 {
3997 4001 (*(vp)->v_op->vop_rwunlock)(vp, write_lock, ct);
3998 4002 VOPSTATS_UPDATE(vp, rwunlock);
3999 4003 }
4000 4004
4001 4005 int
4002 4006 fop_seek(
4003 4007 vnode_t *vp,
4004 4008 offset_t ooff,
4005 4009 offset_t *noffp,
4006 4010 caller_context_t *ct)
4007 4011 {
4008 4012 int err;
4009 4013
4010 4014 err = (*(vp)->v_op->vop_seek)(vp, ooff, noffp, ct);
4011 4015 VOPSTATS_UPDATE(vp, seek);
4012 4016 return (err);
4013 4017 }
4014 4018
4015 4019 int
4016 4020 fop_cmp(
4017 4021 vnode_t *vp1,
4018 4022 vnode_t *vp2,
4019 4023 caller_context_t *ct)
4020 4024 {
4021 4025 int err;
4022 4026
4023 4027 err = (*(vp1)->v_op->vop_cmp)(vp1, vp2, ct);
4024 4028 VOPSTATS_UPDATE(vp1, cmp);
4025 4029 return (err);
4026 4030 }
4027 4031
4028 4032 int
4029 4033 fop_frlock(
4030 4034 vnode_t *vp,
4031 4035 int cmd,
4032 4036 flock64_t *bfp,
4033 4037 int flag,
4034 4038 offset_t offset,
4035 4039 struct flk_callback *flk_cbp,
4036 4040 cred_t *cr,
4037 4041 caller_context_t *ct)
4038 4042 {
4039 4043 int err;
4040 4044
4041 4045 VOPXID_MAP_CR(vp, cr);
4042 4046
4043 4047 err = (*(vp)->v_op->vop_frlock)
4044 4048 (vp, cmd, bfp, flag, offset, flk_cbp, cr, ct);
4045 4049 VOPSTATS_UPDATE(vp, frlock);
4046 4050 return (err);
4047 4051 }
4048 4052
4049 4053 int
4050 4054 fop_space(
4051 4055 vnode_t *vp,
4052 4056 int cmd,
4053 4057 flock64_t *bfp,
4054 4058 int flag,
4055 4059 offset_t offset,
4056 4060 cred_t *cr,
4057 4061 caller_context_t *ct)
4058 4062 {
4059 4063 int err;
4060 4064
4061 4065 VOPXID_MAP_CR(vp, cr);
4062 4066
4063 4067 err = (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct);
4064 4068 VOPSTATS_UPDATE(vp, space);
4065 4069 return (err);
4066 4070 }
4067 4071
4068 4072 int
4069 4073 fop_realvp(
4070 4074 vnode_t *vp,
4071 4075 vnode_t **vpp,
4072 4076 caller_context_t *ct)
4073 4077 {
4074 4078 int err;
4075 4079
4076 4080 err = (*(vp)->v_op->vop_realvp)(vp, vpp, ct);
4077 4081 VOPSTATS_UPDATE(vp, realvp);
4078 4082 return (err);
4079 4083 }
4080 4084
4081 4085 int
4082 4086 fop_getpage(
4083 4087 vnode_t *vp,
4084 4088 offset_t off,
4085 4089 size_t len,
4086 4090 uint_t *protp,
4087 4091 page_t **plarr,
4088 4092 size_t plsz,
4089 4093 struct seg *seg,
4090 4094 caddr_t addr,
4091 4095 enum seg_rw rw,
4092 4096 cred_t *cr,
4093 4097 caller_context_t *ct)
4094 4098 {
4095 4099 int err;
4096 4100
4097 4101 VOPXID_MAP_CR(vp, cr);
4098 4102
4099 4103 err = (*(vp)->v_op->vop_getpage)
4100 4104 (vp, off, len, protp, plarr, plsz, seg, addr, rw, cr, ct);
4101 4105 VOPSTATS_UPDATE(vp, getpage);
4102 4106 return (err);
4103 4107 }
4104 4108
4105 4109 int
4106 4110 fop_putpage(
4107 4111 vnode_t *vp,
4108 4112 offset_t off,
4109 4113 size_t len,
4110 4114 int flags,
4111 4115 cred_t *cr,
4112 4116 caller_context_t *ct)
4113 4117 {
4114 4118 int err;
4115 4119
4116 4120 VOPXID_MAP_CR(vp, cr);
4117 4121
4118 4122 err = (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr, ct);
4119 4123 VOPSTATS_UPDATE(vp, putpage);
4120 4124 return (err);
4121 4125 }
4122 4126
4123 4127 int
4124 4128 fop_map(
4125 4129 vnode_t *vp,
4126 4130 offset_t off,
4127 4131 struct as *as,
4128 4132 caddr_t *addrp,
4129 4133 size_t len,
4130 4134 uchar_t prot,
4131 4135 uchar_t maxprot,
4132 4136 uint_t flags,
4133 4137 cred_t *cr,
4134 4138 caller_context_t *ct)
4135 4139 {
4136 4140 int err;
4137 4141
4138 4142 VOPXID_MAP_CR(vp, cr);
4139 4143
4140 4144 err = (*(vp)->v_op->vop_map)
4141 4145 (vp, off, as, addrp, len, prot, maxprot, flags, cr, ct);
4142 4146 VOPSTATS_UPDATE(vp, map);
4143 4147 return (err);
4144 4148 }
4145 4149
/*
 * VOP wrapper for VOP_ADDMAP: a new mapping of vp has been added to
 * address space 'as'.  On success, for regular files, account the
 * mapped pages in v_mmap_read/v_mmap_write so the rest of the kernel
 * can tell whether the file is mapped for reading and/or writing.
 */
int
fop_addmap(
	vnode_t *vp,
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uchar_t prot,
	uchar_t maxprot,
	uint_t flags,
	cred_t *cr,
	caller_context_t *ct)
{
	int error;
	u_longlong_t delta;	/* page count of this mapping */

	VOPXID_MAP_CR(vp, cr);

	error = (*(vp)->v_op->vop_addmap)
	    (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);

	if ((!error) && (vp->v_type == VREG)) {
		delta = (u_longlong_t)btopr(len);
		/*
		 * If file is declared MAP_PRIVATE, it can't be written back
		 * even if open for write. Handle as read.
		 */
		if (flags & MAP_PRIVATE) {
			atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
			    (int64_t)delta);
		} else {
			/*
			 * atomic_add_64 forces the fetch of a 64 bit value to
			 * be atomic on 32 bit machines
			 */
			if (maxprot & PROT_WRITE)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
				    (int64_t)delta);
			if (maxprot & PROT_READ)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
				    (int64_t)delta);
			/* PROT_EXEC implies read access for accounting */
			if (maxprot & PROT_EXEC)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
				    (int64_t)delta);
		}
	}
	VOPSTATS_UPDATE(vp, addmap);
	return (error);
}
4195 4199
/*
 * VOP wrapper for VOP_DELMAP: a mapping of vp is being removed from
 * address space 'as'.  Decrements the v_mmap_read/v_mmap_write page
 * counts that fop_addmap() incremented, mirroring its accounting
 * (MAP_PRIVATE counts as read; PROT_EXEC counts against read).
 */
int
fop_delmap(
	vnode_t *vp,
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uint_t prot,
	uint_t maxprot,
	uint_t flags,
	cred_t *cr,
	caller_context_t *ct)
{
	int error;
	u_longlong_t delta;	/* page count of this mapping */

	VOPXID_MAP_CR(vp, cr);

	error = (*(vp)->v_op->vop_delmap)
	    (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);

	/*
	 * NFS calls into delmap twice, the first time
	 * it simply establishes a callback mechanism and returns EAGAIN
	 * while the real work is being done upon the second invocation.
	 * We have to detect this here and only decrement the counts upon
	 * the second delmap request.
	 */
	if ((error != EAGAIN) && (vp->v_type == VREG)) {

		delta = (u_longlong_t)btopr(len);

		if (flags & MAP_PRIVATE) {
			atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
			    (int64_t)(-delta));
		} else {
			/*
			 * atomic_add_64 forces the fetch of a 64 bit value
			 * to be atomic on 32 bit machines
			 */
			if (maxprot & PROT_WRITE)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
				    (int64_t)(-delta));
			if (maxprot & PROT_READ)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
				    (int64_t)(-delta));
			if (maxprot & PROT_EXEC)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
				    (int64_t)(-delta));
		}
	}
	VOPSTATS_UPDATE(vp, delmap);
	return (error);
}
4250 4254
4251 4255
4252 4256 int
4253 4257 fop_poll(
4254 4258 vnode_t *vp,
4255 4259 short events,
4256 4260 int anyyet,
4257 4261 short *reventsp,
4258 4262 struct pollhead **phpp,
4259 4263 caller_context_t *ct)
4260 4264 {
4261 4265 int err;
4262 4266
4263 4267 err = (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp, ct);
4264 4268 VOPSTATS_UPDATE(vp, poll);
4265 4269 return (err);
4266 4270 }
4267 4271
/*
 * VOP wrapper for VOP_DUMP: write crash dump data from addr to the
 * device blocks [lbdn, lbdn+dblks) of vp.  Guards against offsets that
 * would be truncated by the narrower types bdev_dump takes.
 */
int
fop_dump(
	vnode_t *vp,
	caddr_t addr,
	offset_t lbdn,
	offset_t dblks,
	caller_context_t *ct)
{
	int err;

	/* ensure lbdn and dblks can be passed safely to bdev_dump */
	/* (round-trip cast detects values that overflow daddr_t / int) */
	if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks))
		return (EIO);

	err = (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks, ct);
	VOPSTATS_UPDATE(vp, dump);
	return (err);
}
4286 4290
4287 4291 int
4288 4292 fop_pathconf(
4289 4293 vnode_t *vp,
4290 4294 int cmd,
4291 4295 ulong_t *valp,
4292 4296 cred_t *cr,
4293 4297 caller_context_t *ct)
4294 4298 {
4295 4299 int err;
4296 4300
4297 4301 VOPXID_MAP_CR(vp, cr);
4298 4302
4299 4303 err = (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr, ct);
4300 4304 VOPSTATS_UPDATE(vp, pathconf);
4301 4305 return (err);
4302 4306 }
4303 4307
4304 4308 int
4305 4309 fop_pageio(
4306 4310 vnode_t *vp,
4307 4311 struct page *pp,
4308 4312 u_offset_t io_off,
4309 4313 size_t io_len,
4310 4314 int flags,
4311 4315 cred_t *cr,
4312 4316 caller_context_t *ct)
4313 4317 {
4314 4318 int err;
4315 4319
4316 4320 VOPXID_MAP_CR(vp, cr);
4317 4321
4318 4322 err = (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr, ct);
4319 4323 VOPSTATS_UPDATE(vp, pageio);
4320 4324 return (err);
4321 4325 }
4322 4326
4323 4327 int
4324 4328 fop_dumpctl(
4325 4329 vnode_t *vp,
4326 4330 int action,
4327 4331 offset_t *blkp,
4328 4332 caller_context_t *ct)
4329 4333 {
4330 4334 int err;
4331 4335 err = (*(vp)->v_op->vop_dumpctl)(vp, action, blkp, ct);
4332 4336 VOPSTATS_UPDATE(vp, dumpctl);
4333 4337 return (err);
4334 4338 }
4335 4339
4336 4340 void
4337 4341 fop_dispose(
4338 4342 vnode_t *vp,
4339 4343 page_t *pp,
4340 4344 int flag,
4341 4345 int dn,
4342 4346 cred_t *cr,
4343 4347 caller_context_t *ct)
4344 4348 {
4345 4349 /* Must do stats first since it's possible to lose the vnode */
4346 4350 VOPSTATS_UPDATE(vp, dispose);
4347 4351
4348 4352 VOPXID_MAP_CR(vp, cr);
4349 4353
4350 4354 (*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr, ct);
4351 4355 }
4352 4356
4353 4357 int
4354 4358 fop_setsecattr(
4355 4359 vnode_t *vp,
4356 4360 vsecattr_t *vsap,
4357 4361 int flag,
4358 4362 cred_t *cr,
4359 4363 caller_context_t *ct)
4360 4364 {
4361 4365 int err;
4362 4366
4363 4367 VOPXID_MAP_CR(vp, cr);
4364 4368
4365 4369 /*
4366 4370 * We're only allowed to skip the ACL check iff we used a 32 bit
4367 4371 * ACE mask with VOP_ACCESS() to determine permissions.
4368 4372 */
4369 4373 if ((flag & ATTR_NOACLCHECK) &&
4370 4374 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4371 4375 return (EINVAL);
4372 4376 }
4373 4377 err = (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr, ct);
4374 4378 VOPSTATS_UPDATE(vp, setsecattr);
4375 4379 return (err);
4376 4380 }
4377 4381
4378 4382 int
4379 4383 fop_getsecattr(
4380 4384 vnode_t *vp,
4381 4385 vsecattr_t *vsap,
4382 4386 int flag,
4383 4387 cred_t *cr,
4384 4388 caller_context_t *ct)
4385 4389 {
4386 4390 int err;
4387 4391
4388 4392 /*
4389 4393 * We're only allowed to skip the ACL check iff we used a 32 bit
4390 4394 * ACE mask with VOP_ACCESS() to determine permissions.
4391 4395 */
4392 4396 if ((flag & ATTR_NOACLCHECK) &&
4393 4397 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4394 4398 return (EINVAL);
4395 4399 }
4396 4400
4397 4401 VOPXID_MAP_CR(vp, cr);
4398 4402
4399 4403 err = (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr, ct);
4400 4404 VOPSTATS_UPDATE(vp, getsecattr);
4401 4405 return (err);
4402 4406 }
4403 4407
4404 4408 int
4405 4409 fop_shrlock(
4406 4410 vnode_t *vp,
4407 4411 int cmd,
4408 4412 struct shrlock *shr,
4409 4413 int flag,
4410 4414 cred_t *cr,
4411 4415 caller_context_t *ct)
4412 4416 {
4413 4417 int err;
4414 4418
4415 4419 VOPXID_MAP_CR(vp, cr);
4416 4420
4417 4421 err = (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr, ct);
4418 4422 VOPSTATS_UPDATE(vp, shrlock);
4419 4423 return (err);
4420 4424 }
4421 4425
4422 4426 int
4423 4427 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
4424 4428 caller_context_t *ct)
4425 4429 {
4426 4430 int err;
4427 4431
4428 4432 err = (*(vp)->v_op->vop_vnevent)(vp, vnevent, dvp, fnm, ct);
4429 4433 VOPSTATS_UPDATE(vp, vnevent);
4430 4434 return (err);
4431 4435 }
4432 4436
4433 4437 int
4434 4438 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
4435 4439 caller_context_t *ct)
4436 4440 {
4437 4441 int err;
4438 4442
4439 4443 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4440 4444 return (ENOTSUP);
4441 4445 err = (*(vp)->v_op->vop_reqzcbuf)(vp, ioflag, uiop, cr, ct);
4442 4446 VOPSTATS_UPDATE(vp, reqzcbuf);
4443 4447 return (err);
4444 4448 }
4445 4449
4446 4450 int
4447 4451 fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
4448 4452 {
4449 4453 int err;
4450 4454
4451 4455 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4452 4456 return (ENOTSUP);
4453 4457 err = (*(vp)->v_op->vop_retzcbuf)(vp, uiop, cr, ct);
4454 4458 VOPSTATS_UPDATE(vp, retzcbuf);
4455 4459 return (err);
4456 4460 }
4457 4461
4458 4462 /*
4459 4463 * Default destructor
4460 4464 * Needed because NULL destructor means that the key is unused
4461 4465 */
/*
 * No-op destructor installed for keys created without one; a NULL
 * destructor slot marks the key as unused, so this placeholder keeps
 * the key allocated.
 */
/* ARGSUSED */
void
vsd_defaultdestructor(void *value)
{
}
4466 4470
4467 4471 /*
4468 4472 * Create a key (index into per vnode array)
4469 4473 * Locks out vsd_create, vsd_destroy, and vsd_free
4470 4474 * May allocate memory with lock held
4471 4475 */
void
vsd_create(uint_t *keyp, void (*destructor)(void *))
{
	int i;
	uint_t nkeys;

	/*
	 * if key is allocated, do nothing
	 */
	mutex_enter(&vsd_lock);
	if (*keyp) {
		mutex_exit(&vsd_lock);
		return;
	}
	/*
	 * find an unused key
	 */
	if (destructor == NULL)
		destructor = vsd_defaultdestructor;

	/* a NULL destructor slot marks an unused key */
	for (i = 0; i < vsd_nkeys; ++i)
		if (vsd_destructor[i] == NULL)
			break;

	/*
	 * if no unused keys, increase the size of the destructor array
	 * (doubling; vsd_realloc may sleep, which is safe under vsd_lock)
	 */
	if (i == vsd_nkeys) {
		if ((nkeys = (vsd_nkeys << 1)) == 0)
			nkeys = 1;
		vsd_destructor =
		    (void (**)(void *))vsd_realloc((void *)vsd_destructor,
		    (size_t)(vsd_nkeys * sizeof (void (*)(void *))),
		    (size_t)(nkeys * sizeof (void (*)(void *))));
		vsd_nkeys = nkeys;
	}

	/*
	 * allocate the next available unused key
	 * (keys are 1-based externally; 0 means "not created")
	 */
	vsd_destructor[i] = destructor;
	*keyp = i + 1;

	/* create vsd_list, if it doesn't exist */
	if (vsd_list == NULL) {
		vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
		list_create(vsd_list, sizeof (struct vsd_node),
		    offsetof(struct vsd_node, vs_nodes));
	}

	mutex_exit(&vsd_lock);
}
4524 4528
4525 4529 /*
4526 4530 * Destroy a key
4527 4531 *
4528 4532 * Assumes that the caller is preventing vsd_set and vsd_get
4529 4533 * Locks out vsd_create, vsd_destroy, and vsd_free
4530 4534 * May free memory with lock held
4531 4535 */
void
vsd_destroy(uint_t *keyp)
{
	uint_t key;
	struct vsd_node *vsd;

	/*
	 * protect the key namespace and our destructor lists
	 */
	mutex_enter(&vsd_lock);
	key = *keyp;
	*keyp = 0;	/* caller's key handle is invalidated immediately */

	ASSERT(key <= vsd_nkeys);

	/*
	 * if the key is valid
	 */
	if (key != 0) {
		uint_t k = key - 1;	/* 0-based index for this key */
		/*
		 * for every vnode with VSD, call key's destructor
		 */
		for (vsd = list_head(vsd_list); vsd != NULL;
		    vsd = list_next(vsd_list, vsd)) {
			/*
			 * no VSD for key in this vnode
			 */
			if (key > vsd->vs_nkeys)
				continue;
			/*
			 * call destructor for key
			 */
			if (vsd->vs_value[k] && vsd_destructor[k])
				(*vsd_destructor[k])(vsd->vs_value[k]);
			/*
			 * reset value for key
			 */
			vsd->vs_value[k] = NULL;
		}
		/*
		 * actually free the key (NULL destructor == unused)
		 */
		vsd_destructor[k] = NULL;
	}

	mutex_exit(&vsd_lock);
}
4580 4584
4581 4585 /*
4582 4586 * Quickly return the per vnode value that was stored with the specified key
4583 4587 * Assumes the caller is protecting key from vsd_create and vsd_destroy
4584 4588 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4585 4589 */
4586 4590 void *
4587 4591 vsd_get(vnode_t *vp, uint_t key)
4588 4592 {
4589 4593 struct vsd_node *vsd;
4590 4594
4591 4595 ASSERT(vp != NULL);
4592 4596 ASSERT(mutex_owned(&vp->v_vsd_lock));
4593 4597
4594 4598 vsd = vp->v_vsd;
4595 4599
4596 4600 if (key && vsd != NULL && key <= vsd->vs_nkeys)
4597 4601 return (vsd->vs_value[key - 1]);
4598 4602 return (NULL);
4599 4603 }
4600 4604
4601 4605 /*
4602 4606 * Set a per vnode value indexed with the specified key
4603 4607 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4604 4608 */
int
vsd_set(vnode_t *vp, uint_t key, void *value)
{
	struct vsd_node *vsd;

	ASSERT(vp != NULL);
	ASSERT(mutex_owned(&vp->v_vsd_lock));

	if (key == 0)
		return (EINVAL);

	/* lazily allocate this vnode's VSD node on first use */
	vsd = vp->v_vsd;
	if (vsd == NULL)
		vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);

	/*
	 * If the vsd was just allocated, vs_nkeys will be 0, so the following
	 * code won't happen and we will continue down and allocate space for
	 * the vs_value array.
	 * If the caller is replacing one value with another, then it is up
	 * to the caller to free/rele/destroy the previous value (if needed).
	 */
	if (key <= vsd->vs_nkeys) {
		vsd->vs_value[key - 1] = value;
		return (0);
	}

	ASSERT(key <= vsd_nkeys);

	if (vsd->vs_nkeys == 0) {
		mutex_enter(&vsd_lock);	/* lock out vsd_destroy() */
		/*
		 * Link onto list of all VSD nodes.
		 */
		list_insert_head(vsd_list, vsd);
		mutex_exit(&vsd_lock);
	}

	/*
	 * Allocate vnode local storage and set the value for key
	 * (grow vs_value just enough to hold slots 1..key)
	 */
	vsd->vs_value = vsd_realloc(vsd->vs_value,
	    vsd->vs_nkeys * sizeof (void *),
	    key * sizeof (void *));
	vsd->vs_nkeys = key;
	vsd->vs_value[key - 1] = value;

	return (0);
}
4654 4658
4655 4659 /*
4656 4660 * Called from vn_free() to run the destructor function for each vsd
4657 4661 * Locks out vsd_create and vsd_destroy
4658 4662 * Assumes that the destructor *DOES NOT* use vsd
4659 4663 */
void
vsd_free(vnode_t *vp)
{
	int i;
	struct vsd_node *vsd = vp->v_vsd;

	if (vsd == NULL)
		return;

	/*
	 * vs_nkeys == 0 means the node was never linked onto vsd_list
	 * (see vsd_set), so it can be freed without taking vsd_lock.
	 */
	if (vsd->vs_nkeys == 0) {
		kmem_free(vsd, sizeof (*vsd));
		vp->v_vsd = NULL;
		return;
	}

	/*
	 * lock out vsd_create and vsd_destroy, call
	 * the destructor, and mark the value as destroyed.
	 */
	mutex_enter(&vsd_lock);

	for (i = 0; i < vsd->vs_nkeys; i++) {
		if (vsd->vs_value[i] && vsd_destructor[i])
			(*vsd_destructor[i])(vsd->vs_value[i]);
		vsd->vs_value[i] = NULL;
	}

	/*
	 * remove from linked list of VSD nodes
	 */
	list_remove(vsd_list, vsd);

	mutex_exit(&vsd_lock);

	/*
	 * free up the VSD
	 */
	kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
	kmem_free(vsd, sizeof (struct vsd_node));
	vp->v_vsd = NULL;
}
4701 4705
4702 4706 /*
4703 4707 * realloc
4704 4708 */
4705 4709 static void *
4706 4710 vsd_realloc(void *old, size_t osize, size_t nsize)
4707 4711 {
4708 4712 void *new;
4709 4713
4710 4714 new = kmem_zalloc(nsize, KM_SLEEP);
4711 4715 if (old) {
4712 4716 bcopy(old, new, osize);
4713 4717 kmem_free(old, osize);
4714 4718 }
4715 4719 return (new);
4716 4720 }
4717 4721
4718 4722 /*
4719 4723 * Setup the extensible system attribute for creating a reparse point.
4720 4724 * The symlink data 'target' is validated for proper format of a reparse
4721 4725 * string and a check also made to make sure the symlink data does not
4722 4726 * point to an existing file.
4723 4727 *
4724 4728 * return 0 if ok else -1.
4725 4729 */
static int
fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
{
	xoptattr_t *xoap;

	if ((!target) || (!vap) || (!xvattr))
		return (-1);

	/* validate reparse string */
	if (reparse_validate((const char *)target))
		return (-1);

	/* build an xvattr based on the caller's vattr with XAT_REPARSE set */
	xva_init(xvattr);
	xvattr->xva_vattr = *vap;
	xvattr->xva_vattr.va_mask |= AT_XVATTR;
	xoap = xva_getxoptattr(xvattr);
	ASSERT(xoap);
	XVA_SET_REQ(xvattr, XAT_REPARSE);
	xoap->xoa_reparse = 1;

	return (0);
}
4748 4752
4749 4753 /*
4750 4754 * Function to check whether a symlink is a reparse point.
4751 4755 * Return B_TRUE if it is a reparse point, else return B_FALSE
4752 4756 */
boolean_t
vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	xvattr_t xvattr;
	xoptattr_t *xoap;

	/* only symlinks on xvattr-capable filesystems can be reparse points */
	if ((vp->v_type != VLNK) ||
	    !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
		return (B_FALSE);

	xva_init(&xvattr);
	xoap = xva_getxoptattr(&xvattr);
	ASSERT(xoap);
	XVA_SET_REQ(&xvattr, XAT_REPARSE);

	if (VOP_GETATTR(vp, &xvattr.xva_vattr, 0, cr, ct))
		return (B_FALSE);

	/* the filesystem must have actually returned the XAT_REPARSE bit */
	if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
	    (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
		return (B_FALSE);

	return (xoap->xoa_reparse ? B_TRUE : B_FALSE);
}
|
↓ open down ↓ |
3846 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX