3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 *
24 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
25 * All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
30 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
31 */
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/time.h>
38 #include <sys/vnode.h>
39 #include <sys/vfs.h>
40 #include <sys/vfs_opreg.h>
41 #include <sys/file.h>
42 #include <sys/filio.h>
1442 MANDLOCK(vp, va.va_mode))
1443 return (EACCES);
1444
1445 /*
1446 * Access check is based on only
1447 * one of owner, group, public.
1448 * If not owner, then check group.
1449 * If not a member of the group,
1450 * then check public access.
1451 */
1452 if (crgetuid(cr) != va.va_uid) {
1453 shift += 3;
1454 if (!groupmember(va.va_gid, cr))
1455 shift += 3;
1456 }
1457
1458 return (secpolicy_vnode_access2(cr, vp, va.va_uid,
1459 va.va_mode << shift, mode));
1460 }
1461
/*
 * File-scope flag, on (1) by default.
 * NOTE(review): presumably the switch that lets nfs_readlink() cache
 * symbolic link contents; the code that reads it is not visible in
 * this excerpt -- confirm before relying on this description.
 */
1462 static int nfs_do_symlink_cache = 1;
1463
1464 /* ARGSUSED */
1465 static int
1466 nfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
1467 {
1468 int error;
1469 struct nfsrdlnres rl;
1470 rnode_t *rp;
1471 int douprintf;
1472 failinfo_t fi;
1473
1474 /*
1475 * We want to be consistent with UFS semantics so we will return
1476 * EINVAL instead of ENXIO. This violates the XNFS spec and
1477 * the RFC 1094, which are wrong any way. BUGID 1138002.
1478 */
1479 if (vp->v_type != VLNK)
1480 return (EINVAL);
1481
1482 if (nfs_zone() != VTOMI(vp)->mi_zone)
1745 error = nfslookup(dvp, nm, vpp, pnp, flags, rdir, cr, 0);
1746
1747 nfs_rw_exit(&drp->r_rwlock);
1748
1749 /*
1750 * If vnode is a device, create special vnode.
1751 */
1752 if (!error && IS_DEVVP(*vpp)) {
1753 vp = *vpp;
1754 *vpp = specvp(vp, vp->v_rdev, vp->v_type, cr);
1755 VN_RELE(vp);
1756 }
1757
1758 out:
1759 if (avp != NULL)
1760 VN_RELE(avp);
1761
1762 return (error);
1763 }
1764
/*
 * File-scope flag, on (1) by default.
 * NOTE(review): presumably enables negative name-cache entries in the
 * lookup path; its consumer is not visible in this excerpt -- confirm.
 */
1765 static int nfs_lookup_neg_cache = 1;
1766
/*
 * Counters that exist only in DEBUG builds; all start at zero.
 * NOTE(review): the names suggest per-outcome DNLC lookup statistics;
 * the sites that update them are not visible in this excerpt -- confirm.
 */
1767 #ifdef DEBUG
1768 static int nfs_lookup_dnlc_hits = 0;
1769 static int nfs_lookup_dnlc_misses = 0;
1770 static int nfs_lookup_dnlc_neg_hits = 0;
1771 static int nfs_lookup_dnlc_disappears = 0;
1772 static int nfs_lookup_dnlc_lookups = 0;
1773 #endif
1774
1775 /* ARGSUSED */
1776 int
1777 nfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
1778 int flags, vnode_t *rdir, cred_t *cr, int rfscall_flags)
1779 {
1780 int error;
1781
1782 ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone);
1783
1784 /*
1785 * If lookup is for "", just return dvp. Don't need
2936 if (HAVE_RDDIR_CACHE(drp))
2937 nfs_purge_rddir_cache(dvp);
2938 } else {
2939 PURGE_STALE_FH(error, dvp, cr);
2940 }
2941 }
2942
2943 nfs_rw_exit(&drp->r_rwlock);
2944
2945 return (error);
2946 }
2947
/*
 * Counters that exist only in DEBUG builds; all start at zero.
 * NOTE(review): the names suggest readdir-cache statistics (hits, short
 * reads, waits, misses, read-aheads); the sites that update them are
 * not visible in this excerpt -- confirm.
 */
2948 #ifdef DEBUG
2949 static int nfs_readdir_cache_hits = 0;
2950 static int nfs_readdir_cache_shorts = 0;
2951 static int nfs_readdir_cache_waits = 0;
2952 static int nfs_readdir_cache_misses = 0;
2953 static int nfs_readdir_readahead = 0;
2954 #endif
2955
/*
 * File-scope flag, off (0) by default.
 * NOTE(review): presumably restricts the size of READDIR requests for
 * servers that mishandle large ones; its consumer is not visible in
 * this excerpt -- confirm.
 */
2956 static int nfs_shrinkreaddir = 0;
2957
2958 /*
2959 * Read directory entries.
2960 * There are some weird things to look out for here. The uio_offset
2961 * field is either 0 or it is the offset returned from a previous
2962 * readdir. It is an opaque value used by the server to find the
2963 * correct directory block to read. The count field is the number
2964 * of blocks to read on the server. This is advisory only, the server
2965 * may return only one block's worth of entries. Entries may be compressed
2966 * on the server.
2967 */
2968 /* ARGSUSED */
2969 static int
2970 nfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
2971 caller_context_t *ct, int flags)
2972 {
2973 int error;
2974 size_t count;
2975 rnode_t *rp;
2976 rddir_cache *rdc;
3269 mi = VTOMI(vp);
3270
3271 rda.rda_fh = *VTOFH(vp);
3272 rda.rda_offset = rdc->nfs_cookie;
3273
3274 /*
3275 * NFS client failover support
3276 * suppress failover unless we have a zero cookie
3277 */
3278 if (rdc->nfs_cookie == (off_t)0) {
3279 fi.vp = vp;
3280 fi.fhp = (caddr_t)&rda.rda_fh;
3281 fi.copyproc = nfscopyfh;
3282 fi.lookupproc = nfslookup;
3283 fi.xattrdirproc = acl_getxattrdir2;
3284 fip = &fi;
3285 } else {
3286 fip = NULL;
3287 }
3288
3289 rd.rd_entries = kmem_alloc(rdc->buflen, KM_SLEEP);
3290 rd.rd_size = count;
3291 rd.rd_offset = rda.rda_offset;
3292
3293 douprintf = 1;
3294
3295 if (mi->mi_io_kstats) {
3296 mutex_enter(&mi->mi_lock);
3297 kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
3298 mutex_exit(&mi->mi_lock);
3299 }
3300
3301 do {
3302 rda.rda_count = MIN(count, mi->mi_curread);
3303 error = rfs2call(mi, RFS_READDIR,
3304 xdr_rddirargs, (caddr_t)&rda,
3305 xdr_getrddirres, (caddr_t)&rd, cr,
3306 &douprintf, &rd.rd_status, 0, fip);
3307 } while (error == ENFS_TRYAGAIN);
3308
3309 if (mi->mi_io_kstats) {
3319 * field. The r_statelock in the rnode must be held to
3320 * prevent two different threads from simultaneously
3321 * attempting to update the flags field. This can happen
3322 * if we are turning off RDDIR and the other thread is
3323 * trying to set RDDIRWAIT.
3324 */
3325 ASSERT(rdc->flags & RDDIR);
3326 if (!error) {
3327 error = geterrno(rd.rd_status);
3328 if (!error) {
3329 rdc->nfs_ncookie = rd.rd_offset;
3330 rdc->eof = rd.rd_eof ? 1 : 0;
3331 rdc->entlen = rd.rd_size;
3332 ASSERT(rdc->entlen <= rdc->buflen);
3333 #ifdef DEBUG
3334 rdc->entries = rddir_cache_buf_alloc(rdc->buflen,
3335 KM_SLEEP);
3336 #else
3337 rdc->entries = kmem_alloc(rdc->buflen, KM_SLEEP);
3338 #endif
3339 bcopy(rd.rd_entries, rdc->entries, rdc->entlen);
3340 rdc->error = 0;
3341 if (mi->mi_io_kstats) {
3342 mutex_enter(&mi->mi_lock);
3343 KSTAT_IO_PTR(mi->mi_io_kstats)->reads++;
3344 KSTAT_IO_PTR(mi->mi_io_kstats)->nread +=
3345 rd.rd_size;
3346 mutex_exit(&mi->mi_lock);
3347 }
3348 } else {
3349 PURGE_STALE_FH(error, vp, cr);
3350 }
3351 }
3352 if (error) {
3353 rdc->entries = NULL;
3354 rdc->error = error;
3355 }
3356 kmem_free(rd.rd_entries, rdc->buflen);
3357
3358 mutex_enter(&rp->r_statelock);
3359 rdc->flags &= ~RDDIR;
3360 if (rdc->flags & RDDIRWAIT) {
3361 rdc->flags &= ~RDDIRWAIT;
3362 cv_broadcast(&rdc->cv);
3363 }
3364 if (error)
3365 rdc->flags |= RDDIRREQ;
3366 mutex_exit(&rp->r_statelock);
3367
3368 rddir_cache_rele(rdc);
3369
3370 return (error);
3371 }
3372
/*
 * DEBUG-only flag, off (0) by default.
 * NOTE(review): presumably a debugging aid for the nfs_bio path; the
 * code that tests it is not visible in this excerpt -- confirm.
 */
3373 #ifdef DEBUG
3374 static int nfs_bio_do_stop = 0;
3375 #endif
3376
3596 static int
3597 nfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3598 {
3599
3600 /*
3601 * Because we stuff the readdir cookie into the offset field
3602 * someone may attempt to do an lseek with the cookie which
3603 * we want to succeed.
3604 */
3605 if (vp->v_type == VDIR)
3606 return (0);
3607 if (*noffp < 0 || *noffp > MAXOFF32_T)
3608 return (EINVAL);
3609 return (0);
3610 }
3611
3612 /*
3613 * Number of NFS_MAXDATA-sized blocks to read ahead.
3614 * The default of 4 was chosen for 100Base-T networks.
3615 */
3616 static int nfs_nra = 4;
3617
3618 #ifdef DEBUG
3619 static int nfs_lostpage = 0; /* number of times we lost original page */
3620 #endif
3621
3622 /*
3623 * Return all the pages from [off..off+len) in file
3624 */
3625 /* ARGSUSED */
3626 static int
3627 nfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
3628 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3629 enum seg_rw rw, cred_t *cr, caller_context_t *ct)
3630 {
3631 rnode_t *rp;
3632 int error;
3633 mntinfo_t *mi;
3634
3635 if (vp->v_flag & VNOMAP)
3636 return (ENOSYS);
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 *
25 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
26 * All rights reserved.
27 */
28
29 /*
30 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
31 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
32 */
33
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/cred.h>
38 #include <sys/time.h>
39 #include <sys/vnode.h>
40 #include <sys/vfs.h>
41 #include <sys/vfs_opreg.h>
42 #include <sys/file.h>
43 #include <sys/filio.h>
1443 MANDLOCK(vp, va.va_mode))
1444 return (EACCES);
1445
1446 /*
1447 * Access check is based on only
1448 * one of owner, group, public.
1449 * If not owner, then check group.
1450 * If not a member of the group,
1451 * then check public access.
1452 */
1453 if (crgetuid(cr) != va.va_uid) {
1454 shift += 3;
1455 if (!groupmember(va.va_gid, cr))
1456 shift += 3;
1457 }
1458
1459 return (secpolicy_vnode_access2(cr, vp, va.va_uid,
1460 va.va_mode << shift, mode));
1461 }
1462
/*
 * Global (non-static) volatile flag, on (1) by default.
 * NOTE(review): presumably the switch that lets nfs_readlink() cache
 * symbolic link contents, declared volatile so that a value patched at
 * run time is re-read; the code that reads it is not visible in this
 * excerpt -- confirm.
 */
1463 volatile int nfs_do_symlink_cache = 1;
1464
1465 /* ARGSUSED */
1466 static int
1467 nfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
1468 {
1469 int error;
1470 struct nfsrdlnres rl;
1471 rnode_t *rp;
1472 int douprintf;
1473 failinfo_t fi;
1474
1475 /*
1476 * We want to be consistent with UFS semantics so we will return
1477 * EINVAL instead of ENXIO. This violates the XNFS spec and
1478 * the RFC 1094, which are wrong any way. BUGID 1138002.
1479 */
1480 if (vp->v_type != VLNK)
1481 return (EINVAL);
1482
1483 if (nfs_zone() != VTOMI(vp)->mi_zone)
1746 error = nfslookup(dvp, nm, vpp, pnp, flags, rdir, cr, 0);
1747
1748 nfs_rw_exit(&drp->r_rwlock);
1749
1750 /*
1751 * If vnode is a device, create special vnode.
1752 */
1753 if (!error && IS_DEVVP(*vpp)) {
1754 vp = *vpp;
1755 *vpp = specvp(vp, vp->v_rdev, vp->v_type, cr);
1756 VN_RELE(vp);
1757 }
1758
1759 out:
1760 if (avp != NULL)
1761 VN_RELE(avp);
1762
1763 return (error);
1764 }
1765
/*
 * Global (non-static) volatile flag, on (1) by default.
 * NOTE(review): presumably enables negative name-cache entries in the
 * lookup path; its consumer is not visible in this excerpt -- confirm.
 */
1766 volatile int nfs_lookup_neg_cache = 1;
1767
/*
 * Counters that exist only in DEBUG builds; all start at zero.
 * NOTE(review): the names suggest per-outcome DNLC lookup statistics;
 * the sites that update them are not visible in this excerpt -- confirm.
 */
1768 #ifdef DEBUG
1769 static int nfs_lookup_dnlc_hits = 0;
1770 static int nfs_lookup_dnlc_misses = 0;
1771 static int nfs_lookup_dnlc_neg_hits = 0;
1772 static int nfs_lookup_dnlc_disappears = 0;
1773 static int nfs_lookup_dnlc_lookups = 0;
1774 #endif
1775
1776 /* ARGSUSED */
1777 int
1778 nfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
1779 int flags, vnode_t *rdir, cred_t *cr, int rfscall_flags)
1780 {
1781 int error;
1782
1783 ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone);
1784
1785 /*
1786 * If lookup is for "", just return dvp. Don't need
2937 if (HAVE_RDDIR_CACHE(drp))
2938 nfs_purge_rddir_cache(dvp);
2939 } else {
2940 PURGE_STALE_FH(error, dvp, cr);
2941 }
2942 }
2943
2944 nfs_rw_exit(&drp->r_rwlock);
2945
2946 return (error);
2947 }
2948
/*
 * Counters that exist only in DEBUG builds; all start at zero.
 * NOTE(review): the names suggest readdir-cache statistics (hits, short
 * reads, waits, misses, read-aheads); the sites that update them are
 * not visible in this excerpt -- confirm.
 */
2949 #ifdef DEBUG
2950 static int nfs_readdir_cache_hits = 0;
2951 static int nfs_readdir_cache_shorts = 0;
2952 static int nfs_readdir_cache_waits = 0;
2953 static int nfs_readdir_cache_misses = 0;
2954 static int nfs_readdir_readahead = 0;
2955 #endif
2956
/*
 * Global (non-static) volatile flag, off (0) by default.
 * NOTE(review): presumably restricts the size of READDIR requests for
 * servers that mishandle large ones; its consumer is not visible in
 * this excerpt -- confirm.
 */
2957 volatile int nfs_shrinkreaddir = 0;
2958
2959 /*
2960 * Read directory entries.
2961 * There are some weird things to look out for here. The uio_offset
2962 * field is either 0 or it is the offset returned from a previous
2963 * readdir. It is an opaque value used by the server to find the
2964 * correct directory block to read. The count field is the number
2965 * of blocks to read on the server. This is advisory only, the server
2966 * may return only one block's worth of entries. Entries may be compressed
2967 * on the server.
2968 */
2969 /* ARGSUSED */
2970 static int
2971 nfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
2972 caller_context_t *ct, int flags)
2973 {
2974 int error;
2975 size_t count;
2976 rnode_t *rp;
2977 rddir_cache *rdc;
3270 mi = VTOMI(vp);
3271
3272 rda.rda_fh = *VTOFH(vp);
3273 rda.rda_offset = rdc->nfs_cookie;
3274
3275 /*
3276 * NFS client failover support
3277 * suppress failover unless we have a zero cookie
3278 */
3279 if (rdc->nfs_cookie == (off_t)0) {
3280 fi.vp = vp;
3281 fi.fhp = (caddr_t)&rda.rda_fh;
3282 fi.copyproc = nfscopyfh;
3283 fi.lookupproc = nfslookup;
3284 fi.xattrdirproc = acl_getxattrdir2;
3285 fip = &fi;
3286 } else {
3287 fip = NULL;
3288 }
3289
3290 rd.rd_dirents = kmem_alloc(rdc->buflen, KM_SLEEP);
3291 rd.rd_size = count;
3292 rd.rd_offset = rda.rda_offset;
3293
3294 douprintf = 1;
3295
3296 if (mi->mi_io_kstats) {
3297 mutex_enter(&mi->mi_lock);
3298 kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
3299 mutex_exit(&mi->mi_lock);
3300 }
3301
3302 do {
3303 rda.rda_count = MIN(count, mi->mi_curread);
3304 error = rfs2call(mi, RFS_READDIR,
3305 xdr_rddirargs, (caddr_t)&rda,
3306 xdr_getrddirres, (caddr_t)&rd, cr,
3307 &douprintf, &rd.rd_status, 0, fip);
3308 } while (error == ENFS_TRYAGAIN);
3309
3310 if (mi->mi_io_kstats) {
3320 * field. The r_statelock in the rnode must be held to
3321 * prevent two different threads from simultaneously
3322 * attempting to update the flags field. This can happen
3323 * if we are turning off RDDIR and the other thread is
3324 * trying to set RDDIRWAIT.
3325 */
3326 ASSERT(rdc->flags & RDDIR);
3327 if (!error) {
3328 error = geterrno(rd.rd_status);
3329 if (!error) {
3330 rdc->nfs_ncookie = rd.rd_offset;
3331 rdc->eof = rd.rd_eof ? 1 : 0;
3332 rdc->entlen = rd.rd_size;
3333 ASSERT(rdc->entlen <= rdc->buflen);
3334 #ifdef DEBUG
3335 rdc->entries = rddir_cache_buf_alloc(rdc->buflen,
3336 KM_SLEEP);
3337 #else
3338 rdc->entries = kmem_alloc(rdc->buflen, KM_SLEEP);
3339 #endif
3340 bcopy(rd.rd_dirents, rdc->entries, rdc->entlen);
3341 rdc->error = 0;
3342 if (mi->mi_io_kstats) {
3343 mutex_enter(&mi->mi_lock);
3344 KSTAT_IO_PTR(mi->mi_io_kstats)->reads++;
3345 KSTAT_IO_PTR(mi->mi_io_kstats)->nread +=
3346 rd.rd_size;
3347 mutex_exit(&mi->mi_lock);
3348 }
3349 } else {
3350 PURGE_STALE_FH(error, vp, cr);
3351 }
3352 }
3353 if (error) {
3354 rdc->entries = NULL;
3355 rdc->error = error;
3356 }
3357 kmem_free(rd.rd_dirents, rdc->buflen);
3358
3359 mutex_enter(&rp->r_statelock);
3360 rdc->flags &= ~RDDIR;
3361 if (rdc->flags & RDDIRWAIT) {
3362 rdc->flags &= ~RDDIRWAIT;
3363 cv_broadcast(&rdc->cv);
3364 }
3365 if (error)
3366 rdc->flags |= RDDIRREQ;
3367 mutex_exit(&rp->r_statelock);
3368
3369 rddir_cache_rele(rdc);
3370
3371 return (error);
3372 }
3373
/*
 * DEBUG-only flag, off (0) by default.
 * NOTE(review): presumably a debugging aid for the nfs_bio path; the
 * code that tests it is not visible in this excerpt -- confirm.
 */
3374 #ifdef DEBUG
3375 static int nfs_bio_do_stop = 0;
3376 #endif
3377
3597 static int
3598 nfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3599 {
3600
3601 /*
3602 * Because we stuff the readdir cookie into the offset field
3603 * someone may attempt to do an lseek with the cookie which
3604 * we want to succeed.
3605 */
3606 if (vp->v_type == VDIR)
3607 return (0);
3608 if (*noffp < 0 || *noffp > MAXOFF32_T)
3609 return (EINVAL);
3610 return (0);
3611 }
3612
3613 /*
3614 * Number of NFS_MAXDATA-sized blocks to read ahead.
3615 * The default of 4 was chosen for 100Base-T networks.
3616 */
3617 volatile int nfs_nra = 4;
3618
3619 #ifdef DEBUG
3620 static int nfs_lostpage = 0; /* number of times we lost original page */
3621 #endif
3622
3623 /*
3624 * Return all the pages from [off..off+len) in file
3625 */
3626 /* ARGSUSED */
3627 static int
3628 nfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
3629 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3630 enum seg_rw rw, cred_t *cr, caller_context_t *ct)
3631 {
3632 rnode_t *rp;
3633 int error;
3634 mntinfo_t *mi;
3635
3636 if (vp->v_flag & VNOMAP)
3637 return (ENOSYS);
|