3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  *
  24  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
  25  *      All rights reserved.
  26  */
  27 
  28 /*
  29  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  30  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  31  */
  32 
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36 #include <sys/cred.h>
  37 #include <sys/time.h>
  38 #include <sys/vnode.h>
  39 #include <sys/vfs.h>
  40 #include <sys/vfs_opreg.h>
  41 #include <sys/file.h>
  42 #include <sys/filio.h>
 
 
1442             MANDLOCK(vp, va.va_mode))
1443                 return (EACCES);
1444 
1445         /*
1446          * Access check is based on only
1447          * one of owner, group, public.
1448          * If not owner, then check group.
1449          * If not a member of the group,
1450          * then check public access.
1451          */
1452         if (crgetuid(cr) != va.va_uid) {
1453                 shift += 3;
1454                 if (!groupmember(va.va_gid, cr))
1455                         shift += 3;
1456         }
1457 
1458         return (secpolicy_vnode_access2(cr, vp, va.va_uid,
1459             va.va_mode << shift, mode));
1460 }
1461 
/*
 * File-private switch, on (1) by default.
 * NOTE(review): the name suggests it gates caching of symbolic link
 * contents for nfs_readlink(); the code that reads it is not visible
 * here -- confirm before relying on that.
 */
1462 static int nfs_do_symlink_cache = 1;
1463 
1464 /* ARGSUSED */
1465 static int
1466 nfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
1467 {
1468         int error;
1469         struct nfsrdlnres rl;
1470         rnode_t *rp;
1471         int douprintf;
1472         failinfo_t fi;
1473 
1474         /*
1475          * We want to be consistent with UFS semantics so we will return
1476          * EINVAL instead of ENXIO. This violates the XNFS spec and
1477          * RFC 1094, which are wrong anyway. BUGID 1138002.
1478          */
1479         if (vp->v_type != VLNK)
1480                 return (EINVAL);
1481 
1482         if (nfs_zone() != VTOMI(vp)->mi_zone)
 
1745         error = nfslookup(dvp, nm, vpp, pnp, flags, rdir, cr, 0);
1746 
1747         nfs_rw_exit(&drp->r_rwlock);
1748 
1749         /*
1750          * If vnode is a device, create special vnode.
1751          */
1752         if (!error && IS_DEVVP(*vpp)) {
1753                 vp = *vpp;
1754                 *vpp = specvp(vp, vp->v_rdev, vp->v_type, cr);
1755                 VN_RELE(vp);
1756         }
1757 
1758 out:
1759         if (avp != NULL)
1760                 VN_RELE(avp);
1761 
1762         return (error);
1763 }
1764 
/*
 * File-private switch, on (1) by default.
 * NOTE(review): presumably controls whether failed lookups are remembered
 * as negative name-cache entries; the consumer is not visible here --
 * confirm.
 */
1765 static int nfs_lookup_neg_cache = 1;
1766 

/*
 * Lookup statistics.  These exist only in DEBUG builds and all start
 * at zero.
 * NOTE(review): going by the names, they tally name-cache (DNLC) hits,
 * misses, negative hits, entries that disappeared, and total lookups;
 * the increment sites are not visible here -- confirm.
 */
1767 #ifdef DEBUG
1768 static int nfs_lookup_dnlc_hits = 0;
1769 static int nfs_lookup_dnlc_misses = 0;
1770 static int nfs_lookup_dnlc_neg_hits = 0;
1771 static int nfs_lookup_dnlc_disappears = 0;
1772 static int nfs_lookup_dnlc_lookups = 0;
1773 #endif
1774 
1775 /* ARGSUSED */
1776 int
1777 nfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
1778         int flags, vnode_t *rdir, cred_t *cr, int rfscall_flags)
1779 {
1780         int error;
1781 
1782         ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone);
1783 
1784         /*
1785          * If lookup is for "", just return dvp.  Don't need
 
2936                         if (HAVE_RDDIR_CACHE(drp))
2937                                 nfs_purge_rddir_cache(dvp);
2938                 } else {
2939                         PURGE_STALE_FH(error, dvp, cr);
2940                 }
2941         }
2942 
2943         nfs_rw_exit(&drp->r_rwlock);
2944 
2945         return (error);
2946 }
2947 
/*
 * Readdir statistics.  These exist only in DEBUG builds and all start
 * at zero.
 * NOTE(review): going by the names, they count readdir cache hits,
 * short results, waits, misses and read-ahead requests; the increment
 * sites are not visible here -- confirm.
 */
2948 #ifdef DEBUG
2949 static int nfs_readdir_cache_hits = 0;
2950 static int nfs_readdir_cache_shorts = 0;
2951 static int nfs_readdir_cache_waits = 0;
2952 static int nfs_readdir_cache_misses = 0;
2953 static int nfs_readdir_readahead = 0;
2954 #endif
2955 

/*
 * File-private switch, off (0) by default.
 * NOTE(review): the name suggests it makes readdir requests smaller;
 * the code that tests it is not visible here -- confirm.
 */
2956 static int nfs_shrinkreaddir = 0;
2957 
2958 /*
2959  * Read directory entries.
2960  * There are some weird things to look out for here.  The uio_offset
2961  * field is either 0 or it is the offset returned from a previous
2962  * readdir.  It is an opaque value used by the server to find the
2963  * correct directory block to read. The count field is the number
2964  * of blocks to read on the server.  This is advisory only, the server
2965  * may return only one block's worth of entries.  Entries may be compressed
2966  * on the server.
2967  */
2968 /* ARGSUSED */
2969 static int
2970 nfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
2971         caller_context_t *ct, int flags)
2972 {
2973         int error;
2974         size_t count;
2975         rnode_t *rp;
2976         rddir_cache *rdc;
 
3269         mi = VTOMI(vp);
3270 
3271         rda.rda_fh = *VTOFH(vp);
3272         rda.rda_offset = rdc->nfs_cookie;
3273 
3274         /*
3275          * NFS client failover support
3276          * suppress failover unless we have a zero cookie
3277          */
3278         if (rdc->nfs_cookie == (off_t)0) {
3279                 fi.vp = vp;
3280                 fi.fhp = (caddr_t)&rda.rda_fh;
3281                 fi.copyproc = nfscopyfh;
3282                 fi.lookupproc = nfslookup;
3283                 fi.xattrdirproc = acl_getxattrdir2;
3284                 fip = &fi;
3285         } else {
3286                 fip = NULL;
3287         }
3288 
3289         rd.rd_entries = kmem_alloc(rdc->buflen, KM_SLEEP);
3290         rd.rd_size = count;
3291         rd.rd_offset = rda.rda_offset;
3292 
3293         douprintf = 1;
3294 
3295         if (mi->mi_io_kstats) {
3296                 mutex_enter(&mi->mi_lock);
3297                 kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
3298                 mutex_exit(&mi->mi_lock);
3299         }
3300 
3301         do {
3302                 rda.rda_count = MIN(count, mi->mi_curread);
3303                 error = rfs2call(mi, RFS_READDIR,
3304                     xdr_rddirargs, (caddr_t)&rda,
3305                     xdr_getrddirres, (caddr_t)&rd, cr,
3306                     &douprintf, &rd.rd_status, 0, fip);
3307         } while (error == ENFS_TRYAGAIN);
3308 
3309         if (mi->mi_io_kstats) {
 
3319          * field.  The r_statelock in the rnode must be held to
3320          * prevent two different threads from simultaneously
3321          * attempting to update the flags field.  This can happen
3322          * if we are turning off RDDIR and the other thread is
3323          * trying to set RDDIRWAIT.
3324          */
3325         ASSERT(rdc->flags & RDDIR);
3326         if (!error) {
3327                 error = geterrno(rd.rd_status);
3328                 if (!error) {
3329                         rdc->nfs_ncookie = rd.rd_offset;
3330                         rdc->eof = rd.rd_eof ? 1 : 0;
3331                         rdc->entlen = rd.rd_size;
3332                         ASSERT(rdc->entlen <= rdc->buflen);
3333 #ifdef DEBUG
3334                         rdc->entries = rddir_cache_buf_alloc(rdc->buflen,
3335                             KM_SLEEP);
3336 #else
3337                         rdc->entries = kmem_alloc(rdc->buflen, KM_SLEEP);
3338 #endif
3339                         bcopy(rd.rd_entries, rdc->entries, rdc->entlen);
3340                         rdc->error = 0;
3341                         if (mi->mi_io_kstats) {
3342                                 mutex_enter(&mi->mi_lock);
3343                                 KSTAT_IO_PTR(mi->mi_io_kstats)->reads++;
3344                                 KSTAT_IO_PTR(mi->mi_io_kstats)->nread +=
3345                                     rd.rd_size;
3346                                 mutex_exit(&mi->mi_lock);
3347                         }
3348                 } else {
3349                         PURGE_STALE_FH(error, vp, cr);
3350                 }
3351         }
3352         if (error) {
3353                 rdc->entries = NULL;
3354                 rdc->error = error;
3355         }
3356         kmem_free(rd.rd_entries, rdc->buflen);
3357 
3358         mutex_enter(&rp->r_statelock);
3359         rdc->flags &= ~RDDIR;
3360         if (rdc->flags & RDDIRWAIT) {
3361                 rdc->flags &= ~RDDIRWAIT;
3362                 cv_broadcast(&rdc->cv);
3363         }
3364         if (error)
3365                 rdc->flags |= RDDIRREQ;
3366         mutex_exit(&rp->r_statelock);
3367 
3368         rddir_cache_rele(rdc);
3369 
3370         return (error);
3371 }
3372 
/*
 * Debugging flag that exists only in DEBUG builds; clear (0) by default.
 * NOTE(review): presumably consulted by the block I/O path as a debugging
 * hook; the code that tests it is not visible here -- confirm.
 */
3373 #ifdef DEBUG
3374 static int nfs_bio_do_stop = 0;
3375 #endif
3376 
 
3596 static int
3597 nfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3598 {
3599 
3600         /*
3601          * Because we stuff the readdir cookie into the offset field
3602          * someone may attempt to do an lseek with the cookie which
3603          * we want to succeed.
3604          */
3605         if (vp->v_type == VDIR)
3606                 return (0);
3607         if (*noffp < 0 || *noffp > MAXOFF32_T)
3608                 return (EINVAL);
3609         return (0);
3610 }
3611 
3612 /*
3613  * Read-ahead depth: number of NFS_MAXDATA-sized blocks to read ahead.
3614  * The default of 4 was chosen for 100 base-T networks.
3615  */
3616 static int nfs_nra = 4;
3617 

/* Statistic kept only in DEBUG builds; starts at zero. */
3618 #ifdef DEBUG
3619 static int nfs_lostpage = 0;    /* number of times we lost original page */
3620 #endif
3621 
3622 /*
3623  * Return all the pages from [off..off+len) in file
3624  */
3625 /* ARGSUSED */
3626 static int
3627 nfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
3628         page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3629         enum seg_rw rw, cred_t *cr, caller_context_t *ct)
3630 {
3631         rnode_t *rp;
3632         int error;
3633         mntinfo_t *mi;
3634 
3635         if (vp->v_flag & VNOMAP)
3636                 return (ENOSYS);
 
 | 
 
 
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  24  *
  25  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
  26  *      All rights reserved.
  27  */
  28 
  29 /*
  30  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  31  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  32  */
  33 
  34 #include <sys/param.h>
  35 #include <sys/types.h>
  36 #include <sys/systm.h>
  37 #include <sys/cred.h>
  38 #include <sys/time.h>
  39 #include <sys/vnode.h>
  40 #include <sys/vfs.h>
  41 #include <sys/vfs_opreg.h>
  42 #include <sys/file.h>
  43 #include <sys/filio.h>
 
 
1443             MANDLOCK(vp, va.va_mode))
1444                 return (EACCES);
1445 
1446         /*
1447          * Access check is based on only
1448          * one of owner, group, public.
1449          * If not owner, then check group.
1450          * If not a member of the group,
1451          * then check public access.
1452          */
1453         if (crgetuid(cr) != va.va_uid) {
1454                 shift += 3;
1455                 if (!groupmember(va.va_gid, cr))
1456                         shift += 3;
1457         }
1458 
1459         return (secpolicy_vnode_access2(cr, vp, va.va_uid,
1460             va.va_mode << shift, mode));
1461 }
1462 
/*
 * Switch, on (1) by default.  It is declared volatile and without
 * "static", so it has external linkage.
 * NOTE(review): the name suggests it gates caching of symbolic link
 * contents for nfs_readlink(); the code that reads it is not visible
 * here -- confirm before relying on that.
 */
1463 volatile int nfs_do_symlink_cache = 1;
1464 
1465 /* ARGSUSED */
1466 static int
1467 nfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
1468 {
1469         int error;
1470         struct nfsrdlnres rl;
1471         rnode_t *rp;
1472         int douprintf;
1473         failinfo_t fi;
1474 
1475         /*
1476          * We want to be consistent with UFS semantics so we will return
1477          * EINVAL instead of ENXIO. This violates the XNFS spec and
1478          * RFC 1094, which are wrong anyway. BUGID 1138002.
1479          */
1480         if (vp->v_type != VLNK)
1481                 return (EINVAL);
1482 
1483         if (nfs_zone() != VTOMI(vp)->mi_zone)
 
1746         error = nfslookup(dvp, nm, vpp, pnp, flags, rdir, cr, 0);
1747 
1748         nfs_rw_exit(&drp->r_rwlock);
1749 
1750         /*
1751          * If vnode is a device, create special vnode.
1752          */
1753         if (!error && IS_DEVVP(*vpp)) {
1754                 vp = *vpp;
1755                 *vpp = specvp(vp, vp->v_rdev, vp->v_type, cr);
1756                 VN_RELE(vp);
1757         }
1758 
1759 out:
1760         if (avp != NULL)
1761                 VN_RELE(avp);
1762 
1763         return (error);
1764 }
1765 
/*
 * Switch, on (1) by default.  It is declared volatile and without
 * "static", so it has external linkage.
 * NOTE(review): presumably controls whether failed lookups are remembered
 * as negative name-cache entries; the consumer is not visible here --
 * confirm.
 */
1766 volatile int nfs_lookup_neg_cache = 1;
1767 

/*
 * Lookup statistics.  These exist only in DEBUG builds and all start
 * at zero.
 * NOTE(review): going by the names, they tally name-cache (DNLC) hits,
 * misses, negative hits, entries that disappeared, and total lookups;
 * the increment sites are not visible here -- confirm.
 */
1768 #ifdef DEBUG
1769 static int nfs_lookup_dnlc_hits = 0;
1770 static int nfs_lookup_dnlc_misses = 0;
1771 static int nfs_lookup_dnlc_neg_hits = 0;
1772 static int nfs_lookup_dnlc_disappears = 0;
1773 static int nfs_lookup_dnlc_lookups = 0;
1774 #endif
1775 
1776 /* ARGSUSED */
1777 int
1778 nfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
1779         int flags, vnode_t *rdir, cred_t *cr, int rfscall_flags)
1780 {
1781         int error;
1782 
1783         ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone);
1784 
1785         /*
1786          * If lookup is for "", just return dvp.  Don't need
 
2937                         if (HAVE_RDDIR_CACHE(drp))
2938                                 nfs_purge_rddir_cache(dvp);
2939                 } else {
2940                         PURGE_STALE_FH(error, dvp, cr);
2941                 }
2942         }
2943 
2944         nfs_rw_exit(&drp->r_rwlock);
2945 
2946         return (error);
2947 }
2948 
/*
 * Readdir statistics.  These exist only in DEBUG builds and all start
 * at zero.
 * NOTE(review): going by the names, they count readdir cache hits,
 * short results, waits, misses and read-ahead requests; the increment
 * sites are not visible here -- confirm.
 */
2949 #ifdef DEBUG
2950 static int nfs_readdir_cache_hits = 0;
2951 static int nfs_readdir_cache_shorts = 0;
2952 static int nfs_readdir_cache_waits = 0;
2953 static int nfs_readdir_cache_misses = 0;
2954 static int nfs_readdir_readahead = 0;
2955 #endif
2956 

/*
 * Switch, off (0) by default.  It is declared volatile and without
 * "static", so it has external linkage.
 * NOTE(review): the name suggests it makes readdir requests smaller;
 * the code that tests it is not visible here -- confirm.
 */
2957 volatile int nfs_shrinkreaddir = 0;
2958 
2959 /*
2960  * Read directory entries.
2961  * There are some weird things to look out for here.  The uio_offset
2962  * field is either 0 or it is the offset returned from a previous
2963  * readdir.  It is an opaque value used by the server to find the
2964  * correct directory block to read. The count field is the number
2965  * of blocks to read on the server.  This is advisory only, the server
2966  * may return only one block's worth of entries.  Entries may be compressed
2967  * on the server.
2968  */
2969 /* ARGSUSED */
2970 static int
2971 nfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
2972         caller_context_t *ct, int flags)
2973 {
2974         int error;
2975         size_t count;
2976         rnode_t *rp;
2977         rddir_cache *rdc;
 
3270         mi = VTOMI(vp);
3271 
3272         rda.rda_fh = *VTOFH(vp);
3273         rda.rda_offset = rdc->nfs_cookie;
3274 
3275         /*
3276          * NFS client failover support
3277          * suppress failover unless we have a zero cookie
3278          */
3279         if (rdc->nfs_cookie == (off_t)0) {
3280                 fi.vp = vp;
3281                 fi.fhp = (caddr_t)&rda.rda_fh;
3282                 fi.copyproc = nfscopyfh;
3283                 fi.lookupproc = nfslookup;
3284                 fi.xattrdirproc = acl_getxattrdir2;
3285                 fip = &fi;
3286         } else {
3287                 fip = NULL;
3288         }
3289 
3290         rd.rd_dirents = kmem_alloc(rdc->buflen, KM_SLEEP);
3291         rd.rd_size = count;
3292         rd.rd_offset = rda.rda_offset;
3293 
3294         douprintf = 1;
3295 
3296         if (mi->mi_io_kstats) {
3297                 mutex_enter(&mi->mi_lock);
3298                 kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
3299                 mutex_exit(&mi->mi_lock);
3300         }
3301 
3302         do {
3303                 rda.rda_count = MIN(count, mi->mi_curread);
3304                 error = rfs2call(mi, RFS_READDIR,
3305                     xdr_rddirargs, (caddr_t)&rda,
3306                     xdr_getrddirres, (caddr_t)&rd, cr,
3307                     &douprintf, &rd.rd_status, 0, fip);
3308         } while (error == ENFS_TRYAGAIN);
3309 
3310         if (mi->mi_io_kstats) {
 
3320          * field.  The r_statelock in the rnode must be held to
3321          * prevent two different threads from simultaneously
3322          * attempting to update the flags field.  This can happen
3323          * if we are turning off RDDIR and the other thread is
3324          * trying to set RDDIRWAIT.
3325          */
3326         ASSERT(rdc->flags & RDDIR);
3327         if (!error) {
3328                 error = geterrno(rd.rd_status);
3329                 if (!error) {
3330                         rdc->nfs_ncookie = rd.rd_offset;
3331                         rdc->eof = rd.rd_eof ? 1 : 0;
3332                         rdc->entlen = rd.rd_size;
3333                         ASSERT(rdc->entlen <= rdc->buflen);
3334 #ifdef DEBUG
3335                         rdc->entries = rddir_cache_buf_alloc(rdc->buflen,
3336                             KM_SLEEP);
3337 #else
3338                         rdc->entries = kmem_alloc(rdc->buflen, KM_SLEEP);
3339 #endif
3340                         bcopy(rd.rd_dirents, rdc->entries, rdc->entlen);
3341                         rdc->error = 0;
3342                         if (mi->mi_io_kstats) {
3343                                 mutex_enter(&mi->mi_lock);
3344                                 KSTAT_IO_PTR(mi->mi_io_kstats)->reads++;
3345                                 KSTAT_IO_PTR(mi->mi_io_kstats)->nread +=
3346                                     rd.rd_size;
3347                                 mutex_exit(&mi->mi_lock);
3348                         }
3349                 } else {
3350                         PURGE_STALE_FH(error, vp, cr);
3351                 }
3352         }
3353         if (error) {
3354                 rdc->entries = NULL;
3355                 rdc->error = error;
3356         }
3357         kmem_free(rd.rd_dirents, rdc->buflen);
3358 
3359         mutex_enter(&rp->r_statelock);
3360         rdc->flags &= ~RDDIR;
3361         if (rdc->flags & RDDIRWAIT) {
3362                 rdc->flags &= ~RDDIRWAIT;
3363                 cv_broadcast(&rdc->cv);
3364         }
3365         if (error)
3366                 rdc->flags |= RDDIRREQ;
3367         mutex_exit(&rp->r_statelock);
3368 
3369         rddir_cache_rele(rdc);
3370 
3371         return (error);
3372 }
3373 
/*
 * Debugging flag that exists only in DEBUG builds; clear (0) by default.
 * NOTE(review): presumably consulted by the block I/O path as a debugging
 * hook; the code that tests it is not visible here -- confirm.
 */
3374 #ifdef DEBUG
3375 static int nfs_bio_do_stop = 0;
3376 #endif
3377 
 
3597 static int
3598 nfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3599 {
3600 
3601         /*
3602          * Because we stuff the readdir cookie into the offset field
3603          * someone may attempt to do an lseek with the cookie which
3604          * we want to succeed.
3605          */
3606         if (vp->v_type == VDIR)
3607                 return (0);
3608         if (*noffp < 0 || *noffp > MAXOFF32_T)
3609                 return (EINVAL);
3610         return (0);
3611 }
3612 
3613 /*
3614  * Read-ahead depth: number of NFS_MAXDATA-sized blocks to read ahead.
3615  * The default of 4 was chosen for 100 base-T networks.
3616  */
3617 volatile int nfs_nra = 4;
3618 

/* Statistic kept only in DEBUG builds; starts at zero. */
3619 #ifdef DEBUG
3620 static int nfs_lostpage = 0;    /* number of times we lost original page */
3621 #endif
3622 
3623 /*
3624  * Return all the pages from [off..off+len) in file
3625  */
3626 /* ARGSUSED */
3627 static int
3628 nfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
3629         page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
3630         enum seg_rw rw, cred_t *cr, caller_context_t *ct)
3631 {
3632         rnode_t *rp;
3633         int error;
3634         mntinfo_t *mi;
3635 
3636         if (vp->v_flag & VNOMAP)
3637                 return (ENOSYS);
 
 |