8634 epoll fails to wake on certain edge-triggered conditions
8635 epoll should not emit POLLNVAL
8636 recursive epoll should emit EPOLLRDNORM
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1984, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2015, Joyent, Inc.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35 
  36 #include <sys/types.h>
  37 #include <sys/t_lock.h>
  38 #include <sys/ksynch.h>
  39 #include <sys/param.h>
  40 #include <sys/time.h>
  41 #include <sys/systm.h>
  42 #include <sys/sysmacros.h>
  43 #include <sys/resource.h>
  44 #include <sys/signal.h>


2084 
2085 out:
2086         return (err);
2087 }
2088 
2089 /*
2090  * Special wrapper to provide a callback for secpolicy_vnode_setattr().
2091  * The i_contents lock is already held by the caller and we need to
2092  * declare the inode as 'void *' argument.
2093  */
2094 static int
2095 ufs_priv_access(void *vip, int mode, struct cred *cr)
2096 {
2097         struct inode *ip = vip;
2098 
2099         return (ufs_iaccess(ip, mode, cr, 0));
2100 }
2101 
2102 /*ARGSUSED4*/
2103 static int
2104 ufs_setattr(
2105         struct vnode *vp,
2106         struct vattr *vap,
2107         int flags,
2108         struct cred *cr,
2109         caller_context_t *ct)
2110 {
2111         struct inode *ip = VTOI(vp);
2112         struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
2113         struct fs *fs;
2114         struct ulockfs *ulp;
2115         char *errmsg1;
2116         char *errmsg2;
2117         long blocks;
2118         long int mask = vap->va_mask;
2119         size_t len1, len2;
2120         int issync;
2121         int trans_size;
2122         int dotrans;
2123         int dorwlock;
2124         int error;
2125         int owner_change;
2126         int dodqlock;
2127         timestruc_t now;
2128         vattr_t oldva;


2600                 }
2601                 if (tflag == 1) {
2602                         /* now, copy it into the user buffer */
2603                         error = uiomove((caddr_t)kbuf,
2604                             MIN(size, uiop->uio_resid),
2605                             UIO_READ, uiop);
2606                 }
2607                 rw_exit(&ip->i_contents);
2608                 rw_exit(&ip->i_rwlock);
2609         }
2610 out:
2611         if (ulp) {
2612                 ufs_lockfs_end(ulp);
2613         }
2614 nolockout:
2615         return (error);
2616 }
2617 
2618 /* ARGSUSED */
2619 static int
2620 ufs_fsync(struct vnode *vp, int syncflag, struct cred *cr,
2621         caller_context_t *ct)
2622 {
2623         struct inode *ip = VTOI(vp);
2624         struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
2625         struct ulockfs *ulp;
2626         int error;
2627 
2628         error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_FSYNC_MASK);
2629         if (error)
2630                 return (error);
2631 
2632         if (TRANS_ISTRANS(ufsvfsp)) {
2633                 /*
2634                  * First push out any data pages
2635                  */
2636                 if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) &&
2637                     (vp->v_type != VCHR) && !(IS_SWAPVP(vp))) {
2638                         error = VOP_PUTPAGE(vp, (offset_t)0, (size_t)0,
2639                             0, CRED(), ct);
2640                         if (error)
2641                                 goto out;


3176         if (ulp)
3177                 ufs_lockfs_end(ulp);
3178 
3179         /*
3180          * If no inodes available, try to free one up out of the
3181          * pending delete queue.
3182          */
3183         if ((error == ENOSPC) && retry && TRANS_ISTRANS(ufsvfsp)) {
3184                 ufs_delete_drain_wait(ufsvfsp, 1);
3185                 retry = 0;
3186                 goto again;
3187         }
3188 
3189 out:
3190         return (error);
3191 }
3192 
3193 extern int ufs_idle_max;
3194 /*ARGSUSED*/
3195 static int
3196 ufs_remove(struct vnode *vp, char *nm, struct cred *cr,
3197         caller_context_t *ct, int flags)
3198 {
3199         struct inode *ip = VTOI(vp);
3200         struct ufsvfs *ufsvfsp  = ip->i_ufsvfs;
3201         struct ulockfs *ulp;
3202         vnode_t *rmvp = NULL;   /* Vnode corresponding to name being removed */
3203         int indeadlock;
3204         int error;
3205         int issync;
3206         int trans_size;
3207 
3208         /*
3209          * don't let the delete queue get too long
3210          */
3211         if (ufsvfsp == NULL) {
3212                 error = EIO;
3213                 goto out;
3214         }
3215         if (ufsvfsp->vfs_delete.uq_ne > ufs_idle_max)
3216                 ufs_delete_drain(vp->v_vfsp, 1, 1);
3217 


3338 uint64_t ufs_rename_retry_cnt;
3339 uint64_t ufs_rename_upgrade_retry_cnt;
3340 uint64_t ufs_rename_dircheck_retry_cnt;
3341 clock_t  ufs_rename_backoff_delay = 1;
3342 
3343 /*
3344  * Rename a file or directory.
3345  * We are given the vnode and entry string of the source and the
3346  * vnode and entry string of the place we want to move the source
3347  * to (the target). The essential operation is:
3348  *      unlink(target);
3349  *      link(source, target);
3350  *      unlink(source);
3351  * but "atomically".  Can't do full commit without saving state in
3352  * the inode on disk, which isn't feasible at this time.  Best we
3353  * can do is always guarantee that the TARGET exists.
3354  */
3355 
3356 /*ARGSUSED*/
3357 static int
3358 ufs_rename(
3359         struct vnode *sdvp,             /* old (source) parent vnode */
3360         char *snm,                      /* old (source) entry name */
3361         struct vnode *tdvp,             /* new (target) parent vnode */
3362         char *tnm,                      /* new (target) entry name */
3363         struct cred *cr,
3364         caller_context_t *ct,
3365         int flags)
3366 {
3367         struct inode *sip = NULL;       /* source inode */
3368         struct inode *ip = NULL;        /* check inode */
3369         struct inode *sdp;              /* old (source) parent inode */
3370         struct inode *tdp;              /* new (target) parent inode */
3371         struct vnode *svp = NULL;       /* source vnode */
3372         struct vnode *tvp = NULL;       /* target vnode, if it exists */
3373         struct vnode *realvp;
3374         struct ufsvfs *ufsvfsp;
3375         struct ulockfs *ulp = NULL;
3376         struct ufs_slot slot;
3377         timestruc_t now;
3378         int error;
3379         int issync;
3380         int trans_size;
3381         krwlock_t *first_lock;
3382         krwlock_t *second_lock;
3383         krwlock_t *reverse_lock;
3384         int serr, terr;
3385 


3884          */
3885         ufs_tryirwlock_trans(&ip->i_rwlock, RW_WRITER, TOP_RMDIR, retry);
3886         if (indeadlock)
3887                 goto retry_rmdir;
3888         error = ufs_dirremove(ip, nm, (struct inode *)0, cdir, DR_RMDIR, cr);
3889 
3890         rw_exit(&ip->i_rwlock);
3891 
3892         if (ulp) {
3893                 TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_RMDIR,
3894                     trans_size);
3895                 ufs_lockfs_end(ulp);
3896         }
3897 
3898 out:
3899         return (error);
3900 }
3901 
3902 /* ARGSUSED */
3903 static int
3904 ufs_readdir(
3905         struct vnode *vp,
3906         struct uio *uiop,
3907         struct cred *cr,
3908         int *eofp,
3909         caller_context_t *ct,
3910         int flags)
3911 {
3912         struct iovec *iovp;
3913         struct inode *ip;
3914         struct direct *idp;
3915         struct dirent64 *odp;
3916         struct fbuf *fbp;
3917         struct ufsvfs *ufsvfsp;
3918         struct ulockfs *ulp;
3919         caddr_t outbuf;
3920         size_t bufsize;
3921         uint_t offset;
3922         uint_t bytes_wanted, total_bytes_wanted;
3923         int incount = 0;
3924         int outcount = 0;
3925         int error;
3926 
3927         ip = VTOI(vp);
3928         ASSERT(RW_READ_HELD(&ip->i_rwlock));
3929 
3930         if (uiop->uio_loffset >= MAXOFF32_T) {


4093         } else if ((error = uiomove(outbuf, (long)outcount, UIO_READ,
4094             uiop)) == 0)
4095                 uiop->uio_offset = offset;
4096 update_inode:
4097         ITIMES(ip);
4098         if (uiop->uio_segflg != UIO_SYSSPACE || uiop->uio_iovcnt != 1)
4099                 kmem_free(outbuf, bufsize);
4100 
4101         if (eofp && error == 0)
4102                 *eofp = (uiop->uio_offset >= (int)ip->i_size);
4103 unlock:
4104         if (ulp) {
4105                 ufs_lockfs_end(ulp);
4106         }
4107 out:
4108         return (error);
4109 }
4110 
4111 /*ARGSUSED*/
4112 static int
4113 ufs_symlink(
4114         struct vnode *dvp,              /* ptr to parent dir vnode */
4115         char *linkname,                 /* name of symbolic link */
4116         struct vattr *vap,              /* attributes */
4117         char *target,                   /* target path */
4118         struct cred *cr,                /* user credentials */
4119         caller_context_t *ct,
4120         int flags)
4121 {
4122         struct inode *ip, *dip = VTOI(dvp);
4123         struct ufsvfs *ufsvfsp = dip->i_ufsvfs;
4124         struct ulockfs *ulp;
4125         int error;
4126         int issync;
4127         int trans_size;
4128         int residual;
4129         int ioflag;
4130         int retry = 1;
4131 
4132         /*
4133          * No symlinks in attrdirs at this time
4134          */
4135         if ((VTOI(dvp)->i_mode & IFMT) == IFATTRDIR)
4136                 return (EINVAL);
4137 
4138 again:
4139         ip = (struct inode *)NULL;
4140         vap->va_type = VLNK;


4416         rw_enter(&ip->i_rwlock, RW_READER);
4417         if (MANDLOCK(vp, ip->i_mode)) {
4418                 rw_exit(&ip->i_rwlock);
4419                 rw_enter(&ip->i_rwlock, RW_WRITER);
4420                 return (V_WRITELOCK_TRUE);
4421         }
4422         return (V_WRITELOCK_FALSE);
4423 }
4424 
4425 /*ARGSUSED*/
4426 static void
4427 ufs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
4428 {
4429         struct inode    *ip = VTOI(vp);
4430 
4431         rw_exit(&ip->i_rwlock);
4432 }
4433 
4434 /* ARGSUSED */
4435 static int
4436 ufs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
4437         caller_context_t *ct)
4438 {
4439         return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
4440 }
4441 
4442 /* ARGSUSED */
4443 static int
4444 ufs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
4445         offset_t offset, struct flk_callback *flk_cbp, struct cred *cr,
4446         caller_context_t *ct)
4447 {
4448         struct inode *ip = VTOI(vp);
4449 
4450         if (ip->i_ufsvfs == NULL)
4451                 return (EIO);
4452 
4453         /*
4454          * If file is being mapped, disallow frlock.
4455          * XXX I am not holding tlock while checking i_mapcnt because the
4456          * current locking strategy drops all locks before calling fs_frlock.
4457          * So, mapcnt could change before we enter fs_frlock making it


5174                 /*
5175                  * Must have weird flags or we are not clustering.
5176                  */
5177         }
5178 
5179         err = ufs_putpages(vp, off, len, flags, cr);
5180 
5181 errout:
5182         return (err);
5183 }
5184 
5185 /*
5186  * If len == 0, do from off to EOF.
5187  *
5188  * The normal cases should be len == 0 & off == 0 (entire vp list),
5189  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
5190  * (from pageout).
5191  */
5192 /*ARGSUSED*/
5193 static int
5194 ufs_putpages(
5195         struct vnode *vp,
5196         offset_t off,
5197         size_t len,
5198         int flags,
5199         struct cred *cr)
5200 {
5201         u_offset_t io_off;
5202         u_offset_t eoff;
5203         struct inode *ip = VTOI(vp);
5204         page_t *pp;
5205         size_t io_len;
5206         int err = 0;
5207         int dolock;
5208 
5209         if (vp->v_count == 0)
5210                 return (ufs_fault(vp, "ufs_putpages: v_count == 0"));
5211         /*
5212          * Acquire the readers/write inode lock before locking
5213          * any pages in this inode.
5214          * The inode lock is held during i/o.
5215          */
5216         if (len == 0) {
5217                 mutex_enter(&ip->i_tlock);
5218                 ip->i_delayoff = ip->i_delaylen = 0;


5349         if (ip->i_writes >= ufs_LW) {
5350                 if ((ip->i_writes -= bp->b_bcount) <= ufs_LW)
5351                         if (ufs_WRITES)
5352                                 cv_broadcast(&ip->i_wrcv); /* wake all up */
5353         } else {
5354                 ip->i_writes -= bp->b_bcount;
5355         }
5356 
5357         mutex_exit(&ip->i_tlock);
5358         iodone(bp);
5359 }
5360 
5361 /*
5362  * Write out a single page, possibly klustering adjacent
5363  * dirty pages.  The inode lock must be held.
5364  *
5365  * LMXXX - bsize < pagesize not done.
5366  */
5367 /*ARGSUSED*/
5368 int
5369 ufs_putapage(
5370         struct vnode *vp,
5371         page_t *pp,
5372         u_offset_t *offp,
5373         size_t *lenp,           /* return values */
5374         int flags,
5375         struct cred *cr)
5376 {
5377         u_offset_t io_off;
5378         u_offset_t off;
5379         struct inode *ip = VTOI(vp);
5380         struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
5381         struct fs *fs;
5382         struct buf *bp;
5383         size_t io_len;
5384         daddr_t bn;
5385         int err;
5386         int contig;
5387         int dotrans;
5388 
5389         ASSERT(RW_LOCK_HELD(&ip->i_contents));
5390 
5391         if (ufsvfsp == NULL) {
5392                 err = EIO;
5393                 goto out_trace;
5394         }
5395 


5586 
5587         pp = NULL;
5588 
5589 out:
5590         if (err != 0 && pp != NULL)
5591                 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
5592 
5593         if (offp)
5594                 *offp = io_off;
5595         if (lenp)
5596                 *lenp = io_len;
5597 out_trace:
5598         return (err);
5599 }
5600 
5601 uint64_t ufs_map_alock_retry_cnt;
5602 uint64_t ufs_map_lockfs_retry_cnt;
5603 
5604 /* ARGSUSED */
5605 static int
5606 ufs_map(struct vnode *vp,
5607         offset_t off,
5608         struct as *as,
5609         caddr_t *addrp,
5610         size_t len,
5611         uchar_t prot,
5612         uchar_t maxprot,
5613         uint_t flags,
5614         struct cred *cr,
5615         caller_context_t *ct)
5616 {
5617         struct segvn_crargs vn_a;
5618         struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs;
5619         struct ulockfs *ulp;
5620         int error, sig;
5621         k_sigset_t smask;
5622         caddr_t hint = *addrp;
5623 
5624         if (vp->v_flag & VNOMAP) {
5625                 error = ENOSYS;
5626                 goto out;
5627         }
5628 
5629         if (off < (offset_t)0 || (offset_t)(off + len) < (offset_t)0) {
5630                 error = ENXIO;
5631                 goto out;
5632         }
5633 
5634         if (vp->v_type != VREG) {


5711         vn_a.offset = (u_offset_t)off;
5712         vn_a.type = flags & MAP_TYPE;
5713         vn_a.prot = prot;
5714         vn_a.maxprot = maxprot;
5715         vn_a.cred = cr;
5716         vn_a.amp = NULL;
5717         vn_a.flags = flags & ~MAP_TYPE;
5718         vn_a.szc = 0;
5719         vn_a.lgrp_mem_policy_flags = 0;
5720 
5721         error = as_map_locked(as, *addrp, len, segvn_create, &vn_a);
5722         if (ulp)
5723                 ufs_lockfs_end(ulp);
5724         as_rangeunlock(as);
5725 out:
5726         return (error);
5727 }
5728 
5729 /* ARGSUSED */
5730 static int
5731 ufs_addmap(struct vnode *vp,
5732         offset_t off,
5733         struct as *as,
5734         caddr_t addr,
5735         size_t  len,
5736         uchar_t  prot,
5737         uchar_t  maxprot,
5738         uint_t    flags,
5739         struct cred *cr,
5740         caller_context_t *ct)
5741 {
5742         struct inode *ip = VTOI(vp);
5743 
5744         if (vp->v_flag & VNOMAP) {
5745                 return (ENOSYS);
5746         }
5747 
5748         mutex_enter(&ip->i_tlock);
5749         ip->i_mapcnt += btopr(len);
5750         mutex_exit(&ip->i_tlock);
5751         return (0);
5752 }
5753 
5754 /*ARGSUSED*/
5755 static int
5756 ufs_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
5757         size_t len, uint_t prot,  uint_t maxprot,  uint_t flags,
5758         struct cred *cr, caller_context_t *ct)
5759 {
5760         struct inode *ip = VTOI(vp);
5761 
5762         if (vp->v_flag & VNOMAP) {
5763                 return (ENOSYS);
5764         }
5765 
5766         mutex_enter(&ip->i_tlock);
5767         ip->i_mapcnt -= btopr(len);  /* Count released mappings */
5768         ASSERT(ip->i_mapcnt >= 0);
5769         mutex_exit(&ip->i_tlock);
5770         return (0);
5771 }
5772 /*
5773  * Return the answer requested to poll() for non-device files
5774  */
5775 struct pollhead ufs_pollhd;
5776 
5777 /* ARGSUSED */
5778 int
5779 ufs_poll(vnode_t *vp, short ev, int any, short *revp, struct pollhead **phpp,
5780         caller_context_t *ct)
5781 {
5782         struct ufsvfs   *ufsvfsp;
5783 
5784         *revp = 0;
5785         ufsvfsp = VTOI(vp)->i_ufsvfs;
5786 
5787         if (!ufsvfsp) {
5788                 *revp = POLLHUP;
5789                 goto out;
5790         }
5791 
5792         if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs) ||
5793             ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) {
5794                 *revp |= POLLERR;
5795 
5796         } else {
5797                 if ((ev & POLLOUT) && !ufsvfsp->vfs_fs->fs_ronly &&
5798                     !ULOCKFS_IS_WLOCK(&ufsvfsp->vfs_ulockfs))
5799                         *revp |= POLLOUT;
5800 
5801                 if ((ev & POLLWRBAND) && !ufsvfsp->vfs_fs->fs_ronly &&
5802                     !ULOCKFS_IS_WLOCK(&ufsvfsp->vfs_ulockfs))
5803                         *revp |= POLLWRBAND;
5804 
5805                 if (ev & POLLIN)
5806                         *revp |= POLLIN;
5807 
5808                 if (ev & POLLRDNORM)
5809                         *revp |= POLLRDNORM;
5810 
5811                 if (ev & POLLRDBAND)
5812                         *revp |= POLLRDBAND;
5813         }
5814 
5815         if ((ev & POLLPRI) && (*revp & (POLLERR|POLLHUP)))
5816                 *revp |= POLLPRI;
5817 out:
5818         *phpp = !any && !*revp ? &ufs_pollhd : (struct pollhead *)NULL;
5819 
5820         return (0);
5821 }
5822 
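For reference, the pre-change ufs_poll() above implements the ordinary level-triggered contract for regular files: any requested POLLIN/POLLRDNORM/POLLRDBAND event is reported as immediately ready, and the pollhead is handed back only when there is nothing to report. A minimal user-level sketch of that contract (illustrative only; the file path is a placeholder, not part of this change):

#include <poll.h>
#include <fcntl.h>
#include <stdio.h>

int
main(void)
{
	struct pollfd pfd;

	/* Any regular file will do; /etc/motd is just a placeholder. */
	pfd.fd = open("/etc/motd", O_RDONLY);
	pfd.events = POLLIN | POLLRDNORM;
	pfd.revents = 0;

	if (pfd.fd < 0 || poll(&pfd, 1, 0) < 0) {
		perror("poll");
		return (1);
	}

	/* A regular file reports readiness immediately under poll(2). */
	(void) printf("revents = 0x%x\n", (unsigned int)pfd.revents);
	return (0);
}
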
5823 /* ARGSUSED */
5824 static int
5825 ufs_l_pathconf(struct vnode *vp, int cmd, ulong_t *valp, struct cred *cr,
5826         caller_context_t *ct)
5827 {
5828         struct ufsvfs   *ufsvfsp = VTOI(vp)->i_ufsvfs;
5829         struct ulockfs  *ulp = NULL;
5830         struct inode    *sip = NULL;
5831         int             error;
5832         struct inode    *ip = VTOI(vp);
5833         int             issync;
5834 
5835         error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_PATHCONF_MASK);
5836         if (error)
5837                 return (error);
5838 




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1984, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2017 Joyent, Inc.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35 
  36 #include <sys/types.h>
  37 #include <sys/t_lock.h>
  38 #include <sys/ksynch.h>
  39 #include <sys/param.h>
  40 #include <sys/time.h>
  41 #include <sys/systm.h>
  42 #include <sys/sysmacros.h>
  43 #include <sys/resource.h>
  44 #include <sys/signal.h>


2084 
2085 out:
2086         return (err);
2087 }
2088 
2089 /*
2090  * Special wrapper to provide a callback for secpolicy_vnode_setattr().
2091  * The i_contents lock is already held by the caller and we need to
2092  * declare the inode as 'void *' argument.
2093  */
2094 static int
2095 ufs_priv_access(void *vip, int mode, struct cred *cr)
2096 {
2097         struct inode *ip = vip;
2098 
2099         return (ufs_iaccess(ip, mode, cr, 0));
2100 }
2101 
2102 /*ARGSUSED4*/
2103 static int
2104 ufs_setattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr,
2105     caller_context_t *ct)
2106 {
2107         struct inode *ip = VTOI(vp);
2108         struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
2109         struct fs *fs;
2110         struct ulockfs *ulp;
2111         char *errmsg1;
2112         char *errmsg2;
2113         long blocks;
2114         long int mask = vap->va_mask;
2115         size_t len1, len2;
2116         int issync;
2117         int trans_size;
2118         int dotrans;
2119         int dorwlock;
2120         int error;
2121         int owner_change;
2122         int dodqlock;
2123         timestruc_t now;
2124         vattr_t oldva;


2596                 }
2597                 if (tflag == 1) {
2598                         /* now, copy it into the user buffer */
2599                         error = uiomove((caddr_t)kbuf,
2600                             MIN(size, uiop->uio_resid),
2601                             UIO_READ, uiop);
2602                 }
2603                 rw_exit(&ip->i_contents);
2604                 rw_exit(&ip->i_rwlock);
2605         }
2606 out:
2607         if (ulp) {
2608                 ufs_lockfs_end(ulp);
2609         }
2610 nolockout:
2611         return (error);
2612 }
2613 
2614 /* ARGSUSED */
2615 static int
2616 ufs_fsync(struct vnode *vp, int syncflag, struct cred *cr, caller_context_t *ct)
2617 {
2618         struct inode *ip = VTOI(vp);
2619         struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
2620         struct ulockfs *ulp;
2621         int error;
2622 
2623         error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_FSYNC_MASK);
2624         if (error)
2625                 return (error);
2626 
2627         if (TRANS_ISTRANS(ufsvfsp)) {
2628                 /*
2629                  * First push out any data pages
2630                  */
2631                 if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) &&
2632                     (vp->v_type != VCHR) && !(IS_SWAPVP(vp))) {
2633                         error = VOP_PUTPAGE(vp, (offset_t)0, (size_t)0,
2634                             0, CRED(), ct);
2635                         if (error)
2636                                 goto out;


3171         if (ulp)
3172                 ufs_lockfs_end(ulp);
3173 
3174         /*
3175          * If no inodes available, try to free one up out of the
3176          * pending delete queue.
3177          */
3178         if ((error == ENOSPC) && retry && TRANS_ISTRANS(ufsvfsp)) {
3179                 ufs_delete_drain_wait(ufsvfsp, 1);
3180                 retry = 0;
3181                 goto again;
3182         }
3183 
3184 out:
3185         return (error);
3186 }
3187 
3188 extern int ufs_idle_max;
3189 /*ARGSUSED*/
3190 static int
3191 ufs_remove(struct vnode *vp, char *nm, struct cred *cr, caller_context_t *ct,
3192     int flags)
3193 {
3194         struct inode *ip = VTOI(vp);
3195         struct ufsvfs *ufsvfsp  = ip->i_ufsvfs;
3196         struct ulockfs *ulp;
3197         vnode_t *rmvp = NULL;   /* Vnode corresponding to name being removed */
3198         int indeadlock;
3199         int error;
3200         int issync;
3201         int trans_size;
3202 
3203         /*
3204          * don't let the delete queue get too long
3205          */
3206         if (ufsvfsp == NULL) {
3207                 error = EIO;
3208                 goto out;
3209         }
3210         if (ufsvfsp->vfs_delete.uq_ne > ufs_idle_max)
3211                 ufs_delete_drain(vp->v_vfsp, 1, 1);
3212 


3333 uint64_t ufs_rename_retry_cnt;
3334 uint64_t ufs_rename_upgrade_retry_cnt;
3335 uint64_t ufs_rename_dircheck_retry_cnt;
3336 clock_t  ufs_rename_backoff_delay = 1;
3337 
3338 /*
3339  * Rename a file or directory.
3340  * We are given the vnode and entry string of the source and the
3341  * vnode and entry string of the place we want to move the source
3342  * to (the target). The essential operation is:
3343  *      unlink(target);
3344  *      link(source, target);
3345  *      unlink(source);
3346  * but "atomically".  Can't do full commit without saving state in
3347  * the inode on disk, which isn't feasible at this time.  Best we
3348  * can do is always guarantee that the TARGET exists.
3349  */
3350 
3351 /*ARGSUSED*/
3352 static int
3353 ufs_rename(struct vnode *sdvp, char *snm, struct vnode *tdvp, char *tnm,
3354     struct cred *cr, caller_context_t *ct, int flags)
3355 {
3356         struct inode *sip = NULL;       /* source inode */
3357         struct inode *ip = NULL;        /* check inode */
3358         struct inode *sdp;              /* old (source) parent inode */
3359         struct inode *tdp;              /* new (target) parent inode */
3360         struct vnode *svp = NULL;       /* source vnode */
3361         struct vnode *tvp = NULL;       /* target vnode, if it exists */
3362         struct vnode *realvp;
3363         struct ufsvfs *ufsvfsp;
3364         struct ulockfs *ulp = NULL;
3365         struct ufs_slot slot;
3366         timestruc_t now;
3367         int error;
3368         int issync;
3369         int trans_size;
3370         krwlock_t *first_lock;
3371         krwlock_t *second_lock;
3372         krwlock_t *reverse_lock;
3373         int serr, terr;
3374 
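The block comment above ufs_rename() describes the guarantee callers rely on: the operation is not fully atomic, but the target name never disappears. A short user-level sketch of the idiom built on that guarantee, the "write a temporary file, then rename() over the target" update, follows (illustrative only; the temporary-file template and helper name are hypothetical, not part of this change):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Atomically-visible replacement of 'target': readers see either the
 * old contents or the new contents, never a missing or partial file.
 */
static int
replace_file(const char *target, const char *contents)
{
	char tmpl[] = "./.replaceXXXXXX";	/* hypothetical temp name */
	int fd;
	FILE *fp;

	if ((fd = mkstemp(tmpl)) < 0)
		return (-1);
	if ((fp = fdopen(fd, "w")) == NULL) {
		(void) close(fd);
		(void) unlink(tmpl);
		return (-1);
	}
	if (fputs(contents, fp) == EOF || fclose(fp) == EOF ||
	    rename(tmpl, target) != 0) {
		(void) unlink(tmpl);
		return (-1);
	}
	return (0);
}
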


3873          */
3874         ufs_tryirwlock_trans(&ip->i_rwlock, RW_WRITER, TOP_RMDIR, retry);
3875         if (indeadlock)
3876                 goto retry_rmdir;
3877         error = ufs_dirremove(ip, nm, (struct inode *)0, cdir, DR_RMDIR, cr);
3878 
3879         rw_exit(&ip->i_rwlock);
3880 
3881         if (ulp) {
3882                 TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_RMDIR,
3883                     trans_size);
3884                 ufs_lockfs_end(ulp);
3885         }
3886 
3887 out:
3888         return (error);
3889 }
3890 
3891 /* ARGSUSED */
3892 static int
3893 ufs_readdir(struct vnode *vp, struct uio *uiop, struct cred *cr, int *eofp,
3894     caller_context_t *ct, int flags)
3895 {
3896         struct iovec *iovp;
3897         struct inode *ip;
3898         struct direct *idp;
3899         struct dirent64 *odp;
3900         struct fbuf *fbp;
3901         struct ufsvfs *ufsvfsp;
3902         struct ulockfs *ulp;
3903         caddr_t outbuf;
3904         size_t bufsize;
3905         uint_t offset;
3906         uint_t bytes_wanted, total_bytes_wanted;
3907         int incount = 0;
3908         int outcount = 0;
3909         int error;
3910 
3911         ip = VTOI(vp);
3912         ASSERT(RW_READ_HELD(&ip->i_rwlock));
3913 
3914         if (uiop->uio_loffset >= MAXOFF32_T) {


4077         } else if ((error = uiomove(outbuf, (long)outcount, UIO_READ,
4078             uiop)) == 0)
4079                 uiop->uio_offset = offset;
4080 update_inode:
4081         ITIMES(ip);
4082         if (uiop->uio_segflg != UIO_SYSSPACE || uiop->uio_iovcnt != 1)
4083                 kmem_free(outbuf, bufsize);
4084 
4085         if (eofp && error == 0)
4086                 *eofp = (uiop->uio_offset >= (int)ip->i_size);
4087 unlock:
4088         if (ulp) {
4089                 ufs_lockfs_end(ulp);
4090         }
4091 out:
4092         return (error);
4093 }
4094 
4095 /*ARGSUSED*/
4096 static int
4097 ufs_symlink(struct vnode *dvp, char *linkname, struct vattr *vap, char *target,
4098     struct cred *cr, caller_context_t *ct, int flags)
4099 {
4100         struct inode *ip, *dip = VTOI(dvp);
4101         struct ufsvfs *ufsvfsp = dip->i_ufsvfs;
4102         struct ulockfs *ulp;
4103         int error;
4104         int issync;
4105         int trans_size;
4106         int residual;
4107         int ioflag;
4108         int retry = 1;
4109 
4110         /*
4111          * No symlinks in attrdirs at this time
4112          */
4113         if ((VTOI(dvp)->i_mode & IFMT) == IFATTRDIR)
4114                 return (EINVAL);
4115 
4116 again:
4117         ip = (struct inode *)NULL;
4118         vap->va_type = VLNK;


4394         rw_enter(&ip->i_rwlock, RW_READER);
4395         if (MANDLOCK(vp, ip->i_mode)) {
4396                 rw_exit(&ip->i_rwlock);
4397                 rw_enter(&ip->i_rwlock, RW_WRITER);
4398                 return (V_WRITELOCK_TRUE);
4399         }
4400         return (V_WRITELOCK_FALSE);
4401 }
4402 
4403 /*ARGSUSED*/
4404 static void
4405 ufs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
4406 {
4407         struct inode    *ip = VTOI(vp);
4408 
4409         rw_exit(&ip->i_rwlock);
4410 }
4411 
4412 /* ARGSUSED */
4413 static int
4414 ufs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
4415 {
4416         return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
4417 }
4418 
4419 /* ARGSUSED */
4420 static int
4421 ufs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
4422     offset_t offset, struct flk_callback *flk_cbp, struct cred *cr,
4423     caller_context_t *ct)
4424 {
4425         struct inode *ip = VTOI(vp);
4426 
4427         if (ip->i_ufsvfs == NULL)
4428                 return (EIO);
4429 
4430         /*
4431          * If file is being mapped, disallow frlock.
4432          * XXX I am not holding tlock while checking i_mapcnt because the
4433          * current locking strategy drops all locks before calling fs_frlock.
4434          * So, mapcnt could change before we enter fs_frlock making it


5151                 /*
5152                  * Must have weird flags or we are not clustering.
5153                  */
5154         }
5155 
5156         err = ufs_putpages(vp, off, len, flags, cr);
5157 
5158 errout:
5159         return (err);
5160 }
5161 
5162 /*
5163  * If len == 0, do from off to EOF.
5164  *
5165  * The normal cases should be len == 0 & off == 0 (entire vp list),
5166  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
5167  * (from pageout).
5168  */
5169 /*ARGSUSED*/
5170 static int
5171 ufs_putpages(struct vnode *vp, offset_t off, size_t len, int flags,
5172     struct cred *cr)
5173 {
5174         u_offset_t io_off;
5175         u_offset_t eoff;
5176         struct inode *ip = VTOI(vp);
5177         page_t *pp;
5178         size_t io_len;
5179         int err = 0;
5180         int dolock;
5181 
5182         if (vp->v_count == 0)
5183                 return (ufs_fault(vp, "ufs_putpages: v_count == 0"));
5184         /*
5185          * Acquire the readers/write inode lock before locking
5186          * any pages in this inode.
5187          * The inode lock is held during i/o.
5188          */
5189         if (len == 0) {
5190                 mutex_enter(&ip->i_tlock);
5191                 ip->i_delayoff = ip->i_delaylen = 0;


5322         if (ip->i_writes >= ufs_LW) {
5323                 if ((ip->i_writes -= bp->b_bcount) <= ufs_LW)
5324                         if (ufs_WRITES)
5325                                 cv_broadcast(&ip->i_wrcv); /* wake all up */
5326         } else {
5327                 ip->i_writes -= bp->b_bcount;
5328         }
5329 
5330         mutex_exit(&ip->i_tlock);
5331         iodone(bp);
5332 }
5333 
5334 /*
5335  * Write out a single page, possibly klustering adjacent
5336  * dirty pages.  The inode lock must be held.
5337  *
5338  * LMXXX - bsize < pagesize not done.
5339  */
5340 /*ARGSUSED*/
5341 int
5342 ufs_putapage(struct vnode *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
5343     int flags, struct cred *cr)
5344 {
5345         u_offset_t io_off;
5346         u_offset_t off;
5347         struct inode *ip = VTOI(vp);
5348         struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
5349         struct fs *fs;
5350         struct buf *bp;
5351         size_t io_len;
5352         daddr_t bn;
5353         int err;
5354         int contig;
5355         int dotrans;
5356 
5357         ASSERT(RW_LOCK_HELD(&ip->i_contents));
5358 
5359         if (ufsvfsp == NULL) {
5360                 err = EIO;
5361                 goto out_trace;
5362         }
5363 


5554 
5555         pp = NULL;
5556 
5557 out:
5558         if (err != 0 && pp != NULL)
5559                 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
5560 
5561         if (offp)
5562                 *offp = io_off;
5563         if (lenp)
5564                 *lenp = io_len;
5565 out_trace:
5566         return (err);
5567 }
5568 
5569 uint64_t ufs_map_alock_retry_cnt;
5570 uint64_t ufs_map_lockfs_retry_cnt;
5571 
5572 /* ARGSUSED */
5573 static int
5574 ufs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
5575     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
5576     caller_context_t *ct)
5577 {
5578         struct segvn_crargs vn_a;
5579         struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs;
5580         struct ulockfs *ulp;
5581         int error, sig;
5582         k_sigset_t smask;
5583         caddr_t hint = *addrp;
5584 
5585         if (vp->v_flag & VNOMAP) {
5586                 error = ENOSYS;
5587                 goto out;
5588         }
5589 
5590         if (off < (offset_t)0 || (offset_t)(off + len) < (offset_t)0) {
5591                 error = ENXIO;
5592                 goto out;
5593         }
5594 
5595         if (vp->v_type != VREG) {


5672         vn_a.offset = (u_offset_t)off;
5673         vn_a.type = flags & MAP_TYPE;
5674         vn_a.prot = prot;
5675         vn_a.maxprot = maxprot;
5676         vn_a.cred = cr;
5677         vn_a.amp = NULL;
5678         vn_a.flags = flags & ~MAP_TYPE;
5679         vn_a.szc = 0;
5680         vn_a.lgrp_mem_policy_flags = 0;
5681 
5682         error = as_map_locked(as, *addrp, len, segvn_create, &vn_a);
5683         if (ulp)
5684                 ufs_lockfs_end(ulp);
5685         as_rangeunlock(as);
5686 out:
5687         return (error);
5688 }
5689 
5690 /* ARGSUSED */
5691 static int
5692 ufs_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
5693     size_t len, uchar_t  prot, uchar_t  maxprot, uint_t    flags,
5694     struct cred *cr, caller_context_t *ct)
5695 {
5696         struct inode *ip = VTOI(vp);
5697 
5698         if (vp->v_flag & VNOMAP) {
5699                 return (ENOSYS);
5700         }
5701 
5702         mutex_enter(&ip->i_tlock);
5703         ip->i_mapcnt += btopr(len);
5704         mutex_exit(&ip->i_tlock);
5705         return (0);
5706 }
5707 
5708 /*ARGSUSED*/
5709 static int
5710 ufs_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
5711     size_t len, uint_t prot,  uint_t maxprot,  uint_t flags, struct cred *cr,
5712     caller_context_t *ct)
5713 {
5714         struct inode *ip = VTOI(vp);
5715 
5716         if (vp->v_flag & VNOMAP) {
5717                 return (ENOSYS);
5718         }
5719 
5720         mutex_enter(&ip->i_tlock);
5721         ip->i_mapcnt -= btopr(len);  /* Count released mappings */
5722         ASSERT(ip->i_mapcnt >= 0);
5723         mutex_exit(&ip->i_tlock);
5724         return (0);
5725 }
5726 /*
5727  * Return the answer requested to poll() for non-device files
5728  */
5729 struct pollhead ufs_pollhd;
5730 
5731 /* ARGSUSED */
5732 int
5733 ufs_poll(vnode_t *vp, short ev, int any, short *revp, struct pollhead **phpp,
5734     caller_context_t *ct)
5735 {
5736         struct ufsvfs   *ufsvfsp;
5737 
5738         /*
5739          * Regular files reject edge-triggered pollers.
5740          * See the comment in fs_poll() for a more detailed explanation.
5741          */
5742         if (ev & POLLET) {
5743                 return (EPERM);
5744         }
5745 
5746         *revp = 0;
5747         ufsvfsp = VTOI(vp)->i_ufsvfs;
5748 
5749         if (!ufsvfsp) {
5750                 *revp = POLLHUP;
5751                 goto out;
5752         }
5753 
5754         if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs) ||
5755             ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) {
5756                 *revp |= POLLERR;
5757 
5758         } else {
5759                 if ((ev & POLLOUT) && !ufsvfsp->vfs_fs->fs_ronly &&
5760                     !ULOCKFS_IS_WLOCK(&ufsvfsp->vfs_ulockfs))
5761                         *revp |= POLLOUT;
5762 
5763                 if ((ev & POLLWRBAND) && !ufsvfsp->vfs_fs->fs_ronly &&
5764                     !ULOCKFS_IS_WLOCK(&ufsvfsp->vfs_ulockfs))
5765                         *revp |= POLLWRBAND;
5766 
5767                 if (ev & POLLIN)
5768                         *revp |= POLLIN;
5769 
5770                 if (ev & POLLRDNORM)
5771                         *revp |= POLLRDNORM;
5772 
5773                 if (ev & POLLRDBAND)
5774                         *revp |= POLLRDBAND;
5775         }
5776 
5777         if ((ev & POLLPRI) && (*revp & (POLLERR|POLLHUP)))
5778                 *revp |= POLLPRI;
5779 out:
5780         if (*revp == 0 && ! any) {
5781                 *phpp = &ufs_pollhd;
5782         }
5783 
5784         return (0);
5785 }
5786 
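The new POLLET check in ufs_poll() is easiest to appreciate from user level. The sketch below (illustrative only, not part of this webrev) assumes the illumos epoll(3C) compatibility interfaces in <sys/epoll.h> and that the EPERM returned by the VOP_POLL handler for an edge-triggered request on a regular file propagates out of epoll_ctl(); it simply attempts such a registration and reports the resulting errno:

#include <sys/epoll.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

int
main(int argc, char *argv[])
{
	const char *path = (argc > 1) ? argv[1] : "/etc/motd";
	struct epoll_event ev;
	int fd, epfd;

	if ((fd = open(path, O_RDONLY)) < 0 ||
	    (epfd = epoll_create1(0)) < 0) {
		perror("setup");
		return (1);
	}

	(void) memset(&ev, 0, sizeof (ev));
	ev.events = EPOLLIN | EPOLLET;	/* edge-triggered request */
	ev.data.fd = fd;

	/*
	 * With regular files now rejecting edge-triggered pollers, this
	 * registration is expected to fail rather than silently behave
	 * as level-triggered.
	 */
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) != 0)
		(void) printf("EPOLL_CTL_ADD: %s\n", strerror(errno));
	else
		(void) printf("EPOLL_CTL_ADD succeeded\n");
	return (0);
}
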
5787 /* ARGSUSED */
5788 static int
5789 ufs_l_pathconf(struct vnode *vp, int cmd, ulong_t *valp, struct cred *cr,
5790     caller_context_t *ct)
5791 {
5792         struct ufsvfs   *ufsvfsp = VTOI(vp)->i_ufsvfs;
5793         struct ulockfs  *ulp = NULL;
5794         struct inode    *sip = NULL;
5795         int             error;
5796         struct inode    *ip = VTOI(vp);
5797         int             issync;
5798 
5799         error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_PATHCONF_MASK);
5800         if (error)
5801                 return (error);
5802