io-lx-public Sdiff usr/src/uts/common/fs/zfs/zfs

Print this page

OS-5148 ftruncate at offset should emit proper events
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5291 lxbrand inotify02 LTP regression
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4319 zfs mishandles partial writes
OS-3294 add support for inotify
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  24  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2014 Integros [integros.com]
  26  * Copyright 2015 Joyent, Inc.
  27  */
  28 
  29 /* Portions Copyright 2007 Jeremy Teo */
  30 /* Portions Copyright 2010 Robert Milkowski */
  31 
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/time.h>
  35 #include <sys/systm.h>
  36 #include <sys/sysmacros.h>
  37 #include <sys/resource.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/file.h>
  42 #include <sys/stat.h>
  43 #include <sys/kmem.h>
  44 #include <sys/taskq.h>
  45 #include <sys/uio.h>
  46 #include <sys/vmsystm.h>

 647  *      vp - ctime|mtime updated if byte count > 0
 648  */
 649 
 650 /* ARGSUSED */
 651 static int
 652 zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
 653 {
 654         znode_t         *zp = VTOZ(vp);
 655         rlim64_t        limit = uio->uio_llimit;
 656         ssize_t         start_resid = uio->uio_resid;
 657         ssize_t         tx_bytes;
 658         uint64_t        end_size;
 659         dmu_tx_t        *tx;
 660         zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
 661         zilog_t         *zilog;
 662         offset_t        woff;
 663         ssize_t         n, nbytes;
 664         rl_t            *rl;
 665         int             max_blksz = zfsvfs->z_max_blksz;
 666         int             error = 0;

 667         arc_buf_t       *abuf;
 668         iovec_t         *aiov = NULL;
 669         xuio_t          *xuio = NULL;
 670         int             i_iov = 0;
 671         int             iovcnt = uio->uio_iovcnt;
 672         iovec_t         *iovp = uio->uio_iov;
 673         int             write_eof;
 674         int             count = 0;
 675         sa_bulk_attr_t  bulk[4];
 676         uint64_t        mtime[2], ctime[2];
 677 
 678         /*
 679          * Fasttrack empty write
 680          */
 681         n = start_resid;
 682         if (n == 0)
 683                 return (0);
 684 
 685         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 686                 limit = MAXOFFSET_T;

 951                     secpolicy_vnode_setid_retain(cr,
 952                     (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
 953                         uint64_t newmode;
 954                         zp->z_mode &= ~(S_ISUID | S_ISGID);
 955                         newmode = zp->z_mode;
 956                         (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
 957                             (void *)&newmode, sizeof (uint64_t), tx);
 958                 }
 959                 mutex_exit(&zp->z_acl_lock);
 960 
 961                 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
 962                     B_TRUE);
 963 
 964                 /*
 965                  * Update the file size (zp->z_size) if it has changed;
 966                  * account for possible concurrent updates.
 967                  */
 968                 while ((end_size = zp->z_size) < uio->uio_loffset) {
 969                         (void) atomic_cas_64(&zp->z_size, end_size,
 970                             uio->uio_loffset);
 971                         ASSERT(error == 0);
 972                 }
 973                 /*
 974                  * If we are replaying and eof is non zero then force
 975                  * the file size to the specified eof. Note, there's no
 976                  * concurrency during replay.
 977                  */
 978                 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
 979                         zp->z_size = zfsvfs->z_replay_eof;
 980 





 981                 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 982 
 983                 zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
 984                 dmu_tx_commit(tx);
 985 
 986                 if (error != 0)
 987                         break;
 988                 ASSERT(tx_bytes == nbytes);
 989                 n -= nbytes;
 990 
 991                 if (!xuio && n > 0)
 992                         uio_prefaultpages(MIN(n, max_blksz), uio);
 993         }
 994 
 995         zfs_range_unlock(rl);
 996 
 997         /*
 998          * If we're in replay mode, or we made no progress, return error.
 999          * Otherwise, it's at least a partial write, so it's successful.
1000          */
1001         if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
1002                 ZFS_EXIT(zfsvfs);
1003                 return (error);
1004         }
1005 
1006         if (ioflag & (FSYNC | FDSYNC) ||

2815 
2816         if (mask & AT_SIZE) {
2817                 err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
2818                 if (err) {
2819                         ZFS_EXIT(zfsvfs);
2820                         return (err);
2821                 }
2822                 /*
2823                  * XXX - Note, we are not providing any open
2824                  * mode flags here (like FNDELAY), so we may
2825                  * block if there are locks present... this
2826                  * should be addressed in openat().
2827                  */
2828                 /* XXX - would it be OK to generate a log record here? */
2829                 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2830                 if (err) {
2831                         ZFS_EXIT(zfsvfs);
2832                         return (err);
2833                 }
2834 
2835                 if (vap->va_size == 0)
2836                         vnevent_truncate(ZTOV(zp), ct);


2837         }

2838 
2839         if (mask & (AT_ATIME|AT_MTIME) ||
2840             ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2841             XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2842             XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2843             XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2844             XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2845             XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2846             XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2847                 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2848                     skipaclchk, cr);
2849         }
2850 
2851         if (mask & (AT_UID|AT_GID)) {
2852                 int     idmask = (mask & (AT_UID|AT_GID));
2853                 int     take_owner;
2854                 int     take_group;
2855 
2856                 /*
2857                  * NOTE: even if a new mode is being set,

3744                                  * complications; for one, the link count is
3745                                  * wrong.  The easiest way to deal with this
3746                                  * is to remove the newly created target, and
3747                                  * return the original error.  This must
3748                                  * succeed; fortunately, it is very unlikely to
3749                                  * fail, since we just created it.
3750                                  */
3751                                 VERIFY3U(zfs_link_destroy(tdl, szp, tx,
3752                                     ZRENAMING, NULL), ==, 0);
3753                         }
3754                 }
3755         }
3756 
3757         dmu_tx_commit(tx);
3758 
3759         if (tzp && rm_err == 0)
3760                 vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);
3761 
3762         if (error == 0) {
3763                 vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
3764                 /* notify the target dir if it is not the same as source dir */
3765                 if (tdvp != sdvp)
3766                         vnevent_rename_dest_dir(tdvp, ct);
3767         }
3768 out:
3769         if (zl != NULL)
3770                 zfs_rename_unlock(&zl);
3771 
3772         zfs_dirent_unlock(sdl);
3773         zfs_dirent_unlock(tdl);
3774 
3775         if (sdzp == tdzp)
3776                 rw_exit(&sdzp->z_name_lock);
3777 
3778 
3779         VN_RELE(ZTOV(szp));
3780         if (tzp)
3781                 VN_RELE(ZTOV(tzp));
3782 
3783         if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3784                 zil_commit(zilog, 0);
3785 
3786         ZFS_EXIT(zfsvfs);

4842         if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
4843                 ZFS_EXIT(zfsvfs);
4844                 return (SET_ERROR(EROFS));
4845         }
4846 
4847         if (error = convoff(vp, bfp, 0, offset)) {
4848                 ZFS_EXIT(zfsvfs);
4849                 return (error);
4850         }
4851 
4852         if (bfp->l_len < 0) {
4853                 ZFS_EXIT(zfsvfs);
4854                 return (SET_ERROR(EINVAL));
4855         }
4856 
4857         off = bfp->l_start;
4858         len = bfp->l_len; /* 0 means from off to end of file */
4859 
4860         error = zfs_freesp(zp, off, len, flag, TRUE);
4861 
4862         if (error == 0 && off == 0 && len == 0)

4863                 vnevent_truncate(ZTOV(zp), ct);




4864 
4865         ZFS_EXIT(zfsvfs);
4866         return (error);
4867 }
4868 
4869 /*ARGSUSED*/
4870 static int
4871 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
4872 {
4873         znode_t         *zp = VTOZ(vp);
4874         zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
4875         uint32_t        gen;
4876         uint64_t        gen64;
4877         uint64_t        object = zp->z_id;
4878         zfid_short_t    *zfid;
4879         int             size, i, error;
4880 
4881         ZFS_ENTER(zfsvfs);
4882         ZFS_VERIFY_ZP(zp);
4883

   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  24  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2014 Integros [integros.com]
  26  * Copyright 2016 Joyent, Inc.
  27  */
  28 
  29 /* Portions Copyright 2007 Jeremy Teo */
  30 /* Portions Copyright 2010 Robert Milkowski */
  31 
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/time.h>
  35 #include <sys/systm.h>
  36 #include <sys/sysmacros.h>
  37 #include <sys/resource.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/file.h>
  42 #include <sys/stat.h>
  43 #include <sys/kmem.h>
  44 #include <sys/taskq.h>
  45 #include <sys/uio.h>
  46 #include <sys/vmsystm.h>

 647  *      vp - ctime|mtime updated if byte count > 0
 648  */
 649 
 650 /* ARGSUSED */
 651 static int
 652 zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
 653 {
 654         znode_t         *zp = VTOZ(vp);
 655         rlim64_t        limit = uio->uio_llimit;
 656         ssize_t         start_resid = uio->uio_resid;
 657         ssize_t         tx_bytes;
 658         uint64_t        end_size;
 659         dmu_tx_t        *tx;
 660         zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
 661         zilog_t         *zilog;
 662         offset_t        woff;
 663         ssize_t         n, nbytes;
 664         rl_t            *rl;
 665         int             max_blksz = zfsvfs->z_max_blksz;
 666         int             error = 0;
 667         int             prev_error;
 668         arc_buf_t       *abuf;
 669         iovec_t         *aiov = NULL;
 670         xuio_t          *xuio = NULL;
 671         int             i_iov = 0;
 672         int             iovcnt = uio->uio_iovcnt;
 673         iovec_t         *iovp = uio->uio_iov;
 674         int             write_eof;
 675         int             count = 0;
 676         sa_bulk_attr_t  bulk[4];
 677         uint64_t        mtime[2], ctime[2];
 678 
 679         /*
 680          * Fasttrack empty write
 681          */
 682         n = start_resid;
 683         if (n == 0)
 684                 return (0);
 685 
 686         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 687                 limit = MAXOFFSET_T;

 952                     secpolicy_vnode_setid_retain(cr,
 953                     (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
 954                         uint64_t newmode;
 955                         zp->z_mode &= ~(S_ISUID | S_ISGID);
 956                         newmode = zp->z_mode;
 957                         (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
 958                             (void *)&newmode, sizeof (uint64_t), tx);
 959                 }
 960                 mutex_exit(&zp->z_acl_lock);
 961 
 962                 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
 963                     B_TRUE);
 964 
 965                 /*
 966                  * Update the file size (zp->z_size) if it has changed;
 967                  * account for possible concurrent updates.
 968                  */
 969                 while ((end_size = zp->z_size) < uio->uio_loffset) {
 970                         (void) atomic_cas_64(&zp->z_size, end_size,
 971                             uio->uio_loffset);

 972                 }
 973                 /*
 974                  * If we are replaying and eof is non zero then force
 975                  * the file size to the specified eof. Note, there's no
 976                  * concurrency during replay.
 977                  */
 978                 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
 979                         zp->z_size = zfsvfs->z_replay_eof;
 980 
 981                 /*
 982                  * Keep track of a possible pre-existing error from a partial
 983                  * write via dmu_write_uio_dbuf above.
 984                  */
 985                 prev_error = error;
 986                 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 987 
 988                 zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
 989                 dmu_tx_commit(tx);
 990 
 991                 if (prev_error != 0 || error != 0)
 992                         break;
 993                 ASSERT(tx_bytes == nbytes);
 994                 n -= nbytes;
 995 
 996                 if (!xuio && n > 0)
 997                         uio_prefaultpages(MIN(n, max_blksz), uio);
 998         }
 999 
1000         zfs_range_unlock(rl);
1001 
1002         /*
1003          * If we're in replay mode, or we made no progress, return error.
1004          * Otherwise, it's at least a partial write, so it's successful.
1005          */
1006         if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
1007                 ZFS_EXIT(zfsvfs);
1008                 return (error);
1009         }
1010 
1011         if (ioflag & (FSYNC | FDSYNC) ||

2820 
2821         if (mask & AT_SIZE) {
2822                 err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
2823                 if (err) {
2824                         ZFS_EXIT(zfsvfs);
2825                         return (err);
2826                 }
2827                 /*
2828                  * XXX - Note, we are not providing any open
2829                  * mode flags here (like FNDELAY), so we may
2830                  * block if there are locks present... this
2831                  * should be addressed in openat().
2832                  */
2833                 /* XXX - would it be OK to generate a log record here? */
2834                 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2835                 if (err) {
2836                         ZFS_EXIT(zfsvfs);
2837                         return (err);
2838                 }
2839 
2840                 if (vap->va_size == 0) {
2841                         vnevent_truncate(ZTOV(zp), ct);
2842                 } else {
2843                         vnevent_resize(ZTOV(zp), ct);
2844                 }
2845         }
2846 
2847         if (mask & (AT_ATIME|AT_MTIME) ||
2848             ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2849             XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2850             XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2851             XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2852             XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2853             XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2854             XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2855                 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2856                     skipaclchk, cr);
2857         }
2858 
2859         if (mask & (AT_UID|AT_GID)) {
2860                 int     idmask = (mask & (AT_UID|AT_GID));
2861                 int     take_owner;
2862                 int     take_group;
2863 
2864                 /*
2865                  * NOTE: even if a new mode is being set,

3752                                  * complications; for one, the link count is
3753                                  * wrong.  The easiest way to deal with this
3754                                  * is to remove the newly created target, and
3755                                  * return the original error.  This must
3756                                  * succeed; fortunately, it is very unlikely to
3757                                  * fail, since we just created it.
3758                                  */
3759                                 VERIFY3U(zfs_link_destroy(tdl, szp, tx,
3760                                     ZRENAMING, NULL), ==, 0);
3761                         }
3762                 }
3763         }
3764 
3765         dmu_tx_commit(tx);
3766 
3767         if (tzp && rm_err == 0)
3768                 vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);
3769 
3770         if (error == 0) {
3771                 vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
3772                 vnevent_rename_dest_dir(tdvp, ZTOV(szp), tnm, ct);


3773         }
3774 out:
3775         if (zl != NULL)
3776                 zfs_rename_unlock(&zl);
3777 
3778         zfs_dirent_unlock(sdl);
3779         zfs_dirent_unlock(tdl);
3780 
3781         if (sdzp == tdzp)
3782                 rw_exit(&sdzp->z_name_lock);
3783 
3784 
3785         VN_RELE(ZTOV(szp));
3786         if (tzp)
3787                 VN_RELE(ZTOV(tzp));
3788 
3789         if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3790                 zil_commit(zilog, 0);
3791 
3792         ZFS_EXIT(zfsvfs);

4848         if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
4849                 ZFS_EXIT(zfsvfs);
4850                 return (SET_ERROR(EROFS));
4851         }
4852 
4853         if (error = convoff(vp, bfp, 0, offset)) {
4854                 ZFS_EXIT(zfsvfs);
4855                 return (error);
4856         }
4857 
4858         if (bfp->l_len < 0) {
4859                 ZFS_EXIT(zfsvfs);
4860                 return (SET_ERROR(EINVAL));
4861         }
4862 
4863         off = bfp->l_start;
4864         len = bfp->l_len; /* 0 means from off to end of file */
4865 
4866         error = zfs_freesp(zp, off, len, flag, TRUE);
4867 
4868         if (error == 0 && len == 0) {
4869                 if (off == 0) {
4870                         vnevent_truncate(ZTOV(zp), ct);
4871                 } else {
4872                         vnevent_resize(ZTOV(zp), ct);
4873                 }
4874         }
4875 
4876         ZFS_EXIT(zfsvfs);
4877         return (error);
4878 }
4879 
4880 /*ARGSUSED*/
4881 static int
4882 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
4883 {
4884         znode_t         *zp = VTOZ(vp);
4885         zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
4886         uint32_t        gen;
4887         uint64_t        gen64;
4888         uint64_t        object = zp->z_id;
4889         zfid_short_t    *zfid;
4890         int             size, i, error;
4891 
4892         ZFS_ENTER(zfsvfs);
4893         ZFS_VERIFY_ZP(zp);
4894