669 iovec_t *aiov = NULL;
670 xuio_t *xuio = NULL;
671 int i_iov = 0;
672 int iovcnt = uio->uio_iovcnt;
673 iovec_t *iovp = uio->uio_iov;
674 int write_eof;
675 int count = 0;
676 sa_bulk_attr_t bulk[4];
677 uint64_t mtime[2], ctime[2];
678
679 /*
680 * Fasttrack empty write
681 */
682 n = start_resid;
683 if (n == 0)
684 return (0);
685
686 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
687 limit = MAXOFFSET_T;
688
689 /*
690 * Pre-fault the pages to ensure slow (e.g. NFS) pages
691 * don't hold up txg.
692 * Skip this if uio contains loaned arc_buf.
693 */
694 if ((uio->uio_extflg == UIO_XUIO) &&
695 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
696 xuio = (xuio_t *)uio;
697 else
698 uio_prefaultpages(n, uio);
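/*
 * (Note, illustrative: without the pre-fault, the copy from an
 * unfaulted user buffer, e.g. one backed by a file mmap'ed over NFS,
 * could block on a page-in while a transaction is open below, holding
 * the txg open for every other writer.)
 */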
699
700 ZFS_ENTER(zfsvfs);
701 ZFS_VERIFY_ZP(zp);
702
703 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
704 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
705 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
706 &zp->z_size, 8);
707 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
708 &zp->z_pflags, 8);
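/*
 * (Note: the bulk array batches the mtime, ctime, size and flags SA
 * attributes so that each pass of the write loop below can commit all
 * four in a single sa_bulk_update() call rather than one update each.)
 */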
709
710 /*
711 * In the case where vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots),
712 * our callers might not be able to detect that we are read-only, so
713 * check it explicitly here.
714 */
715 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
716 ZFS_EXIT(zfsvfs);
717 return (SET_ERROR(EROFS));
718 }
719
732 /*
733 * Validate file offset
734 */
735 woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
736 if (woff < 0) {
737 ZFS_EXIT(zfsvfs);
738 return (SET_ERROR(EINVAL));
739 }
740
741 /*
742 * Check for mandatory locks before calling zfs_range_lock()
743 * in order to prevent a deadlock with locks set via fcntl().
744 */
745 if (MANDMODE((mode_t)zp->z_mode) &&
746 (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
747 ZFS_EXIT(zfsvfs);
748 return (error);
749 }
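/*
 * (Note: mandatory locking is enabled only when a file's mode has the
 * setgid bit set with group execute clear, e.g. a mode of 2644;
 * MANDMODE() tests for exactly that combination, so ordinary files
 * never pay for the chklock() call.)
 */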
750
751 /*
752 * If in append mode, set the io offset pointer to eof.
753 */
754 if (ioflag & FAPPEND) {
755 /*
756 * Obtain an appending range lock to guarantee file append
757 * semantics. We reset the write offset once we have the lock.
758 */
759 rl = zfs_range_lock(zp, 0, n, RL_APPEND);
760 woff = rl->r_off;
761 if (rl->r_len == UINT64_MAX) {
762 /*
763 * We overlocked the file because this write will cause
764 * the file block size to increase.
765 * Note that zp->z_size cannot change with this lock held.
766 */
767 woff = zp->z_size;
768 }
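/*
 * (Illustration: appending to a file whose block size is still below
 * the dataset's maximum may force the block size to grow, so the range
 * lock code locks the entire file, reporting r_len == UINT64_MAX,
 * rather than just the range [woff, woff + n); the EOF is then re-read
 * safely under that lock, as above.)
 */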
769 uio->uio_loffset = woff;
770 } else {
771 /*
975 * the file size to the specified eof. Note, there's no
976 * concurrency during replay.
977 */
978 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
979 zp->z_size = zfsvfs->z_replay_eof;
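/*
 * (Note: during ZIL replay the replayed record supplies the resulting
 * file size, so the EOF is simply restored from z_replay_eof; replay
 * is single-threaded, which is why this store needs no further
 * synchronization.)
 */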
980
981 /*
982 * Keep track of a possible pre-existing error from a partial
983 * write via dmu_write_uio_dbuf above.
984 */
985 prev_error = error;
986 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
987
988 zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
989 dmu_tx_commit(tx);
990
991 if (prev_error != 0 || error != 0)
992 break;
993 ASSERT(tx_bytes == nbytes);
994 n -= nbytes;
995 }
996
997 zfs_range_unlock(rl);
998
999 /*
1000 * If we're in replay mode, or we made no progress, return error.
1001 * Otherwise, it's at least a partial write, so it's successful.
1002 */
1003 if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
1004 ZFS_EXIT(zfsvfs);
1005 return (error);
1006 }
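/*
 * (Worked example, illustrative: a 1 MB write that fails with EDQUOT
 * after 384 KB have been copied leaves uio_resid != start_resid, so
 * the branch above is not taken; the caller sees a successful short
 * write of 384 KB, and the saved error would be returned only if no
 * bytes had been written at all.)
 */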
1007
1008 if (ioflag & (FSYNC | FDSYNC) ||
1009 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1010 zil_commit(zilog, zp->z_id);
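/*
 * (Note: FSYNC and FDSYNC correspond to O_SYNC and O_DSYNC writers;
 * they, and every writer on a dataset with sync=always, block in
 * zil_commit() until the intent-log records for this file reach stable
 * storage.  All other writers return once the data is in the DMU and
 * rely on the periodic txg sync.)
 */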
1011
1012 ZFS_EXIT(zfsvfs);
1013 return (0);
1014 }
4241 ASSERT3U(len, <=, PAGESIZE);
4242 dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx);
4243 zfs_unmap_page(pp, va);
4244 } else {
4245 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx);
4246 }
4247
4248 if (err == 0) {
4249 uint64_t mtime[2], ctime[2];
4250 sa_bulk_attr_t bulk[3];
4251 int count = 0;
4252
4253 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4254 &mtime, 16);
4255 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4256 &ctime, 16);
4257 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4258 &zp->z_pflags, 8);
4259 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
4260 B_TRUE);
4261 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4262
4263 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
4264 }
4265 dmu_tx_commit(tx);
4266
4267 out:
4268 pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
4269 if (offp)
4270 *offp = off;
4271 if (lenp)
4272 *lenp = len;
4273
4274 return (err);
4275 }
4276
4277 /*
4278 * Write the indicated portion of the file out from its cached pages.
4279 * The pages are stored in a page list attached to the file's vnode.
4280 *
4281 * IN: vp - vnode of file to push page data to.
4282 * off - position in file to put data.
4778 * close()
4779 * <time lapse>
4780 * putpage() via fsflush
4781 *
4782 * If we wait for fsflush to come along, we can have a modification time that
4783 * is some arbitrary point in the future. In order to prevent this in the
4784 * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is
4785 * torn down.
4786 */
4787 /* ARGSUSED */
4788 static int
4789 zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4790 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
4791 caller_context_t *ct)
4792 {
4793 uint64_t pages = btopr(len);
4794
4795 ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages);
4796 atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages);
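/*
 * (Note: btopr() rounds the byte length up to whole pages, mirroring
 * the accounting done when the mapping was established, so z_mapcnt
 * drops by exactly the number of pages being unmapped.)
 */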
4797
4798 return (0);
4799 }
4800
4801 /*
4802 * Free or allocate space in a file. Currently, this function only
4803 * supports the `F_FREESP' command. However, this command is somewhat
4804 * misnamed, as its functionality includes the ability to allocate as
4805 * well as free space.
4806 *
4807 * IN: vp - vnode of file to free data in.
4808 * cmd - action to take (only F_FREESP supported).
4809 * bfp - section of file to free/alloc.
4810 * flag - current file open mode flags.
4811 * offset - current file offset.
4812 * cr - credentials of caller [UNUSED].
4813 * ct - caller context.
4814 *
4815 * RETURN: 0 on success, error code on failure.
4816 *
4817 * Timestamps:
669 iovec_t *aiov = NULL;
670 xuio_t *xuio = NULL;
671 int i_iov = 0;
672 int iovcnt = uio->uio_iovcnt;
673 iovec_t *iovp = uio->uio_iov;
674 int write_eof;
675 int count = 0;
676 sa_bulk_attr_t bulk[4];
677 uint64_t mtime[2], ctime[2];
678
679 /*
680 * Fasttrack empty write
681 */
682 n = start_resid;
683 if (n == 0)
684 return (0);
685
686 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
687 limit = MAXOFFSET_T;
688
689 ZFS_ENTER(zfsvfs);
690 ZFS_VERIFY_ZP(zp);
691
692 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
693 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
694 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
695 &zp->z_size, 8);
696 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
697 &zp->z_pflags, 8);
698
699 /*
700 * In the case where vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots),
701 * our callers might not be able to detect that we are read-only, so
702 * check it explicitly here.
703 */
704 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
705 ZFS_EXIT(zfsvfs);
706 return (SET_ERROR(EROFS));
707 }
708
721 /*
722 * Validate file offset
723 */
724 woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
725 if (woff < 0) {
726 ZFS_EXIT(zfsvfs);
727 return (SET_ERROR(EINVAL));
728 }
729
730 /*
731 * Check for mandatory locks before calling zfs_range_lock()
732 * in order to prevent a deadlock with locks set via fcntl().
733 */
734 if (MANDMODE((mode_t)zp->z_mode) &&
735 (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
736 ZFS_EXIT(zfsvfs);
737 return (error);
738 }
739
740 /*
741 * Pre-fault the pages to ensure slow (e.g. NFS) pages
742 * don't hold up txg.
743 * Skip this if uio contains loaned arc_buf.
744 */
745 if ((uio->uio_extflg == UIO_XUIO) &&
746 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
747 xuio = (xuio_t *)uio;
748 else
749 uio_prefaultpages(MIN(n, max_blksz), uio);
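/*
 * (Note: only the first max_blksz bytes are pre-faulted here; for
 * larger writes the loop below calls uio_prefaultpages() again before
 * each subsequent chunk, so no more than one block's worth of user
 * pages is faulted in at any one time.)
 */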
750
751 /*
752 * If in append mode, set the io offset pointer to eof.
753 */
754 if (ioflag & FAPPEND) {
755 /*
756 * Obtain an appending range lock to guarantee file append
757 * semantics. We reset the write offset once we have the lock.
758 */
759 rl = zfs_range_lock(zp, 0, n, RL_APPEND);
760 woff = rl->r_off;
761 if (rl->r_len == UINT64_MAX) {
762 /*
763 * We overlocked the file because this write will cause
764 * the file block size to increase.
765 * Note that zp->z_size cannot change with this lock held.
766 */
767 woff = zp->z_size;
768 }
769 uio->uio_loffset = woff;
770 } else {
771 /*
975 * the file size to the specified eof. Note, there's no
976 * concurrency during replay.
977 */
978 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
979 zp->z_size = zfsvfs->z_replay_eof;
980
981 /*
982 * Keep track of a possible pre-existing error from a partial
983 * write via dmu_write_uio_dbuf above.
984 */
985 prev_error = error;
986 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
987
988 zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
989 dmu_tx_commit(tx);
990
991 if (prev_error != 0 || error != 0)
992 break;
993 ASSERT(tx_bytes == nbytes);
994 n -= nbytes;
995
996 if (!xuio && n > 0)
997 uio_prefaultpages(MIN(n, max_blksz), uio);
998 }
999
1000 zfs_range_unlock(rl);
1001
1002 /*
1003 * If we're in replay mode, or we made no progress, return error.
1004 * Otherwise, it's at least a partial write, so it's successful.
1005 */
1006 if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
1007 ZFS_EXIT(zfsvfs);
1008 return (error);
1009 }
1010
1011 if (ioflag & (FSYNC | FDSYNC) ||
1012 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1013 zil_commit(zilog, zp->z_id);
1014
1015 ZFS_EXIT(zfsvfs);
1016 return (0);
1017 }
4244 ASSERT3U(len, <=, PAGESIZE);
4245 dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx);
4246 zfs_unmap_page(pp, va);
4247 } else {
4248 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx);
4249 }
4250
4251 if (err == 0) {
4252 uint64_t mtime[2], ctime[2];
4253 sa_bulk_attr_t bulk[3];
4254 int count = 0;
4255
4256 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4257 &mtime, 16);
4258 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4259 &ctime, 16);
4260 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4261 &zp->z_pflags, 8);
4262 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
4263 B_TRUE);
4264 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
4265 }
4266 dmu_tx_commit(tx);
4267
4268 out:
4269 pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
4270 if (offp)
4271 *offp = off;
4272 if (lenp)
4273 *lenp = len;
4274
4275 return (err);
4276 }
4277
4278 /*
4279 * Write the indicated portion of the file out from its cached pages.
4280 * The pages are stored in a page list attached to the file's vnode.
4281 *
4282 * IN: vp - vnode of file to push page data to.
4283 * off - position in file to put data.
4779 * close()
4780 * <time lapse>
4781 * putpage() via fsflush
4782 *
4783 * If we wait for fsflush to come along, we can have a modification time that
4784 * is some arbitrary point in the future. In order to prevent this in the
4785 * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is
4786 * torn down.
4787 */
4788 /* ARGSUSED */
4789 static int
4790 zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4791 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
4792 caller_context_t *ct)
4793 {
4794 uint64_t pages = btopr(len);
4795
4796 ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages);
4797 atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages);
4798
4799 if ((flags & MAP_SHARED) && (prot & PROT_WRITE) &&
4800 vn_has_cached_data(vp))
4801 (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct);
4802
4803 return (0);
4804 }
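/*
 * (Usage sketch, hypothetical user program, showing the sequence the
 * comment above describes:
 *
 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	((char *)p)[0] = 'x';		modify a page in place
 *	munmap(p, len);			tears the mapping down
 *	close(fd);
 *
 * The VOP_PUTPAGE() above pushes the dirty pages asynchronously at
 * munmap() time, so the file's modification time reflects roughly when
 * the mapping was torn down rather than whenever fsflush happens to
 * run.)
 */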
4805
4806 /*
4807 * Free or allocate space in a file. Currently, this function only
4808 * supports the `F_FREESP' command. However, this command is somewhat
4809 * misnamed, as its functionality includes the ability to allocate as
4810 * well as free space.
4811 *
4812 * IN: vp - vnode of file to free data in.
4813 * cmd - action to take (only F_FREESP supported).
4814 * bfp - section of file to free/alloc.
4815 * flag - current file open mode flags.
4816 * offset - current file offset.
4817 * cr - credentials of caller [UNUSED].
4818 * ct - caller context.
4819 *
4820 * RETURN: 0 on success, error code on failure.
4821 *
4822 * Timestamps:
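 *
 * Caller sketch (hypothetical): this function is reached from user
 * level via fcntl(F_FREESP), which VOP_SPACE() routes here.  Freeing
 * len bytes starting at offset off looks like:
 *
 *	struct flock64 bf = { 0 };
 *	bf.l_whence = SEEK_SET;
 *	bf.l_start = off;
 *	bf.l_len = len;			a length of 0 frees to EOF
 *	(void) fcntl(fd, F_FREESP, &bf);
 */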