/* ... (beginning of zfs_write() elided) ... */
        iovec_t         *aiov = NULL;
        xuio_t          *xuio = NULL;
        int             i_iov = 0;
        int             iovcnt = uio->uio_iovcnt;
        iovec_t         *iovp = uio->uio_iov;
        int             write_eof;
        int             count = 0;
        sa_bulk_attr_t  bulk[4];
        uint64_t        mtime[2], ctime[2];

        /*
         * Fast-track an empty write.
         */
        n = start_resid;
        if (n == 0)
                return (0);

        if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
                limit = MAXOFFSET_T;

        /*
         * Pre-fault the user pages so that slow pages (e.g. NFS-backed
         * mappings) cannot hold up the txg.
         * Skip this if the uio contains a loaned arc_buf.
         */
        if ((uio->uio_extflg == UIO_XUIO) &&
            (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
                xuio = (xuio_t *)uio;
        else
                uio_prefaultpages(n, uio);

        ZFS_ENTER(zfsvfs);
        ZFS_VERIFY_ZP(zp);

        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
            &zp->z_size, 8);
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
            &zp->z_pflags, 8);

        /*
         * In the case where vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g.
         * snapshots), our callers might not be able to detect that we are
         * read-only, so check it explicitly here.
         */
        if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
                ZFS_EXIT(zfsvfs);
                return (SET_ERROR(EROFS));
        }

        /* ... */
        /*
         * Validate the file offset.
         */
        woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
        if (woff < 0) {
                ZFS_EXIT(zfsvfs);
                return (SET_ERROR(EINVAL));
        }

        /*
         * Check for mandatory locks before calling zfs_range_lock()
         * in order to prevent a deadlock with locks set via fcntl().
         */
        if (MANDMODE((mode_t)zp->z_mode) &&
            (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
                ZFS_EXIT(zfsvfs);
                return (error);
        }

        /*
         * If in append mode, set the I/O offset pointer to EOF.
         */
        if (ioflag & FAPPEND) {
                /*
                 * Obtain an appending range lock to guarantee file append
                 * semantics.  We reset the write offset once we have the lock.
                 */
                rl = zfs_range_lock(zp, 0, n, RL_APPEND);
                woff = rl->r_off;
                if (rl->r_len == UINT64_MAX) {
                        /*
                         * We overlocked the file because this write will cause
                         * the file block size to increase.
                         * Note that zp->z_size cannot change while this lock
                         * is held.
                         */
                        woff = zp->z_size;
                }
                uio->uio_loffset = woff;
        } else {
                /*
                 * Note that if the file block size will change as a result of
                 * this write, then this range lock will lock the entire file
                 * so that we can re-write the block safely.
                 */
                rl = zfs_range_lock(zp, woff, n, RL_WRITER);
        }

        /* ... (most of the per-block copy loop elided) ... */
                /*
                 * If we are replaying and eof is non zero then force
                 * the file size to the specified eof. Note, there's no
                 * concurrency during replay.
                 */
                if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
                        zp->z_size = zfsvfs->z_replay_eof;

                /*
                 * Keep track of a possible pre-existing error from a partial
                 * write via dmu_write_uio_dbuf above.
                 */
                prev_error = error;
                error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

                zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
                dmu_tx_commit(tx);

                if (prev_error != 0 || error != 0)
                        break;
                ASSERT(tx_bytes == nbytes);
                n -= nbytes;
        }

        zfs_range_unlock(rl);

        /*
         * If we're in replay mode, or we made no progress, return error.
         * Otherwise, it's at least a partial write, so it's successful.
         */
        if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
                ZFS_EXIT(zfsvfs);
                return (error);
        }

        if (ioflag & (FSYNC | FDSYNC) ||
            zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
                zil_commit(zilog, zp->z_id);

        ZFS_EXIT(zfsvfs);
        return (0);
}
 
/* ... (beginning of zfs_putapage() elided) ... */
                ASSERT3U(len, <=, PAGESIZE);
                dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx);
                zfs_unmap_page(pp, va);
        } else {
                err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx);
        }

        if (err == 0) {
                uint64_t mtime[2], ctime[2];
                sa_bulk_attr_t bulk[3];
                int count = 0;

                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
                    &mtime, 16);
                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
                    &ctime, 16);
                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
                    &zp->z_pflags, 8);
                zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
                    B_TRUE);
                err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

                zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
        }
        dmu_tx_commit(tx);

out:
        pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
        if (offp)
                *offp = off;
        if (lenp)
                *lenp = len;

        return (err);
}

/*
 * Copy the portion of the file indicated from pages into the file.
 * The pages are stored in a page list attached to the file's vnode.
 *
 *      IN:     vp      - vnode of file to push page data to.
 *              off     - position in file to put data.
 
/*
 * ...
 *      close()
 *      <time lapse>
 *      putpage() via fsflush
 *
 * If we wait for fsflush to come along, we can end up with a modification
 * time that is some arbitrary point in the future.  To prevent this in the
 * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is
 * torn down.
 */
/* ARGSUSED */
static int
zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
    size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
        uint64_t pages = btopr(len);

        ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages);
        atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages);

        if ((flags & MAP_SHARED) && (prot & PROT_WRITE) &&
            vn_has_cached_data(vp))
                (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct);

        return (0);
}

/*
 * Free or allocate space in a file.  Currently, this function only
 * supports the `F_FREESP' command.  However, this command is somewhat
 * misnamed, as its functionality includes the ability to allocate as
 * well as free space.
 *
 *      IN:     vp      - vnode of file to free data in.
 *              cmd     - action to take (only F_FREESP supported).
 *              bfp     - section of file to free/alloc.
 *              flag    - current file open mode flags.
 *              offset  - current file offset.
 *              cr      - credentials of caller [UNUSED].
 *              ct      - caller context.
 *
 *      RETURN: 0 on success, error code on failure.
 *
 * Timestamps:
 
 
 
/* ... (beginning of zfs_write() elided) ... */
        iovec_t         *aiov = NULL;
        xuio_t          *xuio = NULL;
        int             i_iov = 0;
        int             iovcnt = uio->uio_iovcnt;
        iovec_t         *iovp = uio->uio_iov;
        int             write_eof;
        int             count = 0;
        sa_bulk_attr_t  bulk[4];
        uint64_t        mtime[2], ctime[2];

        /*
         * Fast-track an empty write.
         */
        n = start_resid;
        if (n == 0)
                return (0);

        if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
                limit = MAXOFFSET_T;

        ZFS_ENTER(zfsvfs);
        ZFS_VERIFY_ZP(zp);

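        /*
         * Gather the SA attributes that each transaction committed by
         * the copy loop below will flush with a single sa_bulk_update().
         */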
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
            &zp->z_size, 8);
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
            &zp->z_pflags, 8);

        /*
         * In the case where vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g.
         * snapshots), our callers might not be able to detect that we are
         * read-only, so check it explicitly here.
         */
        if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
                ZFS_EXIT(zfsvfs);
                return (SET_ERROR(EROFS));
        }

        /* ... */
        /*
         * Validate the file offset.
         */
        woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
        if (woff < 0) {
                ZFS_EXIT(zfsvfs);
                return (SET_ERROR(EINVAL));
        }

        /*
         * Check for mandatory locks before calling zfs_range_lock()
         * in order to prevent a deadlock with locks set via fcntl().
         */
        if (MANDMODE((mode_t)zp->z_mode) &&
            (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
                ZFS_EXIT(zfsvfs);
                return (error);
        }

        /*
         * Pre-fault the user pages so that slow pages (e.g. NFS-backed
         * mappings) cannot hold up the txg.  Only the first max_blksz
         * bytes are faulted in here; the copy loop below faults in each
         * subsequent chunk just before it is needed.
         * Skip this if the uio contains a loaned arc_buf.
         */
        if ((uio->uio_extflg == UIO_XUIO) &&
            (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
                xuio = (xuio_t *)uio;
        else
                uio_prefaultpages(MIN(n, max_blksz), uio);

        /*
         * If in append mode, set the I/O offset pointer to EOF.
         */
        if (ioflag & FAPPEND) {
                /*
                 * Obtain an appending range lock to guarantee file append
                 * semantics.  We reset the write offset once we have the lock.
                 */
                rl = zfs_range_lock(zp, 0, n, RL_APPEND);
                woff = rl->r_off;
                if (rl->r_len == UINT64_MAX) {
                        /*
                         * We overlocked the file because this write will cause
                         * the file block size to increase.
                         * Note that zp->z_size cannot change while this lock
                         * is held.
                         */
                        woff = zp->z_size;
                }
                uio->uio_loffset = woff;
        } else {
                /*
                 * Note that if the file block size will change as a result of
                 * this write, then this range lock will lock the entire file
                 * so that we can re-write the block safely.
                 */
                rl = zfs_range_lock(zp, woff, n, RL_WRITER);
        }

        /* ... (most of the per-block copy loop elided) ... */
                /*
                 * If we are replaying and eof is non zero then force
                 * the file size to the specified eof. Note, there's no
                 * concurrency during replay.
                 */
                if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
                        zp->z_size = zfsvfs->z_replay_eof;

                /*
                 * Keep track of a possible pre-existing error from a partial
                 * write via dmu_write_uio_dbuf above.
                 */
                prev_error = error;
                error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

                zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
                dmu_tx_commit(tx);

                if (prev_error != 0 || error != 0)
                        break;
                ASSERT(tx_bytes == nbytes);
                n -= nbytes;

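                /*
                 * With the transaction committed, pre-fault the next
                 * chunk of user pages so the copy in the next iteration
                 * cannot fault while a transaction is held open.
                 */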
                if (!xuio && n > 0)
                        uio_prefaultpages(MIN(n, max_blksz), uio);
        }

        zfs_range_unlock(rl);

        /*
         * If we're in replay mode, or we made no progress, return error.
         * Otherwise, it's at least a partial write, so it's successful.
         */
        if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
                ZFS_EXIT(zfsvfs);
                return (error);
        }

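        /*
         * For O_SYNC/O_DSYNC writes, or when the dataset is set to
         * sync=always, push the log records out to stable storage
         * before returning.
         */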
        if (ioflag & (FSYNC | FDSYNC) ||
            zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
                zil_commit(zilog, zp->z_id);

        ZFS_EXIT(zfsvfs);
        return (0);
}
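The interplay between prefaulting and the transaction loop above is easier to see in isolation. Below is a minimal userland sketch of the same pattern, assuming illustrative stand-ins: `BLKSZ` for `max_blksz`, `prefault()` for `uio_prefaultpages()`, and `tx_write()` for the copy that happens between `dmu_tx_assign()` and `dmu_tx_commit()`. None of these are real ZFS interfaces; the point is only the ordering: each chunk of user memory is faulted in while no transaction is open.

#include <stdio.h>
#include <string.h>

#define PAGESZ  4096
#define BLKSZ   (128 * 1024)            /* stands in for max_blksz */

/* Touch one byte per page so the later copy cannot fault. */
static void
prefault(const char *buf, size_t n)
{
        volatile char c;
        size_t off;

        for (off = 0; off < n; off += PAGESZ)
                c = buf[off];
        if (n != 0)
                c = buf[n - 1];
        (void) c;
}

/* Stand-in for the copy done inside an open transaction. */
static size_t
tx_write(char *dst, const char *src, size_t n)
{
        memcpy(dst, src, n);
        return (n);
}

int
main(void)
{
        static char src[512 * 1024], dst[512 * 1024];
        size_t n = sizeof (src);
        size_t done = 0;

        /* First chunk only; the loop faults in the rest as it goes. */
        prefault(src, n < BLKSZ ? n : BLKSZ);

        while (n > 0) {
                size_t nbytes = n < BLKSZ ? n : BLKSZ;

                /* tx begins: no page faults allowed from here ... */
                done += tx_write(dst + done, src + done, nbytes);
                /* ... tx commits here. */

                n -= nbytes;
                if (n > 0)
                        prefault(src + done, n < BLKSZ ? n : BLKSZ);
        }
        (void) printf("wrote %zu bytes\n", done);
        return (0);
}

Bounding each prefault to one block's worth of pages keeps the amount of memory touched up front small, while still guaranteeing the copy performed inside the transaction never has to wait on a slow fault.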
 
/* ... (beginning of zfs_putapage() elided) ... */
                ASSERT3U(len, <=, PAGESIZE);
                dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx);
                zfs_unmap_page(pp, va);
        } else {
                err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx);
        }

        if (err == 0) {
                uint64_t mtime[2], ctime[2];
                sa_bulk_attr_t bulk[3];
                int count = 0;

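                /*
                 * Update the timestamps and flags, and log the write,
                 * in the same transaction that wrote the page data.
                 */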
                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
                    &mtime, 16);
                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
                    &ctime, 16);
                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
                    &zp->z_pflags, 8);
                zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
                    B_TRUE);
                err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

                zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
        }
        dmu_tx_commit(tx);

out:
        pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
        if (offp)
                *offp = off;
        if (lenp)
                *lenp = len;

        return (err);
}

/*
 * Copy the portion of the file indicated from pages into the file.
 * The pages are stored in a page list attached to the file's vnode.
 *
 *      IN:     vp      - vnode of file to push page data to.
 *              off     - position in file to put data.
 
/*
 * ...
 *      close()
 *      <time lapse>
 *      putpage() via fsflush
 *
 * If we wait for fsflush to come along, we can end up with a modification
 * time that is some arbitrary point in the future.  To prevent this in the
 * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is
 * torn down.
 */
/* ARGSUSED */
static int
zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
    size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
        uint64_t pages = btopr(len);

        ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages);
        atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages);

        if ((flags & MAP_SHARED) && (prot & PROT_WRITE) &&
            vn_has_cached_data(vp))
                (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct);

        return (0);
}
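As a concrete illustration of the scenario the comment above describes, here is a minimal userland sketch (POSIX C; the file name is arbitrary and error handling is abbreviated). On illumos, tearing down a writable MAP_SHARED mapping reaches zfs_delmap(), which initiates the push of the dirty page, and with it the mtime update, instead of leaving both to whenever fsflush runs. Note the push is B_ASYNC, so the flush is started, not necessarily completed, by the time munmap() returns.

#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        int fd;
        char *p;
        struct stat st;

        if ((fd = open("mapped.tmp", O_CREAT | O_RDWR, 0644)) == -1 ||
            ftruncate(fd, 4096) == -1)
                return (1);

        p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return (1);

        (void) strcpy(p, "dirty the page through the mapping");

        /*
         * Tearing down the mapping drives the dirty page toward
         * zfs_putapage() now, rather than at some later fsflush pass.
         */
        (void) munmap(p, 4096);

        if (fstat(fd, &st) == 0)
                (void) printf("mtime: %ld\n", (long)st.st_mtime);
        (void) close(fd);
        return (0);
}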

/*
 * Free or allocate space in a file.  Currently, this function only
 * supports the `F_FREESP' command.  However, this command is somewhat
 * misnamed, as its functionality includes the ability to allocate as
 * well as free space.
 *
 *      IN:     vp      - vnode of file to free data in.
 *              cmd     - action to take (only F_FREESP supported).
 *              bfp     - section of file to free/alloc.
 *              flag    - current file open mode flags.
 *              offset  - current file offset.
 *              cr      - credentials of caller [UNUSED].
 *              ct      - caller context.
 *
 *      RETURN: 0 on success, error code on failure.
 *
 * Timestamps:
 