Print this page
NEX-19083 backport OS-7314 zil_commit should omit cache thrash
9962 zil_commit should omit cache thrash
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>
NEX-5367 special vdev: sync-write options (NEW)
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
6250 zvol_dump_init() can hold txg open
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Albert Lee <trisk@omniti.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-4582 update wrc test cases to allow use of write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
    usr/src/uts/common/io/scsi/targets/sd.c
    usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-1142 move rwlock to vdev to protect vdev_tsd
not just ldi handle.
This way we serialize open/close, yet allow parallel I/O.
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-1065 Added serialization to avoid race
between ldi notification and I/O path.
Also fixes OS-124, NEX-1051, NEX-1062.

*** 24,33 **** --- 24,34 ---- * Portions Copyright 2010 Robert Milkowski * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ /* * ZFS volume emulation driver.
*** 86,95 **** --- 87,97 ---- #include <sys/zil_impl.h> #include <sys/dbuf.h> #include <sys/dmu_tx.h> #include <sys/zfeature.h> #include <sys/zio_checksum.h> + #include <sys/dkioc_free_util.h> #include <sys/zil_impl.h> #include "zfs_namecheck.h" void *zfsdev_state;
*** 970,990 **** mutex_exit(&zfsdev_state_lock); return (error); } static void zvol_get_done(zgd_t *zgd, int error) { if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); zfs_range_unlock(zgd->zgd_rl); - if (error == 0 && zgd->zgd_bp) - zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); - kmem_free(zgd, sizeof (zgd_t)); } /* * Get data to generate a TX_WRITE intent log record. --- 972,990 ---- mutex_exit(&zfsdev_state_lock); return (error); } + /* ARGSUSED */ static void zvol_get_done(zgd_t *zgd, int error) { if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); zfs_range_unlock(zgd->zgd_rl); kmem_free(zgd, sizeof (zgd_t)); } /* * Get data to generate a TX_WRITE intent log record.
*** 1067,1086 **** zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, boolean_t sync) { uint32_t blocksize = zv->zv_volblocksize; zilog_t *zilog = zv->zv_zilog; itx_wr_state_t write_state; if (zil_replaying(zilog, tx)) return; ! if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) write_state = WR_INDIRECT; else if (!spa_has_slogs(zilog->zl_spa) && resid >= blocksize && blocksize > zvol_immediate_write_sz) write_state = WR_INDIRECT; else if (sync) write_state = WR_COPIED; else write_state = WR_NEED_COPY; --- 1067,1115 ---- zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, boolean_t sync) { uint32_t blocksize = zv->zv_volblocksize; zilog_t *zilog = zv->zv_zilog; + spa_t *spa = zilog->zl_spa; + spa_meta_placement_t *mp = &spa->spa_meta_policy; + boolean_t slogging, zil_to_special, write_to_special; + ssize_t immediate_write_sz; itx_wr_state_t write_state; if (zil_replaying(zilog, tx)) return; ! /* ! * See comments in zfs_log_write() ! */ ! ! immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) ! ? 0 : zvol_immediate_write_sz; ! ! zil_to_special = !spa_has_slogs(spa) && ! spa_can_special_be_used(spa) && ! mp->spa_sync_to_special != SYNC_TO_SPECIAL_DISABLED; ! ! write_to_special = !spa_has_slogs(spa) && ! spa_write_data_to_special(spa, zilog->zl_os) && ! (mp->spa_sync_to_special == SYNC_TO_SPECIAL_ALWAYS || ! (mp->spa_sync_to_special == SYNC_TO_SPECIAL_BALANCED && ! spa->spa_avg_stat_rotor % 100 < spa->spa_special_to_normal_ratio)); ! ! slogging = (spa_has_slogs(spa) || zil_to_special) && ! (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); ! ! if (blocksize > immediate_write_sz && !slogging && ! 
resid >= blocksize && off % blocksize == 0) write_state = WR_INDIRECT; + else if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) + write_state = WR_INDIRECT; else if (!spa_has_slogs(zilog->zl_spa) && resid >= blocksize && blocksize > zvol_immediate_write_sz) write_state = WR_INDIRECT; + else if (write_to_special) + write_state = WR_INDIRECT; else if (sync) write_state = WR_COPIED; else write_state = WR_NEED_COPY;
*** 1126,1136 **** static int zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset, uint64_t size, boolean_t doread, boolean_t isdump) { vdev_disk_t *dvd; ! int c; int numerrors = 0; if (vd->vdev_ops == &vdev_mirror_ops || vd->vdev_ops == &vdev_replacing_ops || vd->vdev_ops == &vdev_spare_ops) { --- 1155,1165 ---- static int zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset, uint64_t size, boolean_t doread, boolean_t isdump) { vdev_disk_t *dvd; ! int c, rc; int numerrors = 0; if (vd->vdev_ops == &vdev_mirror_ops || vd->vdev_ops == &vdev_replacing_ops || vd->vdev_ops == &vdev_spare_ops) {
*** 1158,1181 **** addr, size, offset, origoffset, doread, isdump)); } offset += VDEV_LABEL_START_SIZE; if (ddi_in_panic() || isdump) { ASSERT(!doread); ! if (doread) return (SET_ERROR(EIO)); ! dvd = vd->vdev_tsd; ASSERT3P(dvd, !=, NULL); ! return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), ! lbtodb(size))); } else { ! dvd = vd->vdev_tsd; ASSERT3P(dvd, !=, NULL); ! return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size, ! offset, doread ? B_READ : B_WRITE)); } } static int zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, boolean_t doread, boolean_t isdump) --- 1187,1228 ---- addr, size, offset, origoffset, doread, isdump)); } offset += VDEV_LABEL_START_SIZE; + rw_enter(&vd->vdev_tsd_lock, RW_READER); + dvd = vd->vdev_tsd; if (ddi_in_panic() || isdump) { ASSERT(!doread); ! if (doread) { ! rw_exit(&vd->vdev_tsd_lock); return (SET_ERROR(EIO)); ! } ! /* We assume here dvd is not NULL */ ASSERT3P(dvd, !=, NULL); ! ! /* If our assumption is wrong, we do not want to crash */ ! if (dvd != NULL && dvd->vd_lh != NULL) { ! rc = ldi_dump(dvd->vd_lh, addr, lbtodb(offset), ! lbtodb(size)); } else { ! rc = SET_ERROR(ENXIO); ! } ! } else { ! /* We assume here dvd is not NULL */ ASSERT3P(dvd, !=, NULL); ! ! /* If our assumption is wrong, we do not want to crash */ ! if (dvd != NULL && dvd->vd_lh != NULL) { ! rc = vdev_disk_ldi_physio(dvd->vd_lh, addr, size, ! offset, doread ? B_READ : B_WRITE); ! } else { ! rc = SET_ERROR(ENXIO); } + } + rw_exit(&vd->vdev_tsd_lock); + return (rc); } static int zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, boolean_t doread, boolean_t isdump)
*** 1778,1825 **** zfs_range_unlock(rl); break; case DKIOCFREE: { ! dkioc_free_t df; dmu_tx_t *tx; if (!zvol_unmap_enabled) break; ! if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) { error = SET_ERROR(EFAULT); break; } /* ! * Apply Postel's Law to length-checking. If they overshoot, ! * just blank out until the end, if there's a need to blank ! * out anything. */ ! if (df.df_start >= zv->zv_volsize) ! break; /* No need to do anything... */ ! mutex_exit(&zfsdev_state_lock); ! ! rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length, RL_WRITER); tx = dmu_tx_create(zv->zv_objset); - dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error != 0) { dmu_tx_abort(tx); } else { ! zvol_log_truncate(zv, tx, df.df_start, ! df.df_length, B_TRUE); dmu_tx_commit(tx); ! error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, ! df.df_start, df.df_length); } zfs_range_unlock(rl); /* * If the write-cache is disabled, 'sync' property * is set to 'always', or if the caller is asking for * a synchronous free, commit this operation to the zil. * This will sync any previous uncommitted writes to the --- 1825,1889 ---- zfs_range_unlock(rl); break; case DKIOCFREE: { ! dkioc_free_list_t *dfl; dmu_tx_t *tx; + mutex_exit(&zfsdev_state_lock); + if (!zvol_unmap_enabled) break; ! if (!(flag & FKIOCTL)) { ! dfl = dfl_copyin((void *)arg, flag, KM_SLEEP); ! if (dfl == NULL) { error = SET_ERROR(EFAULT); break; } + } else { + dfl = (dkioc_free_list_t *)arg; + } + for (int i = 0; i < dfl->dfl_num_exts; i++) { + uint64_t start = dfl->dfl_exts[i].dfle_start, + length = dfl->dfl_exts[i].dfle_length, + end = start + length; + /* ! * Apply Postel's Law to length-checking. If they ! * overshoot, just blank out until the end, if there's ! * a need to blank out anything. */ ! if (start >= zv->zv_volsize) ! continue; /* No need to do anything... */ ! if (end > zv->zv_volsize) { ! end = DMU_OBJECT_END; ! length = end - start; ! } ! 
rl = zfs_range_lock(&zv->zv_znode, start, length, RL_WRITER); tx = dmu_tx_create(zv->zv_objset); error = dmu_tx_assign(tx, TXG_WAIT); if (error != 0) { dmu_tx_abort(tx); } else { ! zvol_log_truncate(zv, tx, start, length, ! B_TRUE); dmu_tx_commit(tx); ! error = dmu_free_long_range(zv->zv_objset, ! ZVOL_OBJ, start, length); } zfs_range_unlock(rl); + if (error != 0) + break; + } + /* * If the write-cache is disabled, 'sync' property * is set to 'always', or if the caller is asking for * a synchronous free, commit this operation to the zil. * This will sync any previous uncommitted writes to the
*** 1827,1840 **** * Can be overridden by the zvol_unmap_sync_enabled tunable. */ if ((error == 0) && zvol_unmap_sync_enabled && (!(zv->zv_flags & ZVOL_WCE) || (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) || ! (df.df_flags & DF_WAIT_SYNC))) { zil_commit(zv->zv_zilog, ZVOL_OBJ); } return (error); } default: error = SET_ERROR(ENOTTY); --- 1891,1907 ---- * Can be overridden by the zvol_unmap_sync_enabled tunable. */ if ((error == 0) && zvol_unmap_sync_enabled && (!(zv->zv_flags & ZVOL_WCE) || (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) || ! (dfl->dfl_flags & DF_WAIT_SYNC))) { zil_commit(zv->zv_zilog, ZVOL_OBJ); } + if (!(flag & FKIOCTL)) + dfl_free(dfl); + return (error); } default: error = SET_ERROR(ENOTTY);