Print this page
NEX-19083 backport OS-7314 zil_commit should omit cache thrash
9962 zil_commit should omit cache thrash
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>
NEX-5367 special vdev: sync-write options (NEW)
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
6250 zvol_dump_init() can hold txg open
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Albert Lee <trisk@omniti.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-4582 update wrc test cases to allow use of write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
usr/src/uts/common/io/scsi/targets/sd.c
usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-1142 move rwlock to vdev to protect vdev_tsd
not just ldi handle.
This way we serialize open/close, yet allow parallel I/O.
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-1065 Added serialization to avoid race
between ldi notification and I/O path.
Also fixes OS-124, NEX-1051, NEX-1062.
*** 24,33 ****
--- 24,34 ----
* Portions Copyright 2010 Robert Milkowski
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
/*
* ZFS volume emulation driver.
*** 86,95 ****
--- 87,97 ----
#include <sys/zil_impl.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/zfeature.h>
#include <sys/zio_checksum.h>
+ #include <sys/dkioc_free_util.h>
#include <sys/zil_impl.h>
#include "zfs_namecheck.h"
void *zfsdev_state;
*** 970,990 ****
mutex_exit(&zfsdev_state_lock);
return (error);
}
static void
zvol_get_done(zgd_t *zgd, int error)
{
if (zgd->zgd_db)
dmu_buf_rele(zgd->zgd_db, zgd);
zfs_range_unlock(zgd->zgd_rl);
- if (error == 0 && zgd->zgd_bp)
- zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
-
kmem_free(zgd, sizeof (zgd_t));
}
/*
* Get data to generate a TX_WRITE intent log record.
--- 972,990 ----
mutex_exit(&zfsdev_state_lock);
return (error);
}
+ /* ARGSUSED */
static void
zvol_get_done(zgd_t *zgd, int error)
{
if (zgd->zgd_db)
dmu_buf_rele(zgd->zgd_db, zgd);
zfs_range_unlock(zgd->zgd_rl);
kmem_free(zgd, sizeof (zgd_t));
}
/*
* Get data to generate a TX_WRITE intent log record.
*** 1067,1086 ****
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
boolean_t sync)
{
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
itx_wr_state_t write_state;
if (zil_replaying(zilog, tx))
return;
! if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
write_state = WR_INDIRECT;
else if (!spa_has_slogs(zilog->zl_spa) &&
resid >= blocksize && blocksize > zvol_immediate_write_sz)
write_state = WR_INDIRECT;
else if (sync)
write_state = WR_COPIED;
else
write_state = WR_NEED_COPY;
--- 1067,1115 ----
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
boolean_t sync)
{
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
+ spa_t *spa = zilog->zl_spa;
+ spa_meta_placement_t *mp = &spa->spa_meta_policy;
+ boolean_t slogging, zil_to_special, write_to_special;
+ ssize_t immediate_write_sz;
itx_wr_state_t write_state;
if (zil_replaying(zilog, tx))
return;
! /*
! * See comments in zfs_log_write()
! */
!
! immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
! ? 0 : zvol_immediate_write_sz;
!
! zil_to_special = !spa_has_slogs(spa) &&
! spa_can_special_be_used(spa) &&
! mp->spa_sync_to_special != SYNC_TO_SPECIAL_DISABLED;
!
! write_to_special = !spa_has_slogs(spa) &&
! spa_write_data_to_special(spa, zilog->zl_os) &&
! (mp->spa_sync_to_special == SYNC_TO_SPECIAL_ALWAYS ||
! (mp->spa_sync_to_special == SYNC_TO_SPECIAL_BALANCED &&
! spa->spa_avg_stat_rotor % 100 < spa->spa_special_to_normal_ratio));
!
! slogging = (spa_has_slogs(spa) || zil_to_special) &&
! (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
!
! if (blocksize > immediate_write_sz && !slogging &&
! resid >= blocksize && off % blocksize == 0)
write_state = WR_INDIRECT;
+ else if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ write_state = WR_INDIRECT;
else if (!spa_has_slogs(zilog->zl_spa) &&
resid >= blocksize && blocksize > zvol_immediate_write_sz)
write_state = WR_INDIRECT;
+ else if (write_to_special)
+ write_state = WR_INDIRECT;
else if (sync)
write_state = WR_COPIED;
else
write_state = WR_NEED_COPY;
*** 1126,1136 ****
static int
zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset,
uint64_t size, boolean_t doread, boolean_t isdump)
{
vdev_disk_t *dvd;
! int c;
int numerrors = 0;
if (vd->vdev_ops == &vdev_mirror_ops ||
vd->vdev_ops == &vdev_replacing_ops ||
vd->vdev_ops == &vdev_spare_ops) {
--- 1155,1165 ----
static int
zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset,
uint64_t size, boolean_t doread, boolean_t isdump)
{
vdev_disk_t *dvd;
! int c, rc;
int numerrors = 0;
if (vd->vdev_ops == &vdev_mirror_ops ||
vd->vdev_ops == &vdev_replacing_ops ||
vd->vdev_ops == &vdev_spare_ops) {
*** 1158,1181 ****
addr, size, offset, origoffset, doread, isdump));
}
offset += VDEV_LABEL_START_SIZE;
if (ddi_in_panic() || isdump) {
ASSERT(!doread);
! if (doread)
return (SET_ERROR(EIO));
! dvd = vd->vdev_tsd;
ASSERT3P(dvd, !=, NULL);
! return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
! lbtodb(size)));
} else {
! dvd = vd->vdev_tsd;
ASSERT3P(dvd, !=, NULL);
! return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size,
! offset, doread ? B_READ : B_WRITE));
}
}
static int
zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
boolean_t doread, boolean_t isdump)
--- 1187,1228 ----
addr, size, offset, origoffset, doread, isdump));
}
offset += VDEV_LABEL_START_SIZE;
+ rw_enter(&vd->vdev_tsd_lock, RW_READER);
+ dvd = vd->vdev_tsd;
if (ddi_in_panic() || isdump) {
ASSERT(!doread);
! if (doread) {
! rw_exit(&vd->vdev_tsd_lock);
return (SET_ERROR(EIO));
! }
! /* We assume here dvd is not NULL */
ASSERT3P(dvd, !=, NULL);
!
! /* If our assumption is wrong, we do not want to crash */
! if (dvd != NULL && dvd->vd_lh != NULL) {
! rc = ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
! lbtodb(size));
} else {
! rc = SET_ERROR(ENXIO);
! }
! } else {
! /* We assume here dvd is not NULL */
ASSERT3P(dvd, !=, NULL);
!
! /* If our assumption is wrong, we do not want to crash */
! if (dvd != NULL && dvd->vd_lh != NULL) {
! rc = vdev_disk_ldi_physio(dvd->vd_lh, addr, size,
! offset, doread ? B_READ : B_WRITE);
! } else {
! rc = SET_ERROR(ENXIO);
}
+ }
+ rw_exit(&vd->vdev_tsd_lock);
+ return (rc);
}
static int
zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
boolean_t doread, boolean_t isdump)
*** 1778,1825 ****
zfs_range_unlock(rl);
break;
case DKIOCFREE:
{
! dkioc_free_t df;
dmu_tx_t *tx;
if (!zvol_unmap_enabled)
break;
! if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
error = SET_ERROR(EFAULT);
break;
}
/*
! * Apply Postel's Law to length-checking. If they overshoot,
! * just blank out until the end, if there's a need to blank
! * out anything.
*/
! if (df.df_start >= zv->zv_volsize)
! break; /* No need to do anything... */
! mutex_exit(&zfsdev_state_lock);
!
! rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
RL_WRITER);
tx = dmu_tx_create(zv->zv_objset);
- dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error != 0) {
dmu_tx_abort(tx);
} else {
! zvol_log_truncate(zv, tx, df.df_start,
! df.df_length, B_TRUE);
dmu_tx_commit(tx);
! error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
! df.df_start, df.df_length);
}
zfs_range_unlock(rl);
/*
* If the write-cache is disabled, 'sync' property
* is set to 'always', or if the caller is asking for
* a synchronous free, commit this operation to the zil.
* This will sync any previous uncommitted writes to the
--- 1825,1889 ----
zfs_range_unlock(rl);
break;
case DKIOCFREE:
{
! dkioc_free_list_t *dfl;
dmu_tx_t *tx;
+ mutex_exit(&zfsdev_state_lock);
+
if (!zvol_unmap_enabled)
break;
! if (!(flag & FKIOCTL)) {
! dfl = dfl_copyin((void *)arg, flag, KM_SLEEP);
! if (dfl == NULL) {
error = SET_ERROR(EFAULT);
break;
}
+ } else {
+ dfl = (dkioc_free_list_t *)arg;
+ }
+ for (int i = 0; i < dfl->dfl_num_exts; i++) {
+ uint64_t start = dfl->dfl_exts[i].dfle_start,
+ length = dfl->dfl_exts[i].dfle_length,
+ end = start + length;
+
/*
! * Apply Postel's Law to length-checking. If they
! * overshoot, just blank out until the end, if there's
! * a need to blank out anything.
*/
! if (start >= zv->zv_volsize)
! continue; /* No need to do anything... */
! if (end > zv->zv_volsize) {
! end = DMU_OBJECT_END;
! length = end - start;
! }
! rl = zfs_range_lock(&zv->zv_znode, start, length,
RL_WRITER);
tx = dmu_tx_create(zv->zv_objset);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error != 0) {
dmu_tx_abort(tx);
} else {
! zvol_log_truncate(zv, tx, start, length,
! B_TRUE);
dmu_tx_commit(tx);
! error = dmu_free_long_range(zv->zv_objset,
! ZVOL_OBJ, start, length);
}
zfs_range_unlock(rl);
+ if (error != 0)
+ break;
+ }
+
/*
* If the write-cache is disabled, 'sync' property
* is set to 'always', or if the caller is asking for
* a synchronous free, commit this operation to the zil.
* This will sync any previous uncommitted writes to the
*** 1827,1840 ****
* Can be overridden by the zvol_unmap_sync_enabled tunable.
*/
if ((error == 0) && zvol_unmap_sync_enabled &&
(!(zv->zv_flags & ZVOL_WCE) ||
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
! (df.df_flags & DF_WAIT_SYNC))) {
zil_commit(zv->zv_zilog, ZVOL_OBJ);
}
return (error);
}
default:
error = SET_ERROR(ENOTTY);
--- 1891,1907 ----
* Can be overridden by the zvol_unmap_sync_enabled tunable.
*/
if ((error == 0) && zvol_unmap_sync_enabled &&
(!(zv->zv_flags & ZVOL_WCE) ||
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
! (dfl->dfl_flags & DF_WAIT_SYNC))) {
zil_commit(zv->zv_zilog, ZVOL_OBJ);
}
+ if (!(flag & FKIOCTL))
+ dfl_free(dfl);
+
return (error);
}
default:
error = SET_ERROR(ENOTTY);