NEX-19083 backport OS-7314 zil_commit should omit cache thrash
9962 zil_commit should omit cache thrash
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>
NEX-5367 special vdev: sync-write options (NEW)
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
6250 zvol_dump_init() can hold txg open
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Albert Lee <trisk@omniti.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-4582 update wrc test cases to allow use of write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
usr/src/uts/common/io/scsi/targets/sd.c
usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-1142 move rwlock to vdev to protect vdev_tsd,
not just the ldi handle.
This way we serialize open/close, yet allow parallel I/O.
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-1065 Added serialization to avoid a race
between ldi notification and the I/O path.
Also fixes OS-124, NEX-1051, NEX-1062.
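
Illustrative sketch (not the driver code): NEX-5367 above introduces the sync-to-special policy that the zvol_log_write() hunk further down consults. The enum values, function name, and parameters here are stand-ins for the real spa_meta_placement_t fields, shown only to make the "balanced" rotor decision concrete.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-ins mirroring the SYNC_TO_SPECIAL_* settings. */
enum sync_to_special { STS_DISABLED, STS_ALWAYS, STS_BALANCED };

/*
 * Decide whether a sync write should be placed on the special vdev.
 * Under STS_BALANCED, a rotor spreads writes so that roughly
 * special_to_normal_ratio out of every 100 go to the special vdev.
 */
bool
route_sync_to_special(enum sync_to_special policy, uint64_t rotor,
    uint64_t special_to_normal_ratio)
{
	switch (policy) {
	case STS_ALWAYS:
		return (true);
	case STS_BALANCED:
		return (rotor % 100 < special_to_normal_ratio);
	default:
		return (false);
	}
}

With a ratio of 30, roughly 30 of every 100 sync writes land on the special vdev and the rest take the normal log path.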
@@ -24,10 +24,11 @@
* Portions Copyright 2010 Robert Milkowski
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
/*
* ZFS volume emulation driver.
@@ -86,10 +87,11 @@
#include <sys/zil_impl.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/zfeature.h>
#include <sys/zio_checksum.h>
+#include <sys/dkioc_free_util.h>
#include <sys/zil_impl.h>
#include "zfs_namecheck.h"
void *zfsdev_state;
@@ -970,21 +972,19 @@
mutex_exit(&zfsdev_state_lock);
return (error);
}
+/* ARGSUSED */
static void
zvol_get_done(zgd_t *zgd, int error)
{
if (zgd->zgd_db)
dmu_buf_rele(zgd->zgd_db, zgd);
zfs_range_unlock(zgd->zgd_rl);
- if (error == 0 && zgd->zgd_bp)
- zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
-
kmem_free(zgd, sizeof (zgd_t));
}
/*
* Get data to generate a TX_WRITE intent log record.
@@ -1067,20 +1067,49 @@
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
boolean_t sync)
{
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
+ spa_t *spa = zilog->zl_spa;
+ spa_meta_placement_t *mp = &spa->spa_meta_policy;
+ boolean_t slogging, zil_to_special, write_to_special;
+ ssize_t immediate_write_sz;
itx_wr_state_t write_state;
if (zil_replaying(zilog, tx))
return;
- if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ /*
+ * See comments in zfs_log_write()
+ */
+
+ immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ ? 0 : zvol_immediate_write_sz;
+
+ zil_to_special = !spa_has_slogs(spa) &&
+ spa_can_special_be_used(spa) &&
+ mp->spa_sync_to_special != SYNC_TO_SPECIAL_DISABLED;
+
+ write_to_special = !spa_has_slogs(spa) &&
+ spa_write_data_to_special(spa, zilog->zl_os) &&
+ (mp->spa_sync_to_special == SYNC_TO_SPECIAL_ALWAYS ||
+ (mp->spa_sync_to_special == SYNC_TO_SPECIAL_BALANCED &&
+ spa->spa_avg_stat_rotor % 100 < spa->spa_special_to_normal_ratio));
+
+ slogging = (spa_has_slogs(spa) || zil_to_special) &&
+ (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+
+ if (blocksize > immediate_write_sz && !slogging &&
+ resid >= blocksize && off % blocksize == 0)
write_state = WR_INDIRECT;
+ else if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ write_state = WR_INDIRECT;
else if (!spa_has_slogs(zilog->zl_spa) &&
resid >= blocksize && blocksize > zvol_immediate_write_sz)
write_state = WR_INDIRECT;
+ else if (write_to_special)
+ write_state = WR_INDIRECT;
else if (sync)
write_state = WR_COPIED;
else
write_state = WR_NEED_COPY;
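
A minimal standalone sketch of the branch order the new zvol_log_write() code above uses to pick the itx write state. The pool-level checks (slog presence, special-vdev routing) are reduced to plain booleans, and a single threshold stands in for both immediate_write_sz and the raw zvol_immediate_write_sz tunable, so this is a simplification rather than the actual kernel logic.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for the itx_wr_state_t values used above. */
typedef enum { WR_INDIRECT, WR_COPIED, WR_NEED_COPY } wr_state_t;

wr_state_t
pick_write_state(uint32_t blocksize, int64_t resid, int64_t off,
    int64_t immediate_write_sz, bool throughput_bias, bool has_slogs,
    bool slogging, bool write_to_special, bool sync)
{
	if (blocksize > immediate_write_sz && !slogging &&
	    resid >= blocksize && off % blocksize == 0)
		return (WR_INDIRECT);	/* large aligned write, no log dev */
	else if (throughput_bias)
		return (WR_INDIRECT);	/* logbias=throughput never copies */
	else if (!has_slogs && resid >= blocksize &&
	    blocksize > immediate_write_sz)
		return (WR_INDIRECT);
	else if (write_to_special)
		return (WR_INDIRECT);	/* data already routed to special */
	else if (sync)
		return (WR_COPIED);	/* copy into the itx right away */
	else
		return (WR_NEED_COPY);	/* copy later, only if committed */
}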
@@ -1126,11 +1155,11 @@
static int
zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset,
uint64_t size, boolean_t doread, boolean_t isdump)
{
vdev_disk_t *dvd;
- int c;
+ int c, rc;
int numerrors = 0;
if (vd->vdev_ops == &vdev_mirror_ops ||
vd->vdev_ops == &vdev_replacing_ops ||
vd->vdev_ops == &vdev_spare_ops) {
@@ -1158,24 +1187,42 @@
addr, size, offset, origoffset, doread, isdump));
}
offset += VDEV_LABEL_START_SIZE;
+ rw_enter(&vd->vdev_tsd_lock, RW_READER);
+ dvd = vd->vdev_tsd;
if (ddi_in_panic() || isdump) {
ASSERT(!doread);
- if (doread)
+ if (doread) {
+ rw_exit(&vd->vdev_tsd_lock);
return (SET_ERROR(EIO));
- dvd = vd->vdev_tsd;
+ }
+ /* We assume here dvd is not NULL */
ASSERT3P(dvd, !=, NULL);
- return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
- lbtodb(size)));
+
+ /* If our assumption is wrong, we do not want to crash */
+ if (dvd != NULL && dvd->vd_lh != NULL) {
+ rc = ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
+ lbtodb(size));
} else {
- dvd = vd->vdev_tsd;
+ rc = SET_ERROR(ENXIO);
+ }
+ } else {
+ /* We assume here dvd is not NULL */
ASSERT3P(dvd, !=, NULL);
- return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size,
- offset, doread ? B_READ : B_WRITE));
+
+ /* If our assumption is wrong, we do not want to crash */
+ if (dvd != NULL && dvd->vd_lh != NULL) {
+ rc = vdev_disk_ldi_physio(dvd->vd_lh, addr, size,
+ offset, doread ? B_READ : B_WRITE);
+ } else {
+ rc = SET_ERROR(ENXIO);
}
+ }
+ rw_exit(&vd->vdev_tsd_lock);
+ return (rc);
}
static int
zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
boolean_t doread, boolean_t isdump)
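
The zvol_dumpio_vdev() hunk above takes the new vdev_tsd_lock as reader around every vdev_tsd dereference and re-checks the LDI handle under the lock, returning ENXIO instead of crashing if the device was closed concurrently (NEX-1142/NEX-1065). A userland analog of that pattern, using pthread rwlocks in place of the kernel krwlock_t; the struct and function names are illustrative only:

#include <errno.h>
#include <pthread.h>
#include <stddef.h>

/* Illustrative stand-ins for vdev_t/vdev_disk_t; not the real structs. */
struct disk_state { int handle_valid; };
struct vdev {
	pthread_rwlock_t tsd_lock;	/* analog of vdev_tsd_lock */
	struct disk_state *tsd;		/* analog of vdev_tsd */
};

/* I/O path: shared (reader) lock, so many I/Os can run in parallel. */
int
vdev_io(struct vdev *vd)
{
	int rc;

	pthread_rwlock_rdlock(&vd->tsd_lock);
	if (vd->tsd != NULL && vd->tsd->handle_valid)
		rc = 0;		/* would issue the LDI I/O here */
	else
		rc = ENXIO;	/* device was closed underneath us */
	pthread_rwlock_unlock(&vd->tsd_lock);
	return (rc);
}

/* Open/close (and LDI notification) path: exclusive (writer) lock. */
void
vdev_teardown(struct vdev *vd)
{
	pthread_rwlock_wrlock(&vd->tsd_lock);
	vd->tsd = NULL;		/* tear down while no I/O holds the lock */
	pthread_rwlock_unlock(&vd->tsd_lock);
}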
@@ -1778,48 +1825,65 @@
zfs_range_unlock(rl);
break;
case DKIOCFREE:
{
- dkioc_free_t df;
+ dkioc_free_list_t *dfl;
dmu_tx_t *tx;
+ mutex_exit(&zfsdev_state_lock);
+
if (!zvol_unmap_enabled)
break;
- if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
+ if (!(flag & FKIOCTL)) {
+ dfl = dfl_copyin((void *)arg, flag, KM_SLEEP);
+ if (dfl == NULL) {
error = SET_ERROR(EFAULT);
break;
}
+ } else {
+ dfl = (dkioc_free_list_t *)arg;
+ }
+ for (int i = 0; i < dfl->dfl_num_exts; i++) {
+ uint64_t start = dfl->dfl_exts[i].dfle_start,
+ length = dfl->dfl_exts[i].dfle_length,
+ end = start + length;
+
/*
- * Apply Postel's Law to length-checking. If they overshoot,
- * just blank out until the end, if there's a need to blank
- * out anything.
+ * Apply Postel's Law to length-checking. If they
+ * overshoot, just blank out until the end, if there's
+ * a need to blank out anything.
*/
- if (df.df_start >= zv->zv_volsize)
- break; /* No need to do anything... */
+ if (start >= zv->zv_volsize)
+ continue; /* No need to do anything... */
+ if (end > zv->zv_volsize) {
+ end = DMU_OBJECT_END;
+ length = end - start;
+ }
- mutex_exit(&zfsdev_state_lock);
-
- rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
+ rl = zfs_range_lock(&zv->zv_znode, start, length,
RL_WRITER);
tx = dmu_tx_create(zv->zv_objset);
- dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error != 0) {
dmu_tx_abort(tx);
} else {
- zvol_log_truncate(zv, tx, df.df_start,
- df.df_length, B_TRUE);
+ zvol_log_truncate(zv, tx, start, length,
+ B_TRUE);
dmu_tx_commit(tx);
- error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
- df.df_start, df.df_length);
+ error = dmu_free_long_range(zv->zv_objset,
+ ZVOL_OBJ, start, length);
}
zfs_range_unlock(rl);
+ if (error != 0)
+ break;
+ }
+
/*
* If the write-cache is disabled, 'sync' property
* is set to 'always', or if the caller is asking for
* a synchronous free, commit this operation to the zil.
* This will sync any previous uncommitted writes to the
@@ -1827,14 +1891,17 @@
* Can be overridden by the zvol_unmap_sync_enabled tunable.
*/
if ((error == 0) && zvol_unmap_sync_enabled &&
(!(zv->zv_flags & ZVOL_WCE) ||
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
- (df.df_flags & DF_WAIT_SYNC))) {
+ (dfl->dfl_flags & DF_WAIT_SYNC))) {
zil_commit(zv->zv_zilog, ZVOL_OBJ);
}
+ if (!(flag & FKIOCTL))
+ dfl_free(dfl);
+
return (error);
}
default:
error = SET_ERROR(ENOTTY);
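
A small standalone sketch of the per-extent length check the new DKIOCFREE loop performs. The real code extends an overshooting extent with DMU_OBJECT_END so dmu_free_long_range() runs to the end of the object; the sketch below shows the equivalent clamp to the volume size, with a hypothetical extent struct standing in for dfl_exts[]:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for one dfl_exts[] entry. */
struct extent { uint64_t start; uint64_t length; };

/*
 * Clamp an extent to the volume size.  Returns 0 if the extent lies
 * entirely past the end of the volume and can be skipped, 1 otherwise
 * (with *lenp trimmed so that start + *lenp <= volsize).
 */
int
clamp_extent(const struct extent *ext, uint64_t volsize, uint64_t *lenp)
{
	uint64_t end = ext->start + ext->length;

	if (ext->start >= volsize)
		return (0);	/* nothing to free */
	*lenp = (end > volsize) ? volsize - ext->start : ext->length;
	return (1);
}

int
main(void)
{
	struct extent e = { .start = 4096, .length = 1 << 20 };
	uint64_t volsize = 8192, len;

	if (clamp_extent(&e, volsize, &len))
		printf("free [%llu, %llu)\n",
		    (unsigned long long)e.start,
		    (unsigned long long)(e.start + len));
	return (0);
}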