NEX-19083 backport OS-7314 zil_commit should omit cache thrash
9962 zil_commit should omit cache thrash
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>
NEX-5367 special vdev: sync-write options (NEW)
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
6250 zvol_dump_init() can hold txg open
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Albert Lee <trisk@omniti.com>
Reviewed by: Xin Li <delphij@freebsd.org>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-4582 update wrc test cases to allow use of write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
    usr/src/uts/common/io/scsi/targets/sd.c
    usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-1142 move rwlock to vdev to protect vdev_tsd,
not just the ldi handle.
This way we serialize open/close, yet allow parallel I/O (see the sketch after this log).
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-1065 Added serialization to avoid race
between ldi notification and I/O path.
Also fixes OS-124, NEX-1051, NEX-1062.

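The NEX-1142/NEX-1065 changes above replace handle-level locking with a vdev-level
rwlock: the open/close paths, which swap out vdev_tsd, take the lock as writers,
while the I/O and dump paths take it as readers and can proceed in parallel. A
minimal user-space sketch of the same pattern, using pthread rwlocks and
hypothetical names (disk_handle, dev_open, dev_io are illustrations, not driver
symbols):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for vdev_tsd: a handle that open/close may replace. */
    struct disk_handle {
            int fd;
    };

    static pthread_rwlock_t tsd_lock = PTHREAD_RWLOCK_INITIALIZER;
    static struct disk_handle *tsd;         /* protected by tsd_lock */

    /* open/close serialize against each other and against all in-flight I/O */
    static void
    dev_open(int fd)
    {
            struct disk_handle *h = malloc(sizeof (*h));

            h->fd = fd;
            pthread_rwlock_wrlock(&tsd_lock);
            free(tsd);                      /* drop any previous handle */
            tsd = h;
            pthread_rwlock_unlock(&tsd_lock);
    }

    /* I/O paths only need the handle to stay valid; readers run in parallel */
    static int
    dev_io(void)
    {
            int rc;

            pthread_rwlock_rdlock(&tsd_lock);
            if (tsd == NULL || tsd->fd < 0)
                    rc = -1;                /* analogous to returning ENXIO */
            else
                    rc = tsd->fd;           /* issue I/O against the handle here */
            pthread_rwlock_unlock(&tsd_lock);
            return (rc);
    }

    int
    main(void)
    {
            dev_open(3);
            printf("io -> %d\n", dev_io());
            return (0);
    }

The same shape appears in the zvol_dumpio_vdev() hunk below: vdev_tsd is only
dereferenced under vd->vdev_tsd_lock, and a NULL handle yields ENXIO instead of
a crash.
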
@@ -24,10 +24,11 @@
  * Portions Copyright 2010 Robert Milkowski
  *
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  */
 
 /*
  * ZFS volume emulation driver.

@@ -86,10 +87,11 @@
 #include <sys/zil_impl.h>
 #include <sys/dbuf.h>
 #include <sys/dmu_tx.h>
 #include <sys/zfeature.h>
 #include <sys/zio_checksum.h>
+#include <sys/dkioc_free_util.h>
 #include <sys/zil_impl.h>
 
 #include "zfs_namecheck.h"
 
 void *zfsdev_state;

@@ -970,21 +972,19 @@
 
         mutex_exit(&zfsdev_state_lock);
         return (error);
 }
 
+/* ARGSUSED */
 static void
 zvol_get_done(zgd_t *zgd, int error)
 {
         if (zgd->zgd_db)
                 dmu_buf_rele(zgd->zgd_db, zgd);
 
         zfs_range_unlock(zgd->zgd_rl);
 
-        if (error == 0 && zgd->zgd_bp)
-                zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
-
         kmem_free(zgd, sizeof (zgd_t));
 }
 
 /*
  * Get data to generate a TX_WRITE intent log record.

@@ -1067,20 +1067,49 @@
 zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
     boolean_t sync)
 {
         uint32_t blocksize = zv->zv_volblocksize;
         zilog_t *zilog = zv->zv_zilog;
+        spa_t *spa = zilog->zl_spa;
+        spa_meta_placement_t *mp = &spa->spa_meta_policy;
+        boolean_t slogging, zil_to_special, write_to_special;
+        ssize_t immediate_write_sz;
         itx_wr_state_t write_state;
 
         if (zil_replaying(zilog, tx))
                 return;
 
-        if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+        /*
+         * See comments in zfs_log_write()
+         */
+
+        immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+            ? 0 : zvol_immediate_write_sz;
+
+        zil_to_special = !spa_has_slogs(spa) &&
+            spa_can_special_be_used(spa) &&
+            mp->spa_sync_to_special != SYNC_TO_SPECIAL_DISABLED;
+
+        write_to_special = !spa_has_slogs(spa) &&
+            spa_write_data_to_special(spa, zilog->zl_os) &&
+            (mp->spa_sync_to_special == SYNC_TO_SPECIAL_ALWAYS ||
+            (mp->spa_sync_to_special == SYNC_TO_SPECIAL_BALANCED &&
+            spa->spa_avg_stat_rotor % 100 < spa->spa_special_to_normal_ratio));
+
+        slogging = (spa_has_slogs(spa) || zil_to_special) &&
+            (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+
+        if (blocksize > immediate_write_sz && !slogging &&
+            resid >= blocksize && off % blocksize == 0)
                 write_state = WR_INDIRECT;
+        else if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+                write_state = WR_INDIRECT;
         else if (!spa_has_slogs(zilog->zl_spa) &&
             resid >= blocksize && blocksize > zvol_immediate_write_sz)
                 write_state = WR_INDIRECT;
+        else if (write_to_special)
+                write_state = WR_INDIRECT;
         else if (sync)
                 write_state = WR_COPIED;
         else
                 write_state = WR_NEED_COPY;
 

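For review, the write_state selection added in the hunk above can be read as a
small decision function. The sketch below condenses it, with plain booleans
standing in for the predicates computed from spa_has_slogs(),
spa_can_special_be_used(), spa_write_data_to_special(), the sync-to-special
policy, and the logbias/alignment checks; it illustrates the patch's intent and
is not the driver code itself.

    #include <stdbool.h>

    typedef enum { WR_INDIRECT, WR_COPIED, WR_NEED_COPY } wr_state_t;

    /*
     * Illustration only: each flag corresponds to one predicate computed in
     * zvol_log_write() above (logbias, slog/special availability, policy,
     * write size/alignment, and the sync flag).
     */
    static wr_state_t
    pick_write_state(bool large_aligned, bool slogging, bool throughput_bias,
        bool no_slogs_and_big_block, bool write_to_special, bool sync)
    {
            /* Large, block-aligned writes skip the log copy unless slogging. */
            if (large_aligned && !slogging)
                    return (WR_INDIRECT);
            /* logbias=throughput always prefers indirect writes. */
            if (throughput_bias)
                    return (WR_INDIRECT);
            /* No slog and the volblocksize exceeds the immediate-write size. */
            if (no_slogs_and_big_block)
                    return (WR_INDIRECT);
            /* Policy routes sync data to the special class: go indirect too. */
            if (write_to_special)
                    return (WR_INDIRECT);
            /* Otherwise copy into the itx now (sync) or at commit (async). */
            return (sync ? WR_COPIED : WR_NEED_COPY);
    }
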
@@ -1126,11 +1155,11 @@
 static int
 zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset,
     uint64_t size, boolean_t doread, boolean_t isdump)
 {
         vdev_disk_t *dvd;
-        int c;
+        int c, rc;
         int numerrors = 0;
 
         if (vd->vdev_ops == &vdev_mirror_ops ||
             vd->vdev_ops == &vdev_replacing_ops ||
             vd->vdev_ops == &vdev_spare_ops) {

@@ -1158,24 +1187,42 @@
                     addr, size, offset, origoffset, doread, isdump));
         }
 
         offset += VDEV_LABEL_START_SIZE;
 
+        rw_enter(&vd->vdev_tsd_lock, RW_READER);
+        dvd = vd->vdev_tsd;
         if (ddi_in_panic() || isdump) {
                 ASSERT(!doread);
-                if (doread)
+                if (doread) {
+                        rw_exit(&vd->vdev_tsd_lock);
                         return (SET_ERROR(EIO));
-                dvd = vd->vdev_tsd;
+                }
+                /* We assume here dvd is not NULL */
                 ASSERT3P(dvd, !=, NULL);
-                return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
-                    lbtodb(size)));
+
+                /* If our assumption is wrong, we do not want to crash */
+                if (dvd != NULL && dvd->vd_lh != NULL) {
+                        rc = ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
+                            lbtodb(size));
         } else {
-                dvd = vd->vdev_tsd;
+                        rc = SET_ERROR(ENXIO);
+                }
+        } else {
+                /* We assume here dvd is not NULL */
                 ASSERT3P(dvd, !=, NULL);
-                return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size,
-                    offset, doread ? B_READ : B_WRITE));
+
+                /* If our assumption is wrong, we do not want to crash */
+                if (dvd != NULL && dvd->vd_lh != NULL) {
+                        rc = vdev_disk_ldi_physio(dvd->vd_lh, addr, size,
+                            offset, doread ? B_READ : B_WRITE);
+                } else {
+                        rc = SET_ERROR(ENXIO);
         }
+        }
+        rw_exit(&vd->vdev_tsd_lock);
+        return (rc);
 }
 
 static int
 zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
     boolean_t doread, boolean_t isdump)

@@ -1778,48 +1825,65 @@
                 zfs_range_unlock(rl);
                 break;
 
         case DKIOCFREE:
         {
-                dkioc_free_t df;
+                dkioc_free_list_t *dfl;
                 dmu_tx_t *tx;
 
+                mutex_exit(&zfsdev_state_lock);
+
                 if (!zvol_unmap_enabled)
                         break;
 
-                if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
+                if (!(flag & FKIOCTL)) {
+                        dfl = dfl_copyin((void *)arg, flag, KM_SLEEP);
+                        if (dfl == NULL) {
                         error = SET_ERROR(EFAULT);
                         break;
                 }
+                } else {
+                        dfl = (dkioc_free_list_t *)arg;
+                }
 
+                for (int i = 0; i < dfl->dfl_num_exts; i++) {
+                        uint64_t start = dfl->dfl_exts[i].dfle_start,
+                            length = dfl->dfl_exts[i].dfle_length,
+                            end = start + length;
+
                 /*
-                 * Apply Postel's Law to length-checking.  If they overshoot,
-                 * just blank out until the end, if there's a need to blank
-                 * out anything.
+                         * Apply Postel's Law to length-checking.  If they
+                         * overshoot, just blank out until the end, if there's
+                         * a need to blank out anything.
                  */
-                if (df.df_start >= zv->zv_volsize)
-                        break;  /* No need to do anything... */
+                        if (start >= zv->zv_volsize)
+                                continue;       /* No need to do anything... */
+                        if (end > zv->zv_volsize) {
+                                end = DMU_OBJECT_END;
+                                length = end - start;
+                        }
 
-                mutex_exit(&zfsdev_state_lock);
-
-                rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
+                        rl = zfs_range_lock(&zv->zv_znode, start, length,
                     RL_WRITER);
                 tx = dmu_tx_create(zv->zv_objset);
-                dmu_tx_mark_netfree(tx);
                 error = dmu_tx_assign(tx, TXG_WAIT);
                 if (error != 0) {
                         dmu_tx_abort(tx);
                 } else {
-                        zvol_log_truncate(zv, tx, df.df_start,
-                            df.df_length, B_TRUE);
+                                zvol_log_truncate(zv, tx, start, length,
+                                    B_TRUE);
                         dmu_tx_commit(tx);
-                        error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
-                            df.df_start, df.df_length);
+                                error = dmu_free_long_range(zv->zv_objset,
+                                    ZVOL_OBJ, start, length);
                 }
 
                 zfs_range_unlock(rl);
 
+                        if (error != 0)
+                                break;
+                }
+
                 /*
                  * If the write-cache is disabled, 'sync' property
                  * is set to 'always', or if the caller is asking for
                  * a synchronous free, commit this operation to the zil.
                  * This will sync any previous uncommitted writes to the

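The DKIOCFREE rework above switches from a single dkioc_free_t to a
dkioc_free_list_t and clamps each extent before freeing it, per the "Postel's
Law" comment. Below is a simplified user-space sketch of that per-extent check;
the patch itself frees to DMU_OBJECT_END when an extent overshoots, while the
sketch clamps to the volume size, which has the same effect.

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-in for one dkioc_free_list_t extent. */
    struct extent {
            uint64_t start;
            uint64_t length;
    };

    /*
     * Mirror the length check above: an extent entirely past the end of the
     * volume is skipped; one that overshoots is trimmed back to the end.
     * Returns 0 if the extent should be freed, -1 if it can be ignored.
     */
    static int
    clamp_extent(struct extent *e, uint64_t volsize)
    {
            if (e->start >= volsize)
                    return (-1);            /* nothing to do */
            if (e->start + e->length > volsize)
                    e->length = volsize - e->start;
            return (0);
    }

    int
    main(void)
    {
            struct extent e = { .start = 900, .length = 200 };

            if (clamp_extent(&e, 1000) == 0)
                    printf("free %llu bytes at offset %llu\n",
                        (unsigned long long)e.length,
                        (unsigned long long)e.start);
            return (0);
    }
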
@@ -1827,14 +1891,17 @@
                  * Can be overridden by the zvol_unmap_sync_enabled tunable.
                  */
                 if ((error == 0) && zvol_unmap_sync_enabled &&
                     (!(zv->zv_flags & ZVOL_WCE) ||
                     (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
-                    (df.df_flags & DF_WAIT_SYNC))) {
+                    (dfl->dfl_flags & DF_WAIT_SYNC))) {
                         zil_commit(zv->zv_zilog, ZVOL_OBJ);
                 }
 
+                if (!(flag & FKIOCTL))
+                        dfl_free(dfl);
+
                 return (error);
         }
 
         default:
                 error = SET_ERROR(ENOTTY);