Revert "8958 Update Intel ucode to 20180108 release"
This reverts commit 1adc3ffcd976ec0a34010cc7db08037a14c3ea4c.
NEX-15280 New default metadata block size is too large
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-9752 backport illumos 6950 ARC should cache compressed data
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
6950 ARC should cache compressed data
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-5366 Race between unique_insert() and unique_remove() causes ZFS fsid change
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Dan Vatca <dan.vatca@gmail.com>
NEX-5058 WBC: Race between purging a window and opening a new one
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
NEX-2830 ZFS smart compression
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
5987 zfs prefetch code needs work
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Approved by: Gordon Ross <gordon.ross@nexenta.com>
NEX-4582 update wrc test cases to allow using write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
5911 ZFS "hangs" while deleting file
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Simon Klinkert <simon.klinkert@gmail.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-1823 Slow performance when deleting a large dataset
5911 ZFS "hangs" while deleting file
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Bayard Bell <bayard.bell@nexenta.com>
NEX-3266 5630 stale bonus buffer in recycled dnode_t leads to data corruption
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Will Andrews <will@freebsd.org>
Approved by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan Fields <dan.fields@nexenta.com>
SUP-507 Delete or truncate of large files delayed on datasets with small recordsize
Reviewed by: Albert Lee <trisk@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Ilya Usvyatsky <ilya.usvyatsky@nexenta.com>
Reviewed by: Tony Nguyen <tony.nguyen@nexenta.com>
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
Moved closed ZFS files to open repo, changed Makefiles accordingly
Removed unneeded weak symbols
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
Bug 11205: add missing libzfs_closed_stubs.c to fix opensource-only build.
ZFS plus work: special vdevs, cos, cos/vdev properties
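
For context, the diff below embeds a dnode_smartcomp_t state block in each dnode_t (dn_smartcomp); its definition lives in the header half of the change and is not shown on this page. The sketch below is a hypothetical reconstruction, inferred only from the fields and states the diff actually uses; the committed sys/dnode.h may differ.

/*
 * Hypothetical reconstruction of the per-dnode smart-compression state,
 * inferred from its uses in the diff below; not the committed header.
 */
typedef enum dnode_smartcomp_state {
	DNODE_SMARTCOMP_COMPRESSING,	/* compress and measure the ratio */
	DNODE_SMARTCOMP_DENYING		/* pass data through uncompressed */
} dnode_smartcomp_state_t;

typedef struct dnode_smartcomp {
	kmutex_t sc_lock;		/* protects the fields below */
	dnode_smartcomp_state_t sc_state;
	uint64_t sc_size;		/* compressed bytes this interval */
	uint64_t sc_orig_size;		/* uncompressed bytes this interval */
	uint64_t sc_deny_interval;	/* bytes to deny before retrying */
	int64_t sc_comp_failures;	/* >0 failure streak, <0 success streak */
} dnode_smartcomp_t;
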
@@ -18,10 +18,11 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 RackTop Systems.
*/
@@ -38,10 +39,12 @@
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu_zfetch.h>
#include <sys/range_tree.h>
+static void smartcomp_check_comp(dnode_smartcomp_t *sc);
+
static kmem_cache_t *dnode_cache;
/*
* Define DNODE_STATS to turn on statistic gathering. By default, it is only
* turned on when DEBUG is also defined.
*/
@@ -56,11 +59,11 @@
#endif /* DNODE_STATS */
static dnode_phys_t dnode_phys_zero;
int zfs_default_bs = SPA_MINBLOCKSHIFT;
-int zfs_default_ibs = DN_MAX_INDBLKSHIFT;
+int zfs_default_ibs = DN_DFL_INDBLKSHIFT;
#ifdef _KERNEL
static kmem_cbrc_t dnode_move(void *, void *, size_t, void *);
#endif /* _KERNEL */
@@ -156,10 +159,14 @@
dn->dn_dbufs_count = 0;
avl_create(&dn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
offsetof(dmu_buf_impl_t, db_link));
dn->dn_moved = 0;
+
+ bzero(&dn->dn_smartcomp, sizeof (dn->dn_smartcomp));
+ mutex_init(&dn->dn_smartcomp.sc_lock, NULL, MUTEX_DEFAULT, NULL);
+
return (0);
}
/* ARGSUSED */
static void
@@ -166,10 +173,12 @@
dnode_dest(void *arg, void *unused)
{
int i;
dnode_t *dn = arg;
+ mutex_destroy(&dn->dn_smartcomp.sc_lock);
+
rw_destroy(&dn->dn_struct_rwlock);
mutex_destroy(&dn->dn_mtx);
mutex_destroy(&dn->dn_dbufs_mtx);
cv_destroy(&dn->dn_notxholds);
refcount_destroy(&dn->dn_holds);
@@ -636,11 +645,11 @@
(bonustype == DMU_OT_SA && bonuslen == 0));
ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
/* clean up any unreferenced dbufs */
- dnode_evict_dbufs(dn);
+ dnode_evict_dbufs(dn, DBUF_EVICT_ALL);
dn->dn_id_flags = 0;
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
dnode_setdirty(dn, tx);
@@ -1265,10 +1274,16 @@
}
void
dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
{
+ dnode_setdirty_sc(dn, tx, B_TRUE);
+}
+
+void
+dnode_setdirty_sc(dnode_t *dn, dmu_tx_t *tx, boolean_t usesc)
+{
objset_t *os = dn->dn_objset;
uint64_t txg = tx->tx_txg;
if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
dsl_dataset_dirty(os->os_dsl_dataset, tx);
@@ -1323,12 +1338,11 @@
* dnode will hang around after we finish processing its
* children.
*/
VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg));
- (void) dbuf_dirty(dn->dn_dbuf, tx);
-
+ (void) dbuf_dirty_sc(dn->dn_dbuf, tx, usesc);
dsl_dataset_dirty(os->os_dsl_dataset, tx);
}
void
dnode_free(dnode_t *dn, dmu_tx_t *tx)
@@ -1412,11 +1426,12 @@
return (SET_ERROR(ENOTSUP));
}
/* read-holding callers must not rely on the lock being continuously held */
void
-dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
+dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx,
+ boolean_t usesc, boolean_t have_read)
{
uint64_t txgoff = tx->tx_txg & TXG_MASK;
int epbs, new_nlevels;
uint64_t sz;
@@ -1466,11 +1481,11 @@
dn->dn_next_nlevels[txgoff] = new_nlevels;
/* dirty the left indirects */
db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
ASSERT(db != NULL);
- new = dbuf_dirty(db, tx);
+ new = dbuf_dirty_sc(db, tx, usesc);
dbuf_rele(db, FTAG);
/* transfer the dirty records to the new indirect */
mutex_enter(&dn->dn_mtx);
mutex_enter(&new->dt.di.dr_mtx);
@@ -1695,11 +1710,12 @@
* We will finish up this free operation in the syncing phase.
*/
mutex_enter(&dn->dn_mtx);
int txgoff = tx->tx_txg & TXG_MASK;
if (dn->dn_free_ranges[txgoff] == NULL) {
- dn->dn_free_ranges[txgoff] = range_tree_create(NULL, NULL);
+ dn->dn_free_ranges[txgoff] =
+ range_tree_create(NULL, NULL, &dn->dn_mtx);
}
range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
blkid, nblks, tx->tx_txg);
@@ -1994,6 +2010,163 @@
out:
if (!(flags & DNODE_FIND_HAVELOCK))
rw_exit(&dn->dn_struct_rwlock);
return (error);
+}
+
+/*
+ * When in the compressing phase, we check our results every 1 MiB. If
+ * the compression ratio drops below the threshold, we give up trying
+ * to compress the file for a while. The length of that deny interval
+ * is derived from this value by the back-off algorithm in
+ * smartcomp_check_comp.
+ */
+uint64_t zfs_smartcomp_interval = 1 * 1024 * 1024;
+
+/*
+ * Minimum compression savings is 12.5% (100% / factor); below that we
+ * consider compression to have failed.
+ */
+uint64_t zfs_smartcomp_threshold_factor = 8;
+
+/*
+ * Maximum power-of-2 exponent on the deny interval and consequently
+ * the maximum number of compression successes and failures we track.
+ * Successive compression failures extend the deny interval, whereas
+ * repeated successes make the algorithm more hesitant to start denying.
+ */
+int64_t zfs_smartcomp_interval_exp = 5;
+
+/*
+ * Callback invoked by the zio machinery when it wants to compress a data
+ * block. If we are in the denying phase, we add the amount of data
+ * written to our stats and check if we've denied enough data to
+ * transition back into the compressing phase.
+ */
+boolean_t
+dnode_smartcomp_ask_cb(void *userinfo, const zio_t *zio)
+{
+ dnode_t *dn = userinfo;
+ dnode_smartcomp_t *sc;
+ dnode_smartcomp_state_t old_state;
+
+ ASSERT(dn != NULL);
+
+ sc = &dn->dn_smartcomp;
+ mutex_enter(&sc->sc_lock);
+ old_state = sc->sc_state;
+ if (sc->sc_state == DNODE_SMARTCOMP_DENYING) {
+ sc->sc_orig_size += zio->io_orig_size;
+ if (sc->sc_orig_size >= sc->sc_deny_interval) {
+ /* time to retry compression on next call */
+ sc->sc_state = DNODE_SMARTCOMP_COMPRESSING;
+ sc->sc_size = 0;
+ sc->sc_orig_size = 0;
+ }
+ }
+ mutex_exit(&sc->sc_lock);
+
+ return (old_state != DNODE_SMARTCOMP_DENYING);
+}
+
+/*
+ * Callback invoked after compression has been performed to allow us to
+ * monitor compression performance. If we're in a compressing phase, we
+ * add the uncompressed and compressed data volumes to our state counters
+ * and see if we need to recheck compression performance in
+ * smartcomp_check_comp.
+ */
+void
+dnode_smartcomp_result_cb(void *userinfo, const zio_t *zio)
+{
+ dnode_t *dn = userinfo;
+ dnode_smartcomp_t *sc;
+ uint64_t io_size = zio->io_size, io_orig_size = zio->io_orig_size;
+
+ ASSERT(dn != NULL);
+ sc = &dn->dn_smartcomp;
+
+ if (io_orig_size == 0)
+ /* XXX: is this valid anyway? */
+ return;
+
+ mutex_enter(&sc->sc_lock);
+ if (sc->sc_state == DNODE_SMARTCOMP_COMPRESSING) {
+ /* add last block's compression performance to our stats */
+ sc->sc_size += io_size;
+ sc->sc_orig_size += io_orig_size;
+ /* time to recheck compression performance? */
+ if (sc->sc_orig_size >= zfs_smartcomp_interval)
+ smartcomp_check_comp(sc);
+ }
+ mutex_exit(&sc->sc_lock);
+}
+
+/*
+ * This function checks whether the compression we've been getting meets
+ * the threshold. If it does, we decrement the sc_comp_failures counter
+ * to record a compression success. If it doesn't, we increment the same
+ * counter and potentially start a compression deny phase.
+ */
+static void
+smartcomp_check_comp(dnode_smartcomp_t *sc)
+{
+ uint64_t threshold = sc->sc_orig_size -
+ sc->sc_orig_size / zfs_smartcomp_threshold_factor;
+
+ ASSERT(MUTEX_HELD(&sc->sc_lock));
+ if (sc->sc_size > threshold) {
+ sc->sc_comp_failures =
+ MIN(sc->sc_comp_failures + 1, zfs_smartcomp_interval_exp);
+ if (sc->sc_comp_failures > 0) {
+ /* consistently getting too little compression, stop */
+ sc->sc_state = DNODE_SMARTCOMP_DENYING;
+ sc->sc_deny_interval =
+ zfs_smartcomp_interval << sc->sc_comp_failures;
+ /* randomize the interval by +-10% to avoid patterns */
+ sc->sc_deny_interval = (sc->sc_deny_interval -
+ (sc->sc_deny_interval / 10)) +
+ spa_get_random(sc->sc_deny_interval / 5 + 1);
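+ /*
+ * For example, at sc_comp_failures == 3 the base interval is
+ * 8 MiB, so the randomized deny interval lands in the range
+ * [7.2 MiB, 8.8 MiB].
+ */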
+ }
+ } else {
+ if (sc->sc_comp_failures > 0) {
+ /*
+ * We're biased for compression, so any success makes
+ * us forget the file's past incompressibility.
+ */
+ sc->sc_comp_failures = 0;
+ } else {
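+ /*
+ * Record the success: negative values track a streak of
+ * successes, bounded below at -zfs_smartcomp_interval_exp.
+ */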
+ sc->sc_comp_failures = MAX(sc->sc_comp_failures - 1,
+ -zfs_smartcomp_interval_exp);
+ }
+ }
+ /* reset state counters */
+ sc->sc_size = 0;
+ sc->sc_orig_size = 0;
+}
+
+/*
+ * Prepares a zio_smartcomp_info_t structure for passing to zio_write or
+ * arc_write depending on whether smart compression should be applied to
+ * the specified objset, dnode and buffer.
+ */
+void
+dnode_setup_zio_smartcomp(dmu_buf_impl_t *db, zio_smartcomp_info_t *sc)
+{
+ dnode_t *dn = DB_DNODE(db);
+ objset_t *os = dn->dn_objset;
+
+ /* Only do smart compression on user data of plain files. */
+ if (dn->dn_type == DMU_OT_PLAIN_FILE_CONTENTS && db->db_level == 0 &&
+ os->os_smartcomp_enabled && os->os_compress != ZIO_COMPRESS_OFF) {
+ sc->sc_ask = dnode_smartcomp_ask_cb;
+ sc->sc_result = dnode_smartcomp_result_cb;
+ sc->sc_userinfo = dn;
+ } else {
+ /*
+ * Zeroing out the structure passed to zio_write will turn
+ * smart compression off.
+ */
+ bzero(sc, sizeof (*sc));
+ }
}
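
To make the back-off in smartcomp_check_comp concrete, here is a small user-land sketch of the same deny-interval arithmetic. It is illustrative only: rand() stands in for the kernel's spa_get_random(), and the two tunables are copied as plain constants.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define SMARTCOMP_INTERVAL (1ULL << 20)	/* zfs_smartcomp_interval: 1 MiB */
#define SMARTCOMP_INTERVAL_EXP 5	/* zfs_smartcomp_interval_exp */

/*
 * Mirror of the computation above: the deny interval doubles with each
 * consecutive compression failure, then is randomized by +-10%.
 */
static uint64_t
deny_interval(int64_t comp_failures)
{
	uint64_t iv = SMARTCOMP_INTERVAL << comp_failures;

	/* rand() stands in for the kernel's spa_get_random() */
	return ((iv - iv / 10) + (uint64_t)rand() % (iv / 5 + 1));
}

int
main(void)
{
	for (int64_t f = 1; f <= SMARTCOMP_INTERVAL_EXP; f++) {
		printf("failures=%lld deny=%llu bytes\n",
		    (long long)f, (unsigned long long)deny_interval(f));
	}
	return (0);
}

With the default 1 MiB base interval this yields roughly 1.8-2.2 MiB after one failure, growing to 28.8-35.2 MiB at the cap of five consecutive failures.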