Print this page
Revert "8958 Update Intel ucode to 20180108 release"
This reverts commit 1adc3ffcd976ec0a34010cc7db08037a14c3ea4c.
NEX-15280 New default metadata block size is too large
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-15280 New default metadata block size is too large
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-9752 backport illumos 6950 ARC should cache compressed data
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
6950 ARC should cache compressed data
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-5366 Race between unique_insert() and unique_remove() causes ZFS fsid change
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Dan Vatca <dan.vatca@gmail.com>
NEX-5058 WBC: Race between the purging of window and opening new one
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
NEX-2830 ZFS smart compression
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
5987 zfs prefetch code needs work
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Approved by: Gordon Ross <gordon.ross@nexenta.com>
NEX-4582 update wrc test cases for allow to use write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
5911 ZFS "hangs" while deleting file
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Simon Klinkert <simon.klinkert@gmail.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-1823 Slow performance doing rm of a large dataset
5911 ZFS "hangs" while deleting file
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Bayard Bell <bayard.bell@nexenta.com>
NEX-3266 5630 stale bonus buffer in recycled dnode_t leads to data corruption
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Will Andrews <will@freebsd.org>
Approved by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan Fields <dan.fields@nexenta.com>
SUP-507 Delete or truncate of large files delayed on datasets with small recordsize
Reviewed by: Albert Lee <trisk@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Ilya Usvyatsky <ilya.usvyatsky@nexenta.com>
Reviewed by: Tony Nguyen <tony.nguyen@nexenta.com>
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
Moved closed ZFS files to open repo, changed Makefiles accordingly
Removed unneeded weak symbols
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
Bug 11205: add missing libzfs_closed_stubs.c to fix opensource-only build.
ZFS plus work: special vdevs, cos, cos/vdev properties

*** 18,27 **** --- 18,28 ---- * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 RackTop Systems. */
*** 38,47 **** --- 39,50 ---- #include <sys/spa.h> #include <sys/zio.h> #include <sys/dmu_zfetch.h> #include <sys/range_tree.h> + static void smartcomp_check_comp(dnode_smartcomp_t *sc); + static kmem_cache_t *dnode_cache; /* * Define DNODE_STATS to turn on statistic gathering. By default, it is only * turned on when DEBUG is also defined. */
*** 56,66 **** #endif /* DNODE_STATS */ static dnode_phys_t dnode_phys_zero; int zfs_default_bs = SPA_MINBLOCKSHIFT; ! int zfs_default_ibs = DN_MAX_INDBLKSHIFT; #ifdef _KERNEL static kmem_cbrc_t dnode_move(void *, void *, size_t, void *); #endif /* _KERNEL */ --- 59,69 ---- #endif /* DNODE_STATS */ static dnode_phys_t dnode_phys_zero; int zfs_default_bs = SPA_MINBLOCKSHIFT; ! int zfs_default_ibs = DN_DFL_INDBLKSHIFT; #ifdef _KERNEL static kmem_cbrc_t dnode_move(void *, void *, size_t, void *); #endif /* _KERNEL */
*** 156,165 **** --- 159,172 ---- dn->dn_dbufs_count = 0; avl_create(&dn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); dn->dn_moved = 0; + + bzero(&dn->dn_smartcomp, sizeof (dn->dn_smartcomp)); + mutex_init(&dn->dn_smartcomp.sc_lock, NULL, MUTEX_DEFAULT, NULL); + return (0); } /* ARGSUSED */ static void
*** 166,175 **** --- 173,184 ---- dnode_dest(void *arg, void *unused) { int i; dnode_t *dn = arg; + mutex_destroy(&dn->dn_smartcomp.sc_lock); + rw_destroy(&dn->dn_struct_rwlock); mutex_destroy(&dn->dn_mtx); mutex_destroy(&dn->dn_dbufs_mtx); cv_destroy(&dn->dn_notxholds); refcount_destroy(&dn->dn_holds);
*** 636,646 **** (bonustype == DMU_OT_SA && bonuslen == 0)); ASSERT(DMU_OT_IS_VALID(bonustype)); ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); /* clean up any unreferenced dbufs */ ! dnode_evict_dbufs(dn); dn->dn_id_flags = 0; rw_enter(&dn->dn_struct_rwlock, RW_WRITER); dnode_setdirty(dn, tx); --- 645,655 ---- (bonustype == DMU_OT_SA && bonuslen == 0)); ASSERT(DMU_OT_IS_VALID(bonustype)); ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); /* clean up any unreferenced dbufs */ ! dnode_evict_dbufs(dn, DBUF_EVICT_ALL); dn->dn_id_flags = 0; rw_enter(&dn->dn_struct_rwlock, RW_WRITER); dnode_setdirty(dn, tx);
*** 1265,1274 **** --- 1274,1289 ---- } void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { + dnode_setdirty_sc(dn, tx, B_TRUE); + } + + void + dnode_setdirty_sc(dnode_t *dn, dmu_tx_t *tx, boolean_t usesc) + { objset_t *os = dn->dn_objset; uint64_t txg = tx->tx_txg; if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { dsl_dataset_dirty(os->os_dsl_dataset, tx);
*** 1323,1334 **** * dnode will hang around after we finish processing its * children. */ VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); ! (void) dbuf_dirty(dn->dn_dbuf, tx); ! dsl_dataset_dirty(os->os_dsl_dataset, tx); } void dnode_free(dnode_t *dn, dmu_tx_t *tx) --- 1338,1348 ---- * dnode will hang around after we finish processing its * children. */ VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); ! (void) dbuf_dirty_sc(dn->dn_dbuf, tx, usesc); dsl_dataset_dirty(os->os_dsl_dataset, tx); } void dnode_free(dnode_t *dn, dmu_tx_t *tx)
*** 1412,1422 **** return (SET_ERROR(ENOTSUP)); } /* read-holding callers must not rely on the lock being continuously held */ void ! dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) { uint64_t txgoff = tx->tx_txg & TXG_MASK; int epbs, new_nlevels; uint64_t sz; --- 1426,1437 ---- return (SET_ERROR(ENOTSUP)); } /* read-holding callers must not rely on the lock being continuously held */ void ! dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, ! boolean_t usesc, boolean_t have_read) { uint64_t txgoff = tx->tx_txg & TXG_MASK; int epbs, new_nlevels; uint64_t sz;
*** 1466,1476 **** dn->dn_next_nlevels[txgoff] = new_nlevels; /* dirty the left indirects */ db = dbuf_hold_level(dn, old_nlevels, 0, FTAG); ASSERT(db != NULL); ! new = dbuf_dirty(db, tx); dbuf_rele(db, FTAG); /* transfer the dirty records to the new indirect */ mutex_enter(&dn->dn_mtx); mutex_enter(&new->dt.di.dr_mtx); --- 1481,1491 ---- dn->dn_next_nlevels[txgoff] = new_nlevels; /* dirty the left indirects */ db = dbuf_hold_level(dn, old_nlevels, 0, FTAG); ASSERT(db != NULL); ! new = dbuf_dirty_sc(db, tx, usesc); dbuf_rele(db, FTAG); /* transfer the dirty records to the new indirect */ mutex_enter(&dn->dn_mtx); mutex_enter(&new->dt.di.dr_mtx);
*** 1695,1705 **** * We will finish up this free operation in the syncing phase. */ mutex_enter(&dn->dn_mtx); int txgoff = tx->tx_txg & TXG_MASK; if (dn->dn_free_ranges[txgoff] == NULL) { ! dn->dn_free_ranges[txgoff] = range_tree_create(NULL, NULL); } range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks); range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks); dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", blkid, nblks, tx->tx_txg); --- 1710,1721 ---- * We will finish up this free operation in the syncing phase. */ mutex_enter(&dn->dn_mtx); int txgoff = tx->tx_txg & TXG_MASK; if (dn->dn_free_ranges[txgoff] == NULL) { ! dn->dn_free_ranges[txgoff] = ! range_tree_create(NULL, NULL, &dn->dn_mtx); } range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks); range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks); dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", blkid, nblks, tx->tx_txg);
*** 1994,1999 **** --- 2010,2172 ---- out: if (!(flags & DNODE_FIND_HAVELOCK)) rw_exit(&dn->dn_struct_rwlock); return (error); + } + + /* + * When in the compressing phase, we check our results every 1 MiB. If + * compression ratio drops below the threshold factor, we give up trying + * to compress the file for a while. The length of the interval is + * calculated from this interval value according to the algorithm in + * smartcomp_check_comp. + */ + uint64_t zfs_smartcomp_interval = 1 * 1024 * 1024; + + /* + * Minimum compression factor is 12.5% (100% / factor) - below that we + * consider compression to have failed. + */ + uint64_t zfs_smartcomp_threshold_factor = 8; + + /* + * Maximum power-of-2 exponent on the deny interval and consequently + * the maximum number of compression successes and failures we track. + * Successive compression failures extend the deny interval, whereas + * repeated successes makes the algorithm more hesitant to start denying. + */ + int64_t zfs_smartcomp_interval_exp = 5; + + /* + * Callback invoked by the zio machinery when it wants to compress a data + * block. If we are in the denying compression phase, we add the amount of + * data written to our stats and check if we've denied enough data to + * transition back in to the compression phase again. 
+ */ + boolean_t + dnode_smartcomp_ask_cb(void *userinfo, const zio_t *zio) + { + dnode_t *dn = userinfo; + dnode_smartcomp_t *sc; + dnode_smartcomp_state_t old_state; + + ASSERT(dn != NULL); + + sc = &dn->dn_smartcomp; + mutex_enter(&sc->sc_lock); + old_state = sc->sc_state; + if (sc->sc_state == DNODE_SMARTCOMP_DENYING) { + sc->sc_orig_size += zio->io_orig_size; + if (sc->sc_orig_size >= sc->sc_deny_interval) { + /* time to retry compression on next call */ + sc->sc_state = DNODE_SMARTCOMP_COMPRESSING; + sc->sc_size = 0; + sc->sc_orig_size = 0; + } + } + mutex_exit(&sc->sc_lock); + + return (old_state != DNODE_SMARTCOMP_DENYING); + } + + /* + * Callback invoked after compression has been performed to allow us to + * monitor compression performance. If we're in a compressing phase, we + * add the uncompressed and compressed data volumes to our state counters + * and see if we need to recheck compression performance in + * smartcomp_check_comp. + */ + void + dnode_smartcomp_result_cb(void *userinfo, const zio_t *zio) + { + dnode_t *dn = userinfo; + dnode_smartcomp_t *sc; + uint64_t io_size = zio->io_size, io_orig_size = zio->io_orig_size; + + ASSERT(dn != NULL); + sc = &dn->dn_smartcomp; + + if (io_orig_size == 0) + /* XXX: is this valid anyway? */ + return; + + mutex_enter(&sc->sc_lock); + if (sc->sc_state == DNODE_SMARTCOMP_COMPRESSING) { + /* add last block's compression performance to our stats */ + sc->sc_size += io_size; + sc->sc_orig_size += io_orig_size; + /* time to recheck compression performance? */ + if (sc->sc_orig_size >= zfs_smartcomp_interval) + smartcomp_check_comp(sc); + } + mutex_exit(&sc->sc_lock); + } + + /* + * This function checks whether the compression we've been getting is above + * the threshold value. If it is, we decrement the sc_comp_failures counter + * to indicate compression success. If it isn't we increment the same + * counter and potentially start a compression deny phase. 
+ */ + static void + smartcomp_check_comp(dnode_smartcomp_t *sc) + { + uint64_t threshold = sc->sc_orig_size - + sc->sc_orig_size / zfs_smartcomp_threshold_factor; + + ASSERT(MUTEX_HELD(&sc->sc_lock)); + if (sc->sc_size > threshold) { + sc->sc_comp_failures = + MIN(sc->sc_comp_failures + 1, zfs_smartcomp_interval_exp); + if (sc->sc_comp_failures > 0) { + /* consistently getting too little compression, stop */ + sc->sc_state = DNODE_SMARTCOMP_DENYING; + sc->sc_deny_interval = + zfs_smartcomp_interval << sc->sc_comp_failures; + /* randomize the interval by +-10% to avoid patterns */ + sc->sc_deny_interval = (sc->sc_deny_interval - + (sc->sc_deny_interval / 10)) + + spa_get_random(sc->sc_deny_interval / 5 + 1); + } + } else { + if (sc->sc_comp_failures > 0) { + /* + * We're biased for compression, so any success makes + * us forget the file's past incompressibility. + */ + sc->sc_comp_failures = 0; + } else { + sc->sc_comp_failures = MAX(sc->sc_comp_failures - 1, + -zfs_smartcomp_interval_exp); + } + } + /* reset state counters */ + sc->sc_size = 0; + sc->sc_orig_size = 0; + } + + /* + * Prepares a zio_smartcomp_info_t structure for passing to zio_write or + * arc_write depending on whether smart compression should be applied to + * the specified objset, dnode and buffer. + */ + extern void + dnode_setup_zio_smartcomp(dmu_buf_impl_t *db, zio_smartcomp_info_t *sc) + { + dnode_t *dn = DB_DNODE(db); + objset_t *os = dn->dn_objset; + + /* Only do smart compression on user data of plain files. */ + if (dn->dn_type == DMU_OT_PLAIN_FILE_CONTENTS && db->db_level == 0 && + os->os_smartcomp_enabled && os->os_compress != ZIO_COMPRESS_OFF) { + sc->sc_ask = dnode_smartcomp_ask_cb; + sc->sc_result = dnode_smartcomp_result_cb; + sc->sc_userinfo = dn; + } else { + /* + * Zeroing out the structure passed to zio_write will turn + * smart compression off. + */ + bzero(sc, sizeof (*sc)); + } }