Print this page
NEX-19394 backport 9337 zfs get all is slow due to uncached metadata
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Thomas Caputi <tcaputi@datto.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
 Conflicts:
  usr/src/uts/common/fs/zfs/dbuf.c
  usr/src/uts/common/fs/zfs/dmu.c
  usr/src/uts/common/fs/zfs/sys/dmu_objset.h
NEX-9752 backport illumos 6950 ARC should cache compressed data
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
6950 ARC should cache compressed data
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Don Brady <don.brady@intel.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
6267 dn_bonus evicted too early
Reviewed by: Richard Yao <ryao@gentoo.org>
Reviewed by: Xin LI <delphij@freebsd.org>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-4582 update wrc test cases to allow use of write back cache per tree of datasets
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
5911 ZFS "hangs" while deleting file
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Simon Klinkert <simon.klinkert@gmail.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
NEX-1823 Slow performance doing of a large dataset
5911 ZFS "hangs" while deleting file
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Bayard Bell <bayard.bell@nexenta.com>
NEX-3165 segregate ddt in arc
SUP-507 Delete or truncate of large files delayed on datasets with small recordsize
Reviewed by: Albert Lee <trisk@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Ilya Usvyatsky <ilya.usvyatsky@nexenta.com>
Reviewed by: Tony Nguyen <tony.nguyen@nexenta.com>
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
OS-80 support for vdev and CoS properties for the new I/O scheduler
OS-95 lint warning introduced by OS-61
Make special vdev subtree topology the same as regular vdev subtree to simplify testcase setup
Fixup merge issues
Issue #7: add cacheability to the properties
          Contributors: Boris Protopopov
DDT is placed either on the special vdev or in L2ARC, but not in both
Support for secondarycache=data option
Align mutex tables in arc.c and dbuf.c to 64 bytes (cache line); place each kmutex_t on a cache line by itself to avoid false sharing
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
Bug 11205: add missing libzfs_closed_stubs.c to fix opensource-only build.
ZFS plus work: special vdevs, cos, cos/vdev properties

*** 21,30 **** --- 21,31 ---- /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_DBUF_H #define _SYS_DBUF_H
*** 53,62 **** --- 54,65 ---- #define DB_RF_HAVESTRUCT (1 << 2) #define DB_RF_NOPREFETCH (1 << 3) #define DB_RF_NEVERWAIT (1 << 4) #define DB_RF_CACHED (1 << 5) + #define DBUF_EVICT_ALL -1 + /* * The simplified state transition diagram for dbufs looks like: * * +----> READ ----+ * | |
*** 81,90 **** --- 84,100 ---- DB_READ, DB_CACHED, DB_EVICTING } dbuf_states_t; + typedef enum dbuf_cached_state { + DB_NO_CACHE = -1, + DB_DBUF_CACHE, + DB_DBUF_METADATA_CACHE, + DB_CACHE_MAX + } dbuf_cached_state_t; + struct dnode; struct dmu_tx; /* * level = 0 means the user data
*** 123,132 **** --- 133,145 ---- unsigned int dr_accounted; /* A copy of the bp that points to us */ blkptr_t dr_bp_copy; + /* use special class of dirty entry */ + boolean_t dr_usesc; + union dirty_types { struct dirty_indirect { /* protect access to list */ kmutex_t dr_mtx;
*** 227,241 **** * Our link on the owner dnodes's dn_dbufs list. * Protected by its dn_dbufs_mtx. */ avl_node_t db_link; ! /* ! * Link in dbuf_cache. ! */ multilist_node_t db_cache_link; /* Data which is unique to data (leaf) blocks: */ /* User callback information. */ dmu_buf_user_t *db_user; --- 240,255 ---- * Our link on the owner dnodes's dn_dbufs list. * Protected by its dn_dbufs_mtx. */ avl_node_t db_link; ! /* Link in dbuf_cache or dbuf_metadata_cache */ multilist_node_t db_cache_link; + /* Tells us which dbuf cache this dbuf is in, if any */ + dbuf_cached_state_t db_caching_status; + /* Data which is unique to data (leaf) blocks: */ /* User callback information. */ dmu_buf_user_t *db_user;
*** 261,275 **** uint8_t db_dirtycnt; } dmu_buf_impl_t; /* Note: the dbuf hash table is exposed only for the mdb module */ #define DBUF_MUTEXES 256 ! #define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)]) typedef struct dbuf_hash_table { uint64_t hash_table_mask; dmu_buf_impl_t **hash_table; ! kmutex_t hash_mutexes[DBUF_MUTEXES]; } dbuf_hash_table_t; uint64_t dbuf_whichblock(struct dnode *di, int64_t level, uint64_t offset); dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data); --- 275,297 ---- uint8_t db_dirtycnt; } dmu_buf_impl_t; /* Note: the dbuf hash table is exposed only for the mdb module */ #define DBUF_MUTEXES 256 ! #define DBUF_LOCK_PAD 64 ! typedef struct { ! kmutex_t mtx; ! #ifdef _KERNEL ! unsigned char pad[(DBUF_LOCK_PAD - sizeof (kmutex_t))]; ! #endif ! } dbuf_mutex_t; ! #define DBUF_HASH_MUTEX(h, idx) \ ! (&((h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)].mtx)) typedef struct dbuf_hash_table { uint64_t hash_table_mask; dmu_buf_impl_t **hash_table; ! dbuf_mutex_t hash_mutexes[DBUF_MUTEXES]; } dbuf_hash_table_t; uint64_t dbuf_whichblock(struct dnode *di, int64_t level, uint64_t offset); dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
*** 304,313 **** --- 326,337 ---- void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx); void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx); dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); + dbuf_dirty_record_t *dbuf_dirty_sc(dmu_buf_impl_t *db, dmu_tx_t *tx, + boolean_t usesc); arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db); void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data, bp_embedded_type_t etype, enum zio_compress comp, int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
*** 316,327 **** void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx); void dbuf_unoverride(dbuf_dirty_record_t *dr); void dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx); void dbuf_release_bp(dmu_buf_impl_t *db); - boolean_t dbuf_can_remap(const dmu_buf_impl_t *buf); - void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end, struct dmu_tx *); void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx); --- 340,349 ----
*** 333,355 **** void dbuf_init(void); void dbuf_fini(void); boolean_t dbuf_is_metadata(dmu_buf_impl_t *db); #define DBUF_GET_BUFC_TYPE(_db) \ ! (dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA) #define DBUF_IS_CACHEABLE(_db) \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \ (dbuf_is_metadata(_db) && \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) #define DBUF_IS_L2CACHEABLE(_db) \ ! ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \ ! (dbuf_is_metadata(_db) && \ ! ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA))) #define DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level) \ ((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL || \ (((_level) > 0 || \ DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) && \ --- 355,393 ---- void dbuf_init(void); void dbuf_fini(void); boolean_t dbuf_is_metadata(dmu_buf_impl_t *db); + boolean_t dbuf_is_ddt(dmu_buf_impl_t *db); + boolean_t dbuf_ddt_is_l2cacheable(dmu_buf_impl_t *db); + boolean_t dbuf_meta_is_l2cacheable(dmu_buf_impl_t *db); #define DBUF_GET_BUFC_TYPE(_db) \ ! (dbuf_is_ddt(_db) ? ARC_BUFC_DDT :\ ! (dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)) #define DBUF_IS_CACHEABLE(_db) \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \ (dbuf_is_metadata(_db) && \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) + /* + * Checks whether we need to cache dbuf in l2arc. + * Metadata is l2cacheable if it is not placed on special device + * or it is placed on special device in "dual" mode. We need to check + * for ddt in ZFS_CACHE_ALL and ZFS_CACHE_METADATA because it is in MOS. + * ZFS_CACHE_DATA mode actually means to cache both data and cacheable + * metadata. + */ #define DBUF_IS_L2CACHEABLE(_db) \ ! (((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL && \ ! (dbuf_ddt_is_l2cacheable(_db) == B_TRUE)) || \ ! ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA && \ ! (dbuf_is_metadata(_db)) && \ ! 
(dbuf_ddt_is_l2cacheable(_db) == B_TRUE)) || \ ! ((dbuf_meta_is_l2cacheable(_db) == B_TRUE) && \ ! ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_DATA))) #define DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level) \ ((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL || \ (((_level) > 0 || \ DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) && \