Print this page
NEX-13140 DVA-throttle support for special-class
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-4620 ZFS autotrim triggering is unreliable
NEX-4622 On-demand TRIM code illogically enumerates metaslabs via mg_ms_tree
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
NEX-3984 On-demand TRIM
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Conflicts:
        usr/src/common/zfs/zpool_prop.c
        usr/src/uts/common/sys/fs/zfs.h
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
    usr/src/uts/common/io/scsi/targets/sd.c
    usr/src/uts/common/sys/scsi/targets/sddef.h

@@ -23,10 +23,11 @@
  * Use is subject to license terms.
  */
 
 /*
  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
  */
 
 #ifndef _SYS_METASLAB_IMPL_H
 #define _SYS_METASLAB_IMPL_H
 

@@ -186,10 +187,13 @@
         uint64_t                mc_alloc;       /* total allocated space */
         uint64_t                mc_deferred;    /* total deferred frees */
         uint64_t                mc_space;       /* total space (alloc + free) */
         uint64_t                mc_dspace;      /* total deflated space */
         uint64_t                mc_histogram[RANGE_TREE_HISTOGRAM_SIZE];
+
+        kmutex_t                mc_alloc_lock;
+        avl_tree_t              mc_alloc_tree;
 };
 
 /*
  * Metaslab groups encapsulate all the allocatable regions (i.e. metaslabs)
  * of a top-level vdev. They are linked together to form a circular linked

@@ -244,10 +248,15 @@
         uint64_t                mg_failed_allocations;
         uint64_t                mg_fragmentation;
         uint64_t                mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
 };
 
+typedef struct {
+        uint64_t        ts_birth;       /* TXG at which this trimset starts */
+        range_tree_t    *ts_tree;       /* tree of extents in the trimset */
+} metaslab_trimset_t;
+
 /*
  * This value defines the number of elements in the ms_lbas array. The value
  * of 64 was chosen as it covers all power of 2 buckets up to UINT64_MAX.
  * This is the equivalent of highbit(UINT64_MAX).
  */

@@ -255,17 +264,18 @@
 
 /*
  * Each metaslab maintains a set of in-core trees to track metaslab
  * operations.  The in-core free tree (ms_tree) contains the list of
  * free segments which are eligible for allocation.  As blocks are
- * allocated, the allocated segment are removed from the ms_tree and
- * added to a per txg allocation tree (ms_alloctree).  As blocks are
- * freed, they are added to the free tree (ms_freeingtree).  These trees
- * allow us to process all allocations and frees in syncing context
- * where it is safe to update the on-disk space maps.  An additional set
- * of in-core trees is maintained to track deferred frees
- * (ms_defertree).  Once a block is freed it will move from the
+ * allocated, the allocated segments are removed from the ms_tree and
+ * added to a per txg allocation tree (ms_alloctree).  This allows us to
+ * process all allocations in syncing context where it is safe to update
+ * the on-disk space maps.  Frees are also processed in syncing context.
+ * Most frees are generated from syncing context, and those that are not
+ * are held in the spa_free_bplist for processing in syncing context.
+ * An additional set of in-core trees is maintained to track deferred
+ * frees (ms_defertree).  Once a block is freed it will move from the
  * ms_freedtree to the ms_defertree.  A deferred free means that a block
  * has been freed but cannot be used by the pool until TXG_DEFER_SIZE
  * transactions groups later.  For example, a block that is freed in txg
  * 50 will not be available for reallocation until txg 52 (50 +
  * TXG_DEFER_SIZE).  This provides a safety net for uberblock rollback.

@@ -307,11 +317,10 @@
  * ensure that allocations are not performed on the metaslab that is
  * being written.
  */
 struct metaslab {
         kmutex_t        ms_lock;
-        kmutex_t        ms_sync_lock;
         kcondvar_t      ms_load_cv;
         space_map_t     *ms_sm;
         uint64_t        ms_id;
         uint64_t        ms_start;
         uint64_t        ms_size;

@@ -318,10 +327,15 @@
         uint64_t        ms_fragmentation;
 
         range_tree_t    *ms_alloctree[TXG_SIZE];
         range_tree_t    *ms_tree;
 
+        metaslab_trimset_t      *ms_cur_ts; /* currently prepared trims */
+        metaslab_trimset_t      *ms_prev_ts;  /* previous (aging) trims */
+        kcondvar_t              ms_trim_cv;
+        metaslab_trimset_t      *ms_trimming_ts;
+
         /*
          * The following range trees are accessed only from syncing context.
          * ms_free*tree only have entries while syncing, and are empty
          * between syncs.
          */