Print this page
NEX-19083 backport OS-7314 zil_commit should omit cache thrash
9962 zil_commit should omit cache thrash
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/sys/zil_impl.h
          +++ new/usr/src/uts/common/fs/zfs/sys/zil_impl.h
↓ open down ↓ 40 lines elided ↑ open up ↑
  41   41   *
  42   42   * An lwb will start out in the "closed" state, and then transition to
  43   43   * the "opened" state via a call to zil_lwb_write_open(). When
  44   44   * transitioning from "closed" to "opened" the zilog's "zl_issuer_lock"
  45   45   * must be held.
  46   46   *
  47   47   * After the lwb is "opened", it can transition into the "issued" state
  48   48   * via zil_lwb_write_issue(). Again, the zilog's "zl_issuer_lock" must
  49   49   * be held when making this transition.
  50   50   *
  51      - * After the lwb's zio completes, and the vdev's are flushed, the lwb
  52      - * will transition into the "done" state via zil_lwb_write_done(). When
  53      - * transitioning from "issued" to "done", the zilog's "zl_lock" must be
  54      - * held, *not* the "zl_issuer_lock".
       51 + * After the lwb's write zio completes, it transitions into the "write
       52 + * done" state via zil_lwb_write_done(); and then into the "flush done"
       53 + * state via zil_lwb_flush_vdevs_done(). When transitioning from
       54 + * "issued" to "write done", and then from "write done" to "flush done",
       55 + * the zilog's "zl_lock" must be held, *not* the "zl_issuer_lock".
  55   56   *
  56   57   * The zilog's "zl_issuer_lock" can become heavily contended in certain
  57   58   * workloads, so we specifically avoid acquiring that lock when
  58   59   * transitioning an lwb from "issued" to "write done". This allows us to avoid
  59   60   * having to acquire the "zl_issuer_lock" for each lwb ZIO completion,
  60   61   * which would have added more lock contention on an already heavily
  61   62   * contended lock.
  62   63   *
  63   64   * Additionally, correctness when reading an lwb's state is often
  64   65   * achieved by exploiting the fact that these state transitions occur in
  65   66   * this specific order; i.e. "closed" to "opened" to "issued" to "done".
  66   67   *
  67   68   * Thus, if an lwb is in the "closed" or "opened" state, holding the
  68   69   * "zl_issuer_lock" will prevent a concurrent thread from transitioning
  69   70   * that lwb to the "issued" state. Likewise, if an lwb is already in the
  70   71   * "issued" state, holding the "zl_lock" will prevent a concurrent
  71      - * thread from transitioning that lwb to the "done" state.
       72 + * thread from transitioning that lwb to the "write done" state.
  72   73   */
  73   74  typedef enum {
  74   75      LWB_STATE_CLOSED,
  75   76      LWB_STATE_OPENED,
  76   77      LWB_STATE_ISSUED,
  77      -    LWB_STATE_DONE,
       78 +    LWB_STATE_WRITE_DONE,
       79 +    LWB_STATE_FLUSH_DONE,
  78   80      LWB_NUM_STATES
  79   81  } lwb_state_t;
  80   82  
  81   83  /*
  82   84   * Log write block (lwb)
  83   85   *
  84   86   * Prior to an lwb being issued to disk via zil_lwb_write_issue(), it
  85   87   * will be protected by the zilog's "zl_issuer_lock". Basically, prior
  86   88   * to it being issued, it will only be accessed by the thread that's
  87   89   * holding the "zl_issuer_lock". After the lwb is issued, the zilog's
↓ open down ↓ 153 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX