5827 }
5828 mutex_exit(&dev->l2ad_mtx);
5829 }
5830
5831 /*
5832 * Find and write ARC buffers to the L2ARC device.
5833 *
5834 * An ARC_FLAG_L2_WRITING flag is set so that the L2ARC buffers are not valid
5835 * for reading until they have completed writing.
5836 * The headroom_boost is an in-out parameter used to maintain headroom boost
5837 * state between calls to this function.
5838 *
5839 * Returns the number of bytes actually written (which may be smaller than
5840 * the delta by which the device hand has changed due to alignment).
5841 */
/*
 * NOTE(review): this excerpt carries the original file's line numbers as a
 * prefix on every line, and the numbering jumps below show that two regions
 * of the function body were elided from this view.  Treat it as a partial
 * listing, not compilable code.
 */
5842 static uint64_t
5843 l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
5844 boolean_t *headroom_boost)
5845 {
5846 arc_buf_hdr_t *hdr, *hdr_prev, *head;
/*
 * NOTE(review): write_psize is accumulated in the write loop below but is
 * never read anywhere in the visible code — it appears to be a dead
 * accumulator in this variant.
 */
5847 uint64_t write_asize, write_psize, write_sz, headroom,
5848 buf_compress_minsz;
5849 void *buf_data;
5850 boolean_t full;
5851 l2arc_write_callback_t *cb;
5852 zio_t *pio, *wzio;
5853 uint64_t guid = spa_load_guid(spa);
5854 const boolean_t do_headroom_boost = *headroom_boost;
5855
5856 ASSERT(dev->l2ad_vdev != NULL);
5857
5858 /* Lower the flag now, we might want to raise it again later. */
5859 *headroom_boost = B_FALSE;
5860
5861 pio = NULL;
5862 write_sz = write_asize = write_psize = 0;
5863 full = B_FALSE;
5864 head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE);
5865 head->b_flags |= ARC_FLAG_L2_WRITE_HEAD;
5866 head->b_flags |= ARC_FLAG_HAS_L2HDR;
5867
5868 /*
5869 * We will want to try to compress buffers that are at least 2x the
5870 * device sector size.
5871 */
5872 buf_compress_minsz = 2 << dev->l2ad_vdev->vdev_ashift;
5873
5874 /*
5875 * Copy buffers for L2ARC writing.
5876 */
5877 for (int try = 0; try <= 3; try++) {
5878 multilist_sublist_t *mls = l2arc_sublist_lock(try);
5879 uint64_t passed_sz = 0;
5880
5881 /*
5882 * L2ARC fast warmup.
5883 *
5884 * Until the ARC is warm and starts to evict, read from the
5885 * head of the ARC lists rather than the tail.
5886 */
5887 if (arc_warm == B_FALSE)
5888 hdr = multilist_sublist_head(mls);
5889 else
5890 hdr = multilist_sublist_tail(mls);
5891
5892 headroom = target_sz * l2arc_headroom;
5893 if (do_headroom_boost)
5894 headroom = (headroom * l2arc_headroom_boost) / 100;
5895
5896 for (; hdr; hdr = hdr_prev) {
5897 kmutex_t *hash_lock;
5898 uint64_t buf_sz;
5899
5900 if (arc_warm == B_FALSE)
5901 hdr_prev = multilist_sublist_next(mls, hdr);
5902 else
5903 hdr_prev = multilist_sublist_prev(mls, hdr);
5904
5905 hash_lock = HDR_LOCK(hdr);
5906 if (!mutex_tryenter(hash_lock)) {
5907 /*
5908 * Skip this buffer rather than waiting.
5909 */
5910 continue;
5911 }
5912
5913 passed_sz += hdr->b_size;
5914 if (passed_sz > headroom) {
5915 /*
5916 * Searched too far.
5917 */
5918 mutex_exit(hash_lock);
5919 break;
5920 }
5921
5922 if (!l2arc_write_eligible(guid, hdr)) {
5923 mutex_exit(hash_lock);
5924 continue;
5925 }
5926
/*
 * NOTE(review): the target_sz budget here is checked against the raw
 * b_size sum, not the allocated (ashift-rounded) size — rounding in the
 * write loop below can therefore advance the device hand by more than
 * target_sz worth of selected buffers.
 */
5927 if ((write_sz + hdr->b_size) > target_sz) {
5928 full = B_TRUE;
5929 mutex_exit(hash_lock);
5930 break;
5931 }
5932
5933 if (pio == NULL) {
5934 /*
5935 * Insert a dummy header on the buflist so
5936 * l2arc_write_done() can find where the
5937 * write buffers begin without searching.
5938 */
5939 mutex_enter(&dev->l2ad_mtx);
5940 list_insert_head(&dev->l2ad_buflist, head);
5941 mutex_exit(&dev->l2ad_mtx);
5942
5943 cb = kmem_alloc(
5944 sizeof (l2arc_write_callback_t), KM_SLEEP);
5945 cb->l2wcb_dev = dev;
5946 cb->l2wcb_head = head;
5947 pio = zio_root(spa, l2arc_write_done, cb,
/*
 * NOTE(review): original lines 5948-5970 are elided from this excerpt;
 * the comment below continues mid-block.
 */
5971 * enables us to differentiate which stage of
5972 * l2arc_write_buffers() the particular header
5973 * is in (e.g. this loop, or the one below).
5974 * ARC_FLAG_L2_WRITING is not enough to make
5975 * this distinction, and we need to know in
5976 * order to do proper l2arc vdev accounting in
5977 * arc_release() and arc_hdr_destroy().
5978 *
5979 * Note, we can't use a new flag to distinguish
5980 * the two stages because we don't hold the
5981 * header's hash_lock below, in the second stage
5982 * of this function. Thus, we can't simply
5983 * change the b_flags field to denote that the
5984 * IO has been sent. We can change the b_daddr
5985 * field of the L2 portion, though, since we'll
5986 * be holding the l2ad_mtx; which is why we're
5987 * using it to denote the header's state change.
5988 */
5989 hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET;
5990
5991 buf_sz = hdr->b_size;
5992 hdr->b_flags |= ARC_FLAG_HAS_L2HDR;
5993
5994 mutex_enter(&dev->l2ad_mtx);
5995 list_insert_head(&dev->l2ad_buflist, hdr);
5996 mutex_exit(&dev->l2ad_mtx);
5997
5998 /*
5999 * Compute and store the buffer cksum before
6000 * writing. On debug the cksum is verified first.
6001 */
6002 arc_cksum_verify(hdr->b_l1hdr.b_buf);
6003 arc_cksum_compute(hdr->b_l1hdr.b_buf, B_TRUE);
6004
6005 mutex_exit(hash_lock);
6006
6007 write_sz += buf_sz;
6008 }
6009
6010 multilist_sublist_unlock(mls);
6011
6012 if (full == B_TRUE)
6013 break;
6014 }
6015
6016 /* No buffers selected for writing? */
6017 if (pio == NULL) {
6018 ASSERT0(write_sz);
6019 ASSERT(!HDR_HAS_L1HDR(head));
6020 kmem_cache_free(hdr_l2only_cache, head);
6021 return (0);
6022 }
6023
6024 mutex_enter(&dev->l2ad_mtx);
6025
6026 /*
6027 * Now start writing the buffers. We're starting at the write head
6028 * and work backwards, retracing the course of the buffer selector
6029 * loop above.
6030 */
6031 for (hdr = list_prev(&dev->l2ad_buflist, head); hdr;
6032 hdr = list_prev(&dev->l2ad_buflist, hdr)) {
6033 uint64_t buf_sz;
6034
6035 /*
6036 * We rely on the L1 portion of the header below, so
6037 * it's invalid for this header to have been evicted out
6038 * of the ghost cache, prior to being written out. The
6039 * ARC_FLAG_L2_WRITING bit ensures this won't happen.
6040 */
6041 ASSERT(HDR_HAS_L1HDR(hdr));
6042
6043 /*
6044 * We shouldn't need to lock the buffer here, since we flagged
6045 * it as ARC_FLAG_L2_WRITING in the previous step, but we must
6046 * take care to only access its L2 cache parameters. In
/*
 * NOTE(review): original lines 6047-6058 are elided from this excerpt;
 * the compression step that may set headroom_boost is not visible here.
 */
6059 *headroom_boost = B_TRUE;
6060 }
6061 }
6062
6063 /*
6064 * Pick up the buffer data we had previously stashed away
6065 * (and now potentially also compressed).
6066 */
6067 buf_data = hdr->b_l1hdr.b_tmp_cdata;
6068 buf_sz = hdr->b_l2hdr.b_asize;
6069
6070 /*
6071 * We need to do this regardless if buf_sz is zero or
6072 * not, otherwise, when this l2hdr is evicted we'll
6073 * remove a reference that was never added.
6074 */
6075 (void) refcount_add_many(&dev->l2ad_alloc, buf_sz, hdr);
6076
6077 /* Compression may have squashed the buffer to zero length. */
6078 if (buf_sz != 0) {
6079 uint64_t buf_p_sz;
6080
6081 wzio = zio_write_phys(pio, dev->l2ad_vdev,
6082 dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
6083 NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
6084 ZIO_FLAG_CANFAIL, B_FALSE);
6085
6086 DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
6087 zio_t *, wzio);
6088 (void) zio_nowait(wzio);
6089
6090 write_asize += buf_sz;
6091
6092 /*
6093 * Keep the clock hand suitably device-aligned.
6094 */
6095 buf_p_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
6096 write_psize += buf_p_sz;
6097 dev->l2ad_hand += buf_p_sz;
6098 }
6099 }
6100
6101 mutex_exit(&dev->l2ad_mtx);
6102
6103 ASSERT3U(write_asize, <=, target_sz);
6104 ARCSTAT_BUMP(arcstat_l2_writes_sent);
6105 ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
6106 ARCSTAT_INCR(arcstat_l2_size, write_sz);
6107 ARCSTAT_INCR(arcstat_l2_asize, write_asize);
6108 vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0);
6109
6110 /*
6111 * Bump device hand to the device start if it is approaching the end.
6112 * l2arc_evict() will already have evicted ahead for this case.
6113 */
6114 if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) {
6115 dev->l2ad_hand = dev->l2ad_start;
6116 dev->l2ad_first = B_FALSE;
6117 }
6118
6119 dev->l2ad_writing = B_TRUE;
6120 (void) zio_wait(pio);
6121 dev->l2ad_writing = B_FALSE;
6122
6123 return (write_asize);
6124 }
6125
6126 /*
6127 * Compresses an L2ARC buffer.
6128 * The data to be compressed must be prefilled in l1hdr.b_tmp_cdata and its
|
5827 }
5828 mutex_exit(&dev->l2ad_mtx);
5829 }
5830
5831 /*
5832 * Find and write ARC buffers to the L2ARC device.
5833 *
5834 * An ARC_FLAG_L2_WRITING flag is set so that the L2ARC buffers are not valid
5835 * for reading until they have completed writing.
5836 * The headroom_boost is an in-out parameter used to maintain headroom boost
5837 * state between calls to this function.
5838 *
5839 * Returns the number of bytes actually written (which may be smaller than
5840 * the delta by which the device hand has changed due to alignment).
5841 */
/*
 * NOTE(review): this excerpt carries the original file's line numbers as a
 * prefix on every line, and the numbering jumps below show that two regions
 * of the function body were elided from this view.  Treat it as a partial
 * listing, not compilable code.  This variant sizes the write budget using
 * ashift-rounded allocation sizes (buf_a_sz) during selection, and reports
 * device usage via the b_asize sum (stats_size) — see the comment at the
 * start of the write stage below.
 */
5842 static uint64_t
5843 l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
5844 boolean_t *headroom_boost)
5845 {
5846 arc_buf_hdr_t *hdr, *hdr_prev, *head;
5847 uint64_t write_asize, write_sz, headroom,
5848 buf_compress_minsz;
5849 void *buf_data;
5850 boolean_t full;
5851 l2arc_write_callback_t *cb;
5852 zio_t *pio, *wzio;
5853 uint64_t guid = spa_load_guid(spa);
5854 const boolean_t do_headroom_boost = *headroom_boost;
5855
5856 ASSERT(dev->l2ad_vdev != NULL);
5857
5858 /* Lower the flag now, we might want to raise it again later. */
5859 *headroom_boost = B_FALSE;
5860
5861 pio = NULL;
5862 write_sz = write_asize = 0;
5863 full = B_FALSE;
5864 head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE);
5865 head->b_flags |= ARC_FLAG_L2_WRITE_HEAD;
5866 head->b_flags |= ARC_FLAG_HAS_L2HDR;
5867
5868 /*
5869 * We will want to try to compress buffers that are at least 2x the
5870 * device sector size.
5871 */
5872 buf_compress_minsz = 2 << dev->l2ad_vdev->vdev_ashift;
5873
5874 /*
5875 * Copy buffers for L2ARC writing.
5876 */
5877 for (int try = 0; try <= 3; try++) {
5878 multilist_sublist_t *mls = l2arc_sublist_lock(try);
5879 uint64_t passed_sz = 0;
5880
5881 /*
5882 * L2ARC fast warmup.
5883 *
5884 * Until the ARC is warm and starts to evict, read from the
5885 * head of the ARC lists rather than the tail.
5886 */
5887 if (arc_warm == B_FALSE)
5888 hdr = multilist_sublist_head(mls);
5889 else
5890 hdr = multilist_sublist_tail(mls);
5891
5892 headroom = target_sz * l2arc_headroom;
5893 if (do_headroom_boost)
5894 headroom = (headroom * l2arc_headroom_boost) / 100;
5895
5896 for (; hdr; hdr = hdr_prev) {
5897 kmutex_t *hash_lock;
5898 uint64_t buf_sz;
5899 uint64_t buf_a_sz;
5900
5901 if (arc_warm == B_FALSE)
5902 hdr_prev = multilist_sublist_next(mls, hdr);
5903 else
5904 hdr_prev = multilist_sublist_prev(mls, hdr);
5905
5906 hash_lock = HDR_LOCK(hdr);
5907 if (!mutex_tryenter(hash_lock)) {
5908 /*
5909 * Skip this buffer rather than waiting.
5910 */
5911 continue;
5912 }
5913
5914 passed_sz += hdr->b_size;
5915 if (passed_sz > headroom) {
5916 /*
5917 * Searched too far.
5918 */
5919 mutex_exit(hash_lock);
5920 break;
5921 }
5922
5923 if (!l2arc_write_eligible(guid, hdr)) {
5924 mutex_exit(hash_lock);
5925 continue;
5926 }
5927
5928 /*
5929 * Assume that the buffer is not going to be compressed
5930 * and could take more space on disk because of a larger
5931 * disk block size.
5932 */
5933 buf_sz = hdr->b_size;
5934 buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
5935
/*
 * NOTE(review): unlike the selection check in the earlier variant of
 * this function, the budget here is checked against the ashift-rounded
 * allocated size, so the device hand cannot overshoot target_sz.
 */
5936 if ((write_asize + buf_a_sz) > target_sz) {
5937 full = B_TRUE;
5938 mutex_exit(hash_lock);
5939 break;
5940 }
5941
5942 if (pio == NULL) {
5943 /*
5944 * Insert a dummy header on the buflist so
5945 * l2arc_write_done() can find where the
5946 * write buffers begin without searching.
5947 */
5948 mutex_enter(&dev->l2ad_mtx);
5949 list_insert_head(&dev->l2ad_buflist, head);
5950 mutex_exit(&dev->l2ad_mtx);
5951
5952 cb = kmem_alloc(
5953 sizeof (l2arc_write_callback_t), KM_SLEEP);
5954 cb->l2wcb_dev = dev;
5955 cb->l2wcb_head = head;
5956 pio = zio_root(spa, l2arc_write_done, cb,
/*
 * NOTE(review): original lines 5957-5979 are elided from this excerpt;
 * the comment below continues mid-block.
 */
5980 * enables us to differentiate which stage of
5981 * l2arc_write_buffers() the particular header
5982 * is in (e.g. this loop, or the one below).
5983 * ARC_FLAG_L2_WRITING is not enough to make
5984 * this distinction, and we need to know in
5985 * order to do proper l2arc vdev accounting in
5986 * arc_release() and arc_hdr_destroy().
5987 *
5988 * Note, we can't use a new flag to distinguish
5989 * the two stages because we don't hold the
5990 * header's hash_lock below, in the second stage
5991 * of this function. Thus, we can't simply
5992 * change the b_flags field to denote that the
5993 * IO has been sent. We can change the b_daddr
5994 * field of the L2 portion, though, since we'll
5995 * be holding the l2ad_mtx; which is why we're
5996 * using it to denote the header's state change.
5997 */
5998 hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET;
5999
6000 hdr->b_flags |= ARC_FLAG_HAS_L2HDR;
6001
6002 mutex_enter(&dev->l2ad_mtx);
6003 list_insert_head(&dev->l2ad_buflist, hdr);
6004 mutex_exit(&dev->l2ad_mtx);
6005
6006 /*
6007 * Compute and store the buffer cksum before
6008 * writing. On debug the cksum is verified first.
6009 */
6010 arc_cksum_verify(hdr->b_l1hdr.b_buf);
6011 arc_cksum_compute(hdr->b_l1hdr.b_buf, B_TRUE);
6012
6013 mutex_exit(hash_lock);
6014
6015 write_sz += buf_sz;
6016 write_asize += buf_a_sz;
6017 }
6018
6019 multilist_sublist_unlock(mls);
6020
6021 if (full == B_TRUE)
6022 break;
6023 }
6024
6025 /* No buffers selected for writing? */
6026 if (pio == NULL) {
6027 ASSERT0(write_sz);
6028 ASSERT(!HDR_HAS_L1HDR(head));
6029 kmem_cache_free(hdr_l2only_cache, head);
6030 return (0);
6031 }
6032
6033 mutex_enter(&dev->l2ad_mtx);
6034
6035 /*
6036 * Note that elsewhere in this file arcstat_l2_asize
6037 * and the used space on l2ad_vdev are updated using b_asize,
6038 * which is not necessarily rounded up to the device block size.
6039 * To keep accounting consistent we do the same here as well:
6040 * stats_size accumulates the sum of b_asize of the written buffers,
6041 * while write_asize accumulates the sum of b_asize rounded up
6042 * to the device block size.
6043 * The latter sum is used only to validate the correctness of the code.
6044 */
6045 uint64_t stats_size = 0;
6046 write_asize = 0;
6047
6048 /*
6049 * Now start writing the buffers. We're starting at the write head
6050 * and work backwards, retracing the course of the buffer selector
6051 * loop above.
6052 */
6053 for (hdr = list_prev(&dev->l2ad_buflist, head); hdr;
6054 hdr = list_prev(&dev->l2ad_buflist, hdr)) {
6055 uint64_t buf_sz;
6056
6057 /*
6058 * We rely on the L1 portion of the header below, so
6059 * it's invalid for this header to have been evicted out
6060 * of the ghost cache, prior to being written out. The
6061 * ARC_FLAG_L2_WRITING bit ensures this won't happen.
6062 */
6063 ASSERT(HDR_HAS_L1HDR(hdr));
6064
6065 /*
6066 * We shouldn't need to lock the buffer here, since we flagged
6067 * it as ARC_FLAG_L2_WRITING in the previous step, but we must
6068 * take care to only access its L2 cache parameters. In
/*
 * NOTE(review): original lines 6069-6080 are elided from this excerpt;
 * the compression step that may set headroom_boost is not visible here.
 */
6081 *headroom_boost = B_TRUE;
6082 }
6083 }
6084
6085 /*
6086 * Pick up the buffer data we had previously stashed away
6087 * (and now potentially also compressed).
6088 */
6089 buf_data = hdr->b_l1hdr.b_tmp_cdata;
6090 buf_sz = hdr->b_l2hdr.b_asize;
6091
6092 /*
6093 * We need to do this regardless if buf_sz is zero or
6094 * not, otherwise, when this l2hdr is evicted we'll
6095 * remove a reference that was never added.
6096 */
6097 (void) refcount_add_many(&dev->l2ad_alloc, buf_sz, hdr);
6098
6099 /* Compression may have squashed the buffer to zero length. */
6100 if (buf_sz != 0) {
6101 uint64_t buf_a_sz;
6102
6103 wzio = zio_write_phys(pio, dev->l2ad_vdev,
6104 dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
6105 NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
6106 ZIO_FLAG_CANFAIL, B_FALSE);
6107
6108 DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
6109 zio_t *, wzio);
6110 (void) zio_nowait(wzio);
6111
6112 stats_size += buf_sz;
6113
6114 /*
6115 * Keep the clock hand suitably device-aligned.
6116 */
6117 buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
6118 write_asize += buf_a_sz;
6119 dev->l2ad_hand += buf_a_sz;
6120 }
6121 }
6122
6123 mutex_exit(&dev->l2ad_mtx);
6124
/*
 * NOTE(review): this holds because compression can only shrink b_asize,
 * and the selection loop budgeted with uncompressed, rounded-up sizes.
 */
6125 ASSERT3U(write_asize, <=, target_sz);
6126 ARCSTAT_BUMP(arcstat_l2_writes_sent);
6127 ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
6128 ARCSTAT_INCR(arcstat_l2_size, write_sz);
6129 ARCSTAT_INCR(arcstat_l2_asize, stats_size);
6130 vdev_space_update(dev->l2ad_vdev, stats_size, 0, 0);
6131
6132 /*
6133 * Bump device hand to the device start if it is approaching the end.
6134 * l2arc_evict() will already have evicted ahead for this case.
6135 */
6136 if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) {
6137 dev->l2ad_hand = dev->l2ad_start;
6138 dev->l2ad_first = B_FALSE;
6139 }
6140
6141 dev->l2ad_writing = B_TRUE;
6142 (void) zio_wait(pio);
6143 dev->l2ad_writing = B_FALSE;
6144
6145 return (write_asize);
6146 }
6147
6148 /*
6149 * Compresses an L2ARC buffer.
6150 * The data to be compressed must be prefilled in l1hdr.b_tmp_cdata and its
|