Print this page
5219 l2arc_write_buffers() may write beyond target_sz
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Saso Kiselkov <skiselkov@gmail.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Steven Hartland <steven.hartland@multiplay.co.uk>
Reviewed by: Justin Gibbs <gibbs@FreeBSD.org>
Approved by: Matthew Ahrens <mahrens@delphix.com>

@@ -5842,11 +5842,11 @@
 static uint64_t
 l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
     boolean_t *headroom_boost)
 {
         arc_buf_hdr_t *hdr, *hdr_prev, *head;
-        uint64_t write_asize, write_psize, write_sz, headroom,
+        uint64_t write_asize, write_sz, headroom,
             buf_compress_minsz;
         void *buf_data;
         boolean_t full;
         l2arc_write_callback_t *cb;
         zio_t *pio, *wzio;

@@ -5857,11 +5857,11 @@
 
         /* Lower the flag now, we might want to raise it again later. */
         *headroom_boost = B_FALSE;
 
         pio = NULL;
-        write_sz = write_asize = write_psize = 0;
+        write_sz = write_asize = 0;
         full = B_FALSE;
         head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE);
         head->b_flags |= ARC_FLAG_L2_WRITE_HEAD;
         head->b_flags |= ARC_FLAG_HAS_L2HDR;
 

@@ -5894,10 +5894,11 @@
                         headroom = (headroom * l2arc_headroom_boost) / 100;
 
                 for (; hdr; hdr = hdr_prev) {
                         kmutex_t *hash_lock;
                         uint64_t buf_sz;
+                        uint64_t buf_a_sz;
 
                         if (arc_warm == B_FALSE)
                                 hdr_prev = multilist_sublist_next(mls, hdr);
                         else
                                 hdr_prev = multilist_sublist_prev(mls, hdr);

@@ -5922,11 +5923,19 @@
                         if (!l2arc_write_eligible(guid, hdr)) {
                                 mutex_exit(hash_lock);
                                 continue;
                         }
 
-                        if ((write_sz + hdr->b_size) > target_sz) {
+                        /*
+                         * Assume that the buffer is not going to be compressed
+                         * and could take more space on disk because of a larger
+                         * disk block size.
+                         */
+                        buf_sz = hdr->b_size;
+                        buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
+
+                        if ((write_asize + buf_a_sz) > target_sz) {
                                 full = B_TRUE;
                                 mutex_exit(hash_lock);
                                 break;
                         }
 

@@ -5986,11 +5995,10 @@
                          * be holding the l2ad_mtx; which is why we're
                          * using it to denote the header's state change.
                          */
                         hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET;
 
-                        buf_sz = hdr->b_size;
                         hdr->b_flags |= ARC_FLAG_HAS_L2HDR;
 
                         mutex_enter(&dev->l2ad_mtx);
                         list_insert_head(&dev->l2ad_buflist, hdr);
                         mutex_exit(&dev->l2ad_mtx);

@@ -6003,10 +6011,11 @@
                         arc_cksum_compute(hdr->b_l1hdr.b_buf, B_TRUE);
 
                         mutex_exit(hash_lock);
 
                         write_sz += buf_sz;
+                        write_asize += buf_a_sz;
                 }
 
                 multilist_sublist_unlock(mls);
 
                 if (full == B_TRUE)

@@ -6022,10 +6031,23 @@
         }
 
         mutex_enter(&dev->l2ad_mtx);
 
         /*
+         * Note that elsewhere in this file arcstat_l2_asize
+         * and the used space on l2ad_vdev are updated using b_asize,
+         * which is not necessarily rounded up to the device block size.
+         * Too keep accounting consistent we do the same here as well:
+         * stats_size accumulates the sum of b_asize of the written buffers,
+         * while write_asize accumulates the sum of b_asize rounded up
+         * to the device block size.
+         * The latter sum is used only to validate the corectness of the code.
+         */
+        uint64_t stats_size = 0;
+        write_asize = 0;
+
+        /*
          * Now start writing the buffers. We're starting at the write head
          * and work backwards, retracing the course of the buffer selector
          * loop above.
          */
         for (hdr = list_prev(&dev->l2ad_buflist, head); hdr;

@@ -6074,11 +6096,11 @@
                  */
                 (void) refcount_add_many(&dev->l2ad_alloc, buf_sz, hdr);
 
                 /* Compression may have squashed the buffer to zero length. */
                 if (buf_sz != 0) {
-                        uint64_t buf_p_sz;
+                        uint64_t buf_a_sz;
 
                         wzio = zio_write_phys(pio, dev->l2ad_vdev,
                             dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
                             NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
                             ZIO_FLAG_CANFAIL, B_FALSE);

@@ -6085,29 +6107,29 @@
 
                         DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
                             zio_t *, wzio);
                         (void) zio_nowait(wzio);
 
-                        write_asize += buf_sz;
+                        stats_size += buf_sz;
 
                         /*
                          * Keep the clock hand suitably device-aligned.
                          */
-                        buf_p_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
-                        write_psize += buf_p_sz;
-                        dev->l2ad_hand += buf_p_sz;
+                        buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
+                        write_asize += buf_a_sz;
+                        dev->l2ad_hand += buf_a_sz;
                 }
         }
 
         mutex_exit(&dev->l2ad_mtx);
 
         ASSERT3U(write_asize, <=, target_sz);
         ARCSTAT_BUMP(arcstat_l2_writes_sent);
         ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
         ARCSTAT_INCR(arcstat_l2_size, write_sz);
-        ARCSTAT_INCR(arcstat_l2_asize, write_asize);
-        vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0);
+        ARCSTAT_INCR(arcstat_l2_asize, stats_size);
+        vdev_space_update(dev->l2ad_vdev, stats_size, 0, 0);
 
         /*
          * Bump device hand to the device start if it is approaching the end.
          * l2arc_evict() will already have evicted ahead for this case.
          */