528 #define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_BUF_AVAILABLE)
529 #define HDR_FREE_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FREE_IN_PROGRESS)
530 #define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_L2CACHE)
531 #define HDR_L2_READING(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS && \
532 (hdr)->b_l2hdr != NULL)
533 #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_L2_WRITING)
534 #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_L2_EVICTED)
535 #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_L2_WRITE_HEAD)
536
537 /*
538 * Other sizes
539 */
540
541 #define HDR_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
542 #define L2HDR_SIZE ((int64_t)sizeof (l2arc_buf_hdr_t))
543
544 /*
545 * Hash table routines
546 */
547
548 #define HT_LOCK_PAD 64
549
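/*
 * Each hash lock is padded out to HT_LOCK_PAD (64) bytes in kernel
 * builds, presumably so that adjacent locks land on separate cache
 * lines and heavily contended locks do not false-share.
 */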
550 struct ht_lock {
551 kmutex_t ht_lock;
552 #ifdef _KERNEL
553 unsigned char pad[(HT_LOCK_PAD - sizeof (kmutex_t))];
554 #endif
555 };
556
557 #define BUF_LOCKS 256
558 typedef struct buf_hash_table {
559 uint64_t ht_mask;
560 arc_buf_hdr_t **ht_table;
561 struct ht_lock ht_locks[BUF_LOCKS];
562 } buf_hash_table_t;
563
564 static buf_hash_table_t buf_hash_table;
565
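/*
 * A header's bucket is selected by hashing its (spa, dva, birth)
 * identity; the bucket's chain is protected by one of the BUF_LOCKS
 * hash locks, chosen from the low bits of the index, so many buckets
 * share the same lock.
 */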
566 #define BUF_HASH_INDEX(spa, dva, birth) \
567 (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
568 #define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
569 #define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
570 #define HDR_LOCK(hdr) \
571 (BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
572
573 uint64_t zfs_crc64_table[256];
574
575 /*
576 * Level 2 ARC
577 */
578
579 #define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */
580 #define L2ARC_HEADROOM 2 /* num of writes */
581 #define L2ARC_FEED_SECS 1 /* caching interval secs */
582 #define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */
583
584 #define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent)
585 #define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done)
586
587 /*
588 * L2ARC Performance Tunables
589 */
686 ((buf)->b_dva.dva_word[1] == (dva)->dva_word[1]) && \
687 ((buf)->b_birth == birth) && ((buf)->b_spa == spa)
688
689 static void
690 buf_discard_identity(arc_buf_hdr_t *hdr)
691 {
692 hdr->b_dva.dva_word[0] = 0;
693 hdr->b_dva.dva_word[1] = 0;
694 hdr->b_birth = 0;
695 hdr->b_cksum0 = 0;
696 }
697
698 static arc_buf_hdr_t *
699 buf_hash_find(uint64_t spa, const dva_t *dva, uint64_t birth, kmutex_t **lockp)
700 {
701 uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
702 kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
703 arc_buf_hdr_t *buf;
704
705 mutex_enter(hash_lock);
706 for (buf = buf_hash_table.ht_table[idx]; buf != NULL;
707 buf = buf->b_hash_next) {
708 if (BUF_EQUAL(spa, dva, birth, buf)) {
709 *lockp = hash_lock;
710 return (buf);
711 }
712 }
713 mutex_exit(hash_lock);
714 *lockp = NULL;
715 return (NULL);
716 }
717
718 /*
719 * Insert an entry into the hash table. If there is already an element
720 * equal to elem in the hash table, then the already existing element
721 * will be returned and the new element will not be inserted.
722 * Otherwise returns NULL.
723 */
724 static arc_buf_hdr_t *
725 buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp)
726 {
727 uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth);
728 kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
729 arc_buf_hdr_t *fbuf;
730 uint32_t i;
731
732 ASSERT(!HDR_IN_HASH_TABLE(buf));
733 *lockp = hash_lock;
734 mutex_enter(hash_lock);
735 for (fbuf = buf_hash_table.ht_table[idx], i = 0; fbuf != NULL;
736 fbuf = fbuf->b_hash_next, i++) {
737 if (BUF_EQUAL(buf->b_spa, &buf->b_dva, buf->b_birth, fbuf))
738 return (fbuf);
739 }
740
741 buf->b_hash_next = buf_hash_table.ht_table[idx];
742 buf_hash_table.ht_table[idx] = buf;
743 buf->b_flags |= ARC_IN_HASH_TABLE;
744
745 /* collect some hash table performance data */
746 if (i > 0) {
747 ARCSTAT_BUMP(arcstat_hash_collisions);
748 if (i == 1)
749 ARCSTAT_BUMP(arcstat_hash_chains);
750
751 ARCSTAT_MAX(arcstat_hash_chain_max, i);
752 }
753
754 ARCSTAT_BUMP(arcstat_hash_elements);
755 ARCSTAT_MAXSTAT(arcstat_hash_elements);
756
757 return (NULL);
758 }
759
760 static void
761 buf_hash_remove(arc_buf_hdr_t *buf)
762 {
763 arc_buf_hdr_t *fbuf, **bufp;
764 uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth);
765
766 ASSERT(MUTEX_HELD(BUF_HASH_LOCK(idx)));
767 ASSERT(HDR_IN_HASH_TABLE(buf));
768
769 bufp = &buf_hash_table.ht_table[idx];
770 while ((fbuf = *bufp) != buf) {
771 ASSERT(fbuf != NULL);
772 bufp = &fbuf->b_hash_next;
773 }
774 *bufp = buf->b_hash_next;
775 buf->b_hash_next = NULL;
776 buf->b_flags &= ~ARC_IN_HASH_TABLE;
777
778 /* collect some hash table performance data */
779 ARCSTAT_BUMPDOWN(arcstat_hash_elements);
780
781 if (buf_hash_table.ht_table[idx] &&
782 buf_hash_table.ht_table[idx]->b_hash_next == NULL)
783 ARCSTAT_BUMPDOWN(arcstat_hash_chains);
784 }
785
786 /*
787 * Global data structures and functions for the buf kmem cache.
788 */
789 static kmem_cache_t *hdr_cache;
790 static kmem_cache_t *buf_cache;
791
792 static void
793 buf_fini(void)
794 {
795 int i;
796
797 kmem_free(buf_hash_table.ht_table,
798 (buf_hash_table.ht_mask + 1) * sizeof (void *));
799 for (i = 0; i < BUF_LOCKS; i++)
800 mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
801 kmem_cache_destroy(hdr_cache);
802 kmem_cache_destroy(buf_cache);
803 }
804
805 /*
806 * Constructor callback - called when the cache is empty
807 * and a new buf is requested.
808 */
809 /* ARGSUSED */
810 static int
811 hdr_cons(void *vbuf, void *unused, int kmflag)
812 {
813 arc_buf_hdr_t *buf = vbuf;
814
815 bzero(buf, sizeof (arc_buf_hdr_t));
816 refcount_create(&buf->b_refcnt);
817 cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL);
818 mutex_init(&buf->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
819 arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
820
877 cv_signal(&arc_reclaim_thr_cv);
878 }
879
880 static void
881 buf_init(void)
882 {
883 uint64_t *ct;
884 uint64_t hsize = 1ULL << 12;
885 int i, j;
886
887 /*
888 * The hash table is big enough to fill all of physical memory
889 * with an average 64K block size. The table will take up
890 	 * totalmem*sizeof(void*)/64K (e.g. 128KB/GB with 8-byte pointers).
891 */
892 while (hsize * 65536 < physmem * PAGESIZE)
893 hsize <<= 1;
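	/*
	 * Allocate the bucket array without sleeping; if the allocation
	 * fails, halve the table (but never below 2^8 buckets) and
	 * try again.
	 */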
894 retry:
895 buf_hash_table.ht_mask = hsize - 1;
896 buf_hash_table.ht_table =
897 kmem_zalloc(hsize * sizeof (void*), KM_NOSLEEP);
898 if (buf_hash_table.ht_table == NULL) {
899 ASSERT(hsize > (1ULL << 8));
900 hsize >>= 1;
901 goto retry;
902 }
903
904 hdr_cache = kmem_cache_create("arc_buf_hdr_t", sizeof (arc_buf_hdr_t),
905 0, hdr_cons, hdr_dest, hdr_recl, NULL, NULL, 0);
906 buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
907 0, buf_cons, buf_dest, NULL, NULL, NULL, 0);
908
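	/*
	 * Fill in the 256-entry reflected CRC-64 table (ZFS_CRC64_POLY)
	 * that buf_hash() uses to hash a buffer's identity.
	 */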
909 for (i = 0; i < 256; i++)
910 for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
911 *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
912
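	/*
	 * Only BUF_LOCKS (256) hash locks are created, regardless of
	 * how large the bucket array grew above.
	 */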
913 for (i = 0; i < BUF_LOCKS; i++) {
914 mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
915 NULL, MUTEX_DEFAULT, NULL);
916 }
917 }
918
919 #define ARC_MINTIME (hz>>4) /* 62 ms */
920
921 static void
922 arc_cksum_verify(arc_buf_t *buf)
923 {
924 zio_cksum_t zc;
925
926 if (!(zfs_flags & ZFS_DEBUG_MODIFY))
927 return;
928
929 mutex_enter(&buf->b_hdr->b_freeze_lock);
930 if (buf->b_hdr->b_freeze_cksum == NULL ||
931 (buf->b_hdr->b_flags & ARC_IO_ERROR)) {
932 mutex_exit(&buf->b_hdr->b_freeze_lock);
933 return;
934 }
|
528 #define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_BUF_AVAILABLE)
529 #define HDR_FREE_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FREE_IN_PROGRESS)
530 #define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_L2CACHE)
531 #define HDR_L2_READING(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS && \
532 (hdr)->b_l2hdr != NULL)
533 #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_L2_WRITING)
534 #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_L2_EVICTED)
535 #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_L2_WRITE_HEAD)
536
537 /*
538 * Other sizes
539 */
540
541 #define HDR_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
542 #define L2HDR_SIZE ((int64_t)sizeof (l2arc_buf_hdr_t))
543
544 /*
545 * Hash table routines
546 */
547
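/*
 * Each hash bucket now carries its own chain head and lock, so the
 * number of locks scales with the table instead of being capped at
 * a fixed stripe of BUF_LOCKS padded locks.
 */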
548 struct ht_table {
549 arc_buf_hdr_t *hdr;
550 kmutex_t lock;
551 };
552
553 typedef struct buf_hash_table {
554 uint64_t ht_mask;
555 struct ht_table *ht_table;
556 } buf_hash_table_t;
557
558 static buf_hash_table_t buf_hash_table;
559
560 #define BUF_HASH_INDEX(spa, dva, birth) \
561 (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
562 #define BUF_HASH_LOCK(idx) (&buf_hash_table.ht_table[idx].lock)
563 #define HDR_LOCK(hdr) \
564 (BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
565
566 uint64_t zfs_crc64_table[256];
567
568 /*
569 * Level 2 ARC
570 */
571
572 #define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */
573 #define L2ARC_HEADROOM 2 /* num of writes */
574 #define L2ARC_FEED_SECS 1 /* caching interval secs */
575 #define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */
576
577 #define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent)
578 #define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done)
579
580 /*
581 * L2ARC Performance Tunables
582 */
679 ((buf)->b_dva.dva_word[1] == (dva)->dva_word[1]) && \
680 ((buf)->b_birth == birth) && ((buf)->b_spa == spa)
681
682 static void
683 buf_discard_identity(arc_buf_hdr_t *hdr)
684 {
685 hdr->b_dva.dva_word[0] = 0;
686 hdr->b_dva.dva_word[1] = 0;
687 hdr->b_birth = 0;
688 hdr->b_cksum0 = 0;
689 }
690
691 static arc_buf_hdr_t *
692 buf_hash_find(uint64_t spa, const dva_t *dva, uint64_t birth, kmutex_t **lockp)
693 {
694 uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
695 kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
696 arc_buf_hdr_t *buf;
697
698 mutex_enter(hash_lock);
699 for (buf = buf_hash_table.ht_table[idx].hdr; buf != NULL;
700 buf = buf->b_hash_next) {
701 if (BUF_EQUAL(spa, dva, birth, buf)) {
702 *lockp = hash_lock;
703 return (buf);
704 }
705 }
706 mutex_exit(hash_lock);
707 *lockp = NULL;
708 return (NULL);
709 }
710
711 /*
712 * Insert an entry into the hash table. If there is already an element
713 * equal to elem in the hash table, then the already existing element
714 * will be returned and the new element will not be inserted.
715 * Otherwise returns NULL.
716 */
717 static arc_buf_hdr_t *
718 buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp)
719 {
720 uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth);
721 kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
722 arc_buf_hdr_t *fbuf;
723 uint32_t i;
724
725 ASSERT(!HDR_IN_HASH_TABLE(buf));
726 *lockp = hash_lock;
727 mutex_enter(hash_lock);
728 for (fbuf = buf_hash_table.ht_table[idx].hdr, i = 0; fbuf != NULL;
729 fbuf = fbuf->b_hash_next, i++) {
730 if (BUF_EQUAL(buf->b_spa, &buf->b_dva, buf->b_birth, fbuf))
731 return (fbuf);
732 }
733
734 buf->b_hash_next = buf_hash_table.ht_table[idx].hdr;
735 buf_hash_table.ht_table[idx].hdr = buf;
736 buf->b_flags |= ARC_IN_HASH_TABLE;
737
738 /* collect some hash table performance data */
739 if (i > 0) {
740 ARCSTAT_BUMP(arcstat_hash_collisions);
741 if (i == 1)
742 ARCSTAT_BUMP(arcstat_hash_chains);
743
744 ARCSTAT_MAX(arcstat_hash_chain_max, i);
745 }
746
747 ARCSTAT_BUMP(arcstat_hash_elements);
748 ARCSTAT_MAXSTAT(arcstat_hash_elements);
749
750 return (NULL);
751 }
752
753 static void
754 buf_hash_remove(arc_buf_hdr_t *buf)
755 {
756 arc_buf_hdr_t *fbuf, **bufp;
757 uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth);
758
759 ASSERT(MUTEX_HELD(BUF_HASH_LOCK(idx)));
760 ASSERT(HDR_IN_HASH_TABLE(buf));
761
762 bufp = &buf_hash_table.ht_table[idx].hdr;
763 while ((fbuf = *bufp) != buf) {
764 ASSERT(fbuf != NULL);
765 bufp = &fbuf->b_hash_next;
766 }
767 *bufp = buf->b_hash_next;
768 buf->b_hash_next = NULL;
769 buf->b_flags &= ~ARC_IN_HASH_TABLE;
770
771 /* collect some hash table performance data */
772 ARCSTAT_BUMPDOWN(arcstat_hash_elements);
773
774 if (buf_hash_table.ht_table[idx].hdr &&
775 buf_hash_table.ht_table[idx].hdr->b_hash_next == NULL)
776 ARCSTAT_BUMPDOWN(arcstat_hash_chains);
777 }
778
779 /*
780 * Global data structures and functions for the buf kmem cache.
781 */
782 static kmem_cache_t *hdr_cache;
783 static kmem_cache_t *buf_cache;
784
785 static void
786 buf_fini(void)
787 {
788 	uint64_t i;
789
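	/* Destroy every per-bucket lock before freeing the bucket array. */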
790 for (i = 0; i < buf_hash_table.ht_mask + 1; i++)
791 mutex_destroy(&buf_hash_table.ht_table[i].lock);
792 kmem_free(buf_hash_table.ht_table,
793 (buf_hash_table.ht_mask + 1) * sizeof (struct ht_table));
794 kmem_cache_destroy(hdr_cache);
795 kmem_cache_destroy(buf_cache);
796 }
797
798 /*
799 * Constructor callback - called when the cache is empty
800 * and a new buf is requested.
801 */
802 /* ARGSUSED */
803 static int
804 hdr_cons(void *vbuf, void *unused, int kmflag)
805 {
806 arc_buf_hdr_t *buf = vbuf;
807
808 bzero(buf, sizeof (arc_buf_hdr_t));
809 refcount_create(&buf->b_refcnt);
810 cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL);
811 mutex_init(&buf->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
812 arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
813
870 cv_signal(&arc_reclaim_thr_cv);
871 }
872
873 static void
874 buf_init(void)
875 {
876 uint64_t *ct;
877 uint64_t hsize = 1ULL << 12;
878 	uint64_t i;
	int j;
879
880 /*
881 * The hash table is big enough to fill all of physical memory
882 * with an average 64K block size. The table will take up
883 	 * totalmem*sizeof (struct ht_table)/64K (a chain head pointer plus
	 * a per-bucket lock for every 64K of memory).
884 */
885 while (hsize * 65536 < physmem * PAGESIZE)
886 hsize <<= 1;
887 retry:
888 buf_hash_table.ht_mask = hsize - 1;
889 buf_hash_table.ht_table =
890 kmem_zalloc(hsize * sizeof (struct ht_table), KM_NOSLEEP);
891 if (buf_hash_table.ht_table == NULL) {
892 ASSERT(hsize > (1ULL << 8));
893 hsize >>= 1;
894 goto retry;
895 }
896
897 hdr_cache = kmem_cache_create("arc_buf_hdr_t", sizeof (arc_buf_hdr_t),
898 0, hdr_cons, hdr_dest, hdr_recl, NULL, NULL, 0);
899 buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
900 0, buf_cons, buf_dest, NULL, NULL, NULL, 0);
901
902 for (i = 0; i < 256; i++)
903 for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
904 *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
905
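	/*
	 * One lock per bucket: initialize all hsize locks, rather than
	 * a fixed set of 256 as in the old layout.
	 */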
906 for (i = 0; i < hsize; i++) {
907 mutex_init(&buf_hash_table.ht_table[i].lock,
908 NULL, MUTEX_DEFAULT, NULL);
909 }
910 }
911
912 #define ARC_MINTIME (hz>>4) /* 62 ms */
913
914 static void
915 arc_cksum_verify(arc_buf_t *buf)
916 {
917 zio_cksum_t zc;
918
919 if (!(zfs_flags & ZFS_DEBUG_MODIFY))
920 return;
921
922 mutex_enter(&buf->b_hdr->b_freeze_lock);
923 if (buf->b_hdr->b_freeze_cksum == NULL ||
924 (buf->b_hdr->b_flags & ARC_IO_ERROR)) {
925 mutex_exit(&buf->b_hdr->b_freeze_lock);
926 return;
927 }