4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24 */
25
26 /*
27 * This file contains the top half of the zfs directory structure
28 * implementation. The bottom half is in zap_leaf.c.
29 *
30 * The zdir is an extendable hash data structure. There is a table of
31 * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are
32 * each a constant size and hold a variable number of directory entries.
33 * The buckets (aka "leaf nodes") are implemented in zap_leaf.c.
34 *
35 * The pointer table holds a power of 2 number of pointers
36 * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). The bucket pointed to
37 * by the pointer at index i in the table holds entries whose hash value
38 * has a zd_prefix_len-bit prefix equal to i.
39 */
40
41 #include <sys/spa.h>
42 #include <sys/dmu.h>
43 #include <sys/zfs_context.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/fs/zfs.h>
46 #include <sys/zap.h>
47 #include <sys/refcount.h>
48 #include <sys/zap_impl.h>
49 #include <sys/zap_leaf.h>
50
51 int fzap_default_block_shift = 14; /* 16k blocksize */
52
53 extern inline zap_phys_t *zap_f_phys(zap_t *zap);
54
55 static void zap_leaf_pageout(dmu_buf_t *db, void *vl);
56 static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
57
58 void
59 fzap_byteswap(void *vbuf, size_t size)
60 {
61 uint64_t block_type;
62
63 block_type = *(uint64_t *)vbuf;
64
65 if (block_type == ZBT_LEAF || block_type == BSWAP_64(ZBT_LEAF))
66 zap_leaf_byteswap(vbuf, size);
67 else {
68 /* it's a ptrtbl block */
69 byteswap_uint64_array(vbuf, size);
70 }
71 }
72
73 void
74 fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
75 {
76 dmu_buf_t *db;
77 zap_leaf_t *l;
78 int i;
79 zap_phys_t *zp;
80
81 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
82 zap->zap_ismicro = FALSE;
83
84 (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, zap_evict);
85
86 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
87 zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
88
89 zp = zap_f_phys(zap);
90 /*
91 * explicitly zero it since it might be coming from an
92 * initialized microzap
93 */
94 bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size);
95 zp->zap_block_type = ZBT_HEADER;
96 zp->zap_magic = ZAP_MAGIC;
97
98 zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);
99
100 zp->zap_freeblk = 2; /* block 1 will be the first leaf */
101 zp->zap_num_leafs = 1;
102 zp->zap_num_entries = 0;
103 zp->zap_salt = zap->zap_salt;
104 zp->zap_normflags = zap->zap_normflags;
370 static void
371 zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx)
372 {
373 dmu_buf_will_dirty(zap->zap_dbuf, tx);
374 mutex_enter(&zap->zap_f.zap_num_entries_mtx);
375 ASSERT(delta > 0 || zap_f_phys(zap)->zap_num_entries >= -delta);
376 zap_f_phys(zap)->zap_num_entries += delta;
377 mutex_exit(&zap->zap_f.zap_num_entries_mtx);
378 }
379
380 static uint64_t
381 zap_allocate_blocks(zap_t *zap, int nblocks)
382 {
383 uint64_t newblk;
384 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
385 newblk = zap_f_phys(zap)->zap_freeblk;
386 zap_f_phys(zap)->zap_freeblk += nblocks;
387 return (newblk);
388 }
389
390 static zap_leaf_t *
391 zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
392 {
393 void *winner;
394 zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
395
396 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
397
398 rw_init(&l->l_rwlock, 0, 0, 0);
399 rw_enter(&l->l_rwlock, RW_WRITER);
400 l->l_blkid = zap_allocate_blocks(zap, 1);
401 l->l_dbuf = NULL;
402
403 VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
404 l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
405 DMU_READ_NO_PREFETCH));
406 winner = dmu_buf_set_user(l->l_dbuf, l, zap_leaf_pageout);
407 ASSERT(winner == NULL);
408 dmu_buf_will_dirty(l->l_dbuf, tx);
409
410 zap_leaf_init(l, zap->zap_normflags != 0);
411
412 zap_f_phys(zap)->zap_num_leafs++;
413
414 return (l);
415 }
416
417 int
418 fzap_count(zap_t *zap, uint64_t *count)
419 {
420 ASSERT(!zap->zap_ismicro);
421 mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */
422 *count = zap_f_phys(zap)->zap_num_entries;
423 mutex_exit(&zap->zap_f.zap_num_entries_mtx);
424 return (0);
425 }
426
427 /*
428 * Routines for obtaining zap_leaf_t's
429 */
430
431 void
432 zap_put_leaf(zap_leaf_t *l)
433 {
434 rw_exit(&l->l_rwlock);
435 dmu_buf_rele(l->l_dbuf, NULL);
436 }
437
438 _NOTE(ARGSUSED(0))
439 static void
440 zap_leaf_pageout(dmu_buf_t *db, void *vl)
441 {
442 zap_leaf_t *l = vl;
443
444 rw_destroy(&l->l_rwlock);
445 kmem_free(l, sizeof (zap_leaf_t));
446 }
447
448 static zap_leaf_t *
449 zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
450 {
451 zap_leaf_t *l, *winner;
452
453 ASSERT(blkid != 0);
454
455 l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
456 rw_init(&l->l_rwlock, 0, 0, 0);
457 rw_enter(&l->l_rwlock, RW_WRITER);
458 l->l_blkid = blkid;
459 l->l_bs = highbit64(db->db_size) - 1;
460 l->l_dbuf = db;
461
462 winner = dmu_buf_set_user(db, l, zap_leaf_pageout);
463
464 rw_exit(&l->l_rwlock);
465 if (winner != NULL) {
466 /* someone else set it first */
467 zap_leaf_pageout(NULL, l);
468 l = winner;
469 }
470
471 /*
472 * lhr_pad was previously used for the next leaf in the leaf
473 * chain. There should be no chained leafs (as we have removed
474 * support for them).
475 */
476 ASSERT0(zap_leaf_phys(l)->l_hdr.lh_pad1);
477
478 /*
479 * There should be more hash entries than there can be
480 * chunks to put in the hash table
481 */
482 ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3);
483
484 /* The chunks should begin at the end of the hash table */
485 ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==,
486 &zap_leaf_phys(l)->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]);
487
|
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
25 */
26
27 /*
28 * This file contains the top half of the zfs directory structure
29 * implementation. The bottom half is in zap_leaf.c.
30 *
31 * The zdir is an extendable hash data structure. There is a table of
32 * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are
33 * each a constant size and hold a variable number of directory entries.
34 * The buckets (aka "leaf nodes") are implemented in zap_leaf.c.
35 *
36 * The pointer table holds a power of 2 number of pointers
37 * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). The bucket pointed to
38 * by the pointer at index i in the table holds entries whose hash value
39 * has a zd_prefix_len-bit prefix equal to i.
40 */
41
42 #include <sys/spa.h>
43 #include <sys/dmu.h>
44 #include <sys/zfs_context.h>
45 #include <sys/zfs_znode.h>
46 #include <sys/fs/zfs.h>
47 #include <sys/zap.h>
48 #include <sys/refcount.h>
49 #include <sys/zap_impl.h>
50 #include <sys/zap_leaf.h>
51
52 int fzap_default_block_shift = 14; /* 16k blocksize */
53
54 extern inline zap_phys_t *zap_f_phys(zap_t *zap);
55
56 static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
57
58 void
59 fzap_byteswap(void *vbuf, size_t size)
60 {
61 uint64_t block_type;
62
63 block_type = *(uint64_t *)vbuf;
64
65 if (block_type == ZBT_LEAF || block_type == BSWAP_64(ZBT_LEAF))
66 zap_leaf_byteswap(vbuf, size);
67 else {
68 /* it's a ptrtbl block */
69 byteswap_uint64_array(vbuf, size);
70 }
71 }
72
73 void
74 fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
75 {
76 dmu_buf_t *db;
77 zap_leaf_t *l;
78 int i;
79 zap_phys_t *zp;
80
81 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
82 zap->zap_ismicro = FALSE;
83
84 zap->zap_dbu.dbu_evict_func = zap_evict;
85
86 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
87 zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
88
89 zp = zap_f_phys(zap);
90 /*
91 * explicitly zero it since it might be coming from an
92 * initialized microzap
93 */
94 bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size);
95 zp->zap_block_type = ZBT_HEADER;
96 zp->zap_magic = ZAP_MAGIC;
97
98 zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);
99
100 zp->zap_freeblk = 2; /* block 1 will be the first leaf */
101 zp->zap_num_leafs = 1;
102 zp->zap_num_entries = 0;
103 zp->zap_salt = zap->zap_salt;
104 zp->zap_normflags = zap->zap_normflags;
370 static void
371 zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx)
372 {
373 dmu_buf_will_dirty(zap->zap_dbuf, tx);
374 mutex_enter(&zap->zap_f.zap_num_entries_mtx);
375 ASSERT(delta > 0 || zap_f_phys(zap)->zap_num_entries >= -delta);
376 zap_f_phys(zap)->zap_num_entries += delta;
377 mutex_exit(&zap->zap_f.zap_num_entries_mtx);
378 }
379
380 static uint64_t
381 zap_allocate_blocks(zap_t *zap, int nblocks)
382 {
383 uint64_t newblk;
384 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
385 newblk = zap_f_phys(zap)->zap_freeblk;
386 zap_f_phys(zap)->zap_freeblk += nblocks;
387 return (newblk);
388 }
389
390 static void
391 zap_leaf_pageout(void *dbu)
392 {
393 zap_leaf_t *l = dbu;
394
395 rw_destroy(&l->l_rwlock);
396 kmem_free(l, sizeof (zap_leaf_t));
397 }
398
399 static zap_leaf_t *
400 zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
401 {
402 void *winner;
403 zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
404
405 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
406
407 rw_init(&l->l_rwlock, 0, 0, 0);
408 rw_enter(&l->l_rwlock, RW_WRITER);
409 l->l_blkid = zap_allocate_blocks(zap, 1);
410 l->l_dbuf = NULL;
411
412 VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
413 l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
414 DMU_READ_NO_PREFETCH));
415 dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
416 winner = dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
417 ASSERT(winner == NULL);
418 dmu_buf_will_dirty(l->l_dbuf, tx);
419
420 zap_leaf_init(l, zap->zap_normflags != 0);
421
422 zap_f_phys(zap)->zap_num_leafs++;
423
424 return (l);
425 }
426
427 int
428 fzap_count(zap_t *zap, uint64_t *count)
429 {
430 ASSERT(!zap->zap_ismicro);
431 mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */
432 *count = zap_f_phys(zap)->zap_num_entries;
433 mutex_exit(&zap->zap_f.zap_num_entries_mtx);
434 return (0);
435 }
436
437 /*
438 * Routines for obtaining zap_leaf_t's
439 */
440
441 void
442 zap_put_leaf(zap_leaf_t *l)
443 {
444 rw_exit(&l->l_rwlock);
445 dmu_buf_rele(l->l_dbuf, NULL);
446 }
447
448 static zap_leaf_t *
449 zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
450 {
451 zap_leaf_t *l, *winner;
452
453 ASSERT(blkid != 0);
454
455 l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
456 rw_init(&l->l_rwlock, 0, 0, 0);
457 rw_enter(&l->l_rwlock, RW_WRITER);
458 l->l_blkid = blkid;
459 l->l_bs = highbit64(db->db_size) - 1;
460 l->l_dbuf = db;
461
462 dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
463 winner = dmu_buf_set_user(db, &l->l_dbu);
464
465 rw_exit(&l->l_rwlock);
466 if (winner != NULL) {
467 /* someone else set it first */
468 zap_leaf_pageout(&l->l_dbu);
469 l = winner;
470 }
471
472 /*
473 * lhr_pad was previously used for the next leaf in the leaf
474 * chain. There should be no chained leafs (as we have removed
475 * support for them).
476 */
477 ASSERT0(zap_leaf_phys(l)->l_hdr.lh_pad1);
478
479 /*
480 * There should be more hash entries than there can be
481 * chunks to put in the hash table
482 */
483 ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3);
484
485 /* The chunks should begin at the end of the hash table */
486 ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==,
487 &zap_leaf_phys(l)->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]);
488
|