Print this page
    
2619 asynchronous destruction of ZFS file systems
2747 SPA versioning with zfs feature flags
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <gwilson@delphix.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Dan Kruchinin <dan.kruchinin@gmail.com>
Approved by: Dan McDonald <danmcd@nexenta.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/zap_micro.c
          +++ new/usr/src/uts/common/fs/zfs/zap_micro.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright (c) 2011 by Delphix. All rights reserved.
       23 + * Copyright (c) 2012 by Delphix. All rights reserved.
  24   24   */
  25   25  
  26   26  #include <sys/zio.h>
  27   27  #include <sys/spa.h>
  28   28  #include <sys/dmu.h>
  29   29  #include <sys/zfs_context.h>
  30   30  #include <sys/zap.h>
  31   31  #include <sys/refcount.h>
  32   32  #include <sys/zap_impl.h>
  33   33  #include <sys/zap_leaf.h>
  34   34  #include <sys/avl.h>
  35   35  #include <sys/arc.h>
  36   36  
  37   37  #ifdef _KERNEL
  38   38  #include <sys/sunddi.h>
  39   39  #endif
  40   40  
  41   41  static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
  42   42  
  43   43  uint64_t
  44   44  zap_getflags(zap_t *zap)
  45   45  {
  46   46          if (zap->zap_ismicro)
  47   47                  return (0);
  48   48          return (zap->zap_u.zap_fat.zap_phys->zap_flags);
  49   49  }
  50   50  
  51   51  int
  52   52  zap_hashbits(zap_t *zap)
  53   53  {
  54   54          if (zap_getflags(zap) & ZAP_FLAG_HASH64)
  55   55                  return (48);
  56   56          else
  57   57                  return (28);
  58   58  }
  59   59  
  60   60  uint32_t
  61   61  zap_maxcd(zap_t *zap)
  62   62  {
  63   63          if (zap_getflags(zap) & ZAP_FLAG_HASH64)
  64   64                  return ((1<<16)-1);
  65   65          else
  66   66                  return (-1U);
  67   67  }
  68   68  
  69   69  static uint64_t
  70   70  zap_hash(zap_name_t *zn)
  71   71  {
  72   72          zap_t *zap = zn->zn_zap;
  73   73          uint64_t h = 0;
  74   74  
  75   75          if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
  76   76                  ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
  77   77                  h = *(uint64_t *)zn->zn_key_orig;
  78   78          } else {
  79   79                  h = zap->zap_salt;
  80   80                  ASSERT(h != 0);
  81   81                  ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
  82   82  
  83   83                  if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
  84   84                          int i;
  85   85                          const uint64_t *wp = zn->zn_key_norm;
  86   86  
  87   87                          ASSERT(zn->zn_key_intlen == 8);
  88   88                          for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) {
  89   89                                  int j;
  90   90                                  uint64_t word = *wp;
  91   91  
  92   92                                  for (j = 0; j < zn->zn_key_intlen; j++) {
  93   93                                          h = (h >> 8) ^
  94   94                                              zfs_crc64_table[(h ^ word) & 0xFF];
  95   95                                          word >>= NBBY;
  96   96                                  }
  97   97                          }
  98   98                  } else {
  99   99                          int i, len;
 100  100                          const uint8_t *cp = zn->zn_key_norm;
 101  101  
 102  102                          /*
 103  103                           * We previously stored the terminating null on
 104  104                           * disk, but didn't hash it, so we need to
 105  105                           * continue to not hash it.  (The
 106  106                           * zn_key_*_numints includes the terminating
 107  107                           * null for non-binary keys.)
 108  108                           */
 109  109                          len = zn->zn_key_norm_numints - 1;
 110  110  
 111  111                          ASSERT(zn->zn_key_intlen == 1);
 112  112                          for (i = 0; i < len; cp++, i++) {
 113  113                                  h = (h >> 8) ^
 114  114                                      zfs_crc64_table[(h ^ *cp) & 0xFF];
 115  115                          }
 116  116                  }
 117  117          }
 118  118          /*
 119  119           * Don't use all 64 bits, since we need some in the cookie for
 120  120           * the collision differentiator.  We MUST use the high bits,
 121  121           * since those are the ones that we first pay attention to when
 122  122           * chosing the bucket.
 123  123           */
 124  124          h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
 125  125  
 126  126          return (h);
 127  127  }
 128  128  
 129  129  static int
 130  130  zap_normalize(zap_t *zap, const char *name, char *namenorm)
 131  131  {
 132  132          size_t inlen, outlen;
 133  133          int err;
 134  134  
 135  135          ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));
 136  136  
 137  137          inlen = strlen(name) + 1;
 138  138          outlen = ZAP_MAXNAMELEN;
 139  139  
 140  140          err = 0;
 141  141          (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
 142  142              zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL |
 143  143              U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err);
 144  144  
 145  145          return (err);
 146  146  }
 147  147  
 148  148  boolean_t
 149  149  zap_match(zap_name_t *zn, const char *matchname)
 150  150  {
 151  151          ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
 152  152  
 153  153          if (zn->zn_matchtype == MT_FIRST) {
 154  154                  char norm[ZAP_MAXNAMELEN];
 155  155  
 156  156                  if (zap_normalize(zn->zn_zap, matchname, norm) != 0)
 157  157                          return (B_FALSE);
 158  158  
 159  159                  return (strcmp(zn->zn_key_norm, norm) == 0);
 160  160          } else {
 161  161                  /* MT_BEST or MT_EXACT */
 162  162                  return (strcmp(zn->zn_key_orig, matchname) == 0);
 163  163          }
 164  164  }
 165  165  
 166  166  void
 167  167  zap_name_free(zap_name_t *zn)
 168  168  {
 169  169          kmem_free(zn, sizeof (zap_name_t));
 170  170  }
 171  171  
 172  172  zap_name_t *
 173  173  zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
 174  174  {
 175  175          zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
 176  176  
 177  177          zn->zn_zap = zap;
 178  178          zn->zn_key_intlen = sizeof (*key);
 179  179          zn->zn_key_orig = key;
 180  180          zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
 181  181          zn->zn_matchtype = mt;
 182  182          if (zap->zap_normflags) {
 183  183                  if (zap_normalize(zap, key, zn->zn_normbuf) != 0) {
 184  184                          zap_name_free(zn);
 185  185                          return (NULL);
 186  186                  }
 187  187                  zn->zn_key_norm = zn->zn_normbuf;
 188  188                  zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
 189  189          } else {
 190  190                  if (mt != MT_EXACT) {
 191  191                          zap_name_free(zn);
 192  192                          return (NULL);
 193  193                  }
 194  194                  zn->zn_key_norm = zn->zn_key_orig;
 195  195                  zn->zn_key_norm_numints = zn->zn_key_orig_numints;
 196  196          }
 197  197  
 198  198          zn->zn_hash = zap_hash(zn);
 199  199          return (zn);
 200  200  }
 201  201  
 202  202  zap_name_t *
 203  203  zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
 204  204  {
 205  205          zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
 206  206  
 207  207          ASSERT(zap->zap_normflags == 0);
 208  208          zn->zn_zap = zap;
 209  209          zn->zn_key_intlen = sizeof (*key);
 210  210          zn->zn_key_orig = zn->zn_key_norm = key;
 211  211          zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
 212  212          zn->zn_matchtype = MT_EXACT;
 213  213  
 214  214          zn->zn_hash = zap_hash(zn);
 215  215          return (zn);
 216  216  }
 217  217  
 218  218  static void
 219  219  mzap_byteswap(mzap_phys_t *buf, size_t size)
 220  220  {
 221  221          int i, max;
 222  222          buf->mz_block_type = BSWAP_64(buf->mz_block_type);
 223  223          buf->mz_salt = BSWAP_64(buf->mz_salt);
 224  224          buf->mz_normflags = BSWAP_64(buf->mz_normflags);
 225  225          max = (size / MZAP_ENT_LEN) - 1;
 226  226          for (i = 0; i < max; i++) {
 227  227                  buf->mz_chunk[i].mze_value =
 228  228                      BSWAP_64(buf->mz_chunk[i].mze_value);
 229  229                  buf->mz_chunk[i].mze_cd =
 230  230                      BSWAP_32(buf->mz_chunk[i].mze_cd);
 231  231          }
 232  232  }
 233  233  
 234  234  void
 235  235  zap_byteswap(void *buf, size_t size)
 236  236  {
 237  237          uint64_t block_type;
 238  238  
 239  239          block_type = *(uint64_t *)buf;
 240  240  
 241  241          if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
 242  242                  /* ASSERT(magic == ZAP_LEAF_MAGIC); */
 243  243                  mzap_byteswap(buf, size);
 244  244          } else {
 245  245                  fzap_byteswap(buf, size);
 246  246          }
 247  247  }
 248  248  
 249  249  static int
 250  250  mze_compare(const void *arg1, const void *arg2)
 251  251  {
 252  252          const mzap_ent_t *mze1 = arg1;
 253  253          const mzap_ent_t *mze2 = arg2;
 254  254  
 255  255          if (mze1->mze_hash > mze2->mze_hash)
 256  256                  return (+1);
 257  257          if (mze1->mze_hash < mze2->mze_hash)
 258  258                  return (-1);
 259  259          if (mze1->mze_cd > mze2->mze_cd)
 260  260                  return (+1);
 261  261          if (mze1->mze_cd < mze2->mze_cd)
 262  262                  return (-1);
 263  263          return (0);
 264  264  }
 265  265  
 266  266  static void
 267  267  mze_insert(zap_t *zap, int chunkid, uint64_t hash)
 268  268  {
 269  269          mzap_ent_t *mze;
 270  270  
 271  271          ASSERT(zap->zap_ismicro);
 272  272          ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
 273  273  
 274  274          mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
 275  275          mze->mze_chunkid = chunkid;
 276  276          mze->mze_hash = hash;
 277  277          mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
 278  278          ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
 279  279          avl_add(&zap->zap_m.zap_avl, mze);
 280  280  }
 281  281  
 282  282  static mzap_ent_t *
 283  283  mze_find(zap_name_t *zn)
 284  284  {
 285  285          mzap_ent_t mze_tofind;
 286  286          mzap_ent_t *mze;
 287  287          avl_index_t idx;
 288  288          avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
 289  289  
 290  290          ASSERT(zn->zn_zap->zap_ismicro);
 291  291          ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
 292  292  
 293  293          mze_tofind.mze_hash = zn->zn_hash;
 294  294          mze_tofind.mze_cd = 0;
 295  295  
 296  296  again:
 297  297          mze = avl_find(avl, &mze_tofind, &idx);
 298  298          if (mze == NULL)
 299  299                  mze = avl_nearest(avl, idx, AVL_AFTER);
 300  300          for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
 301  301                  ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
 302  302                  if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
 303  303                          return (mze);
 304  304          }
 305  305          if (zn->zn_matchtype == MT_BEST) {
 306  306                  zn->zn_matchtype = MT_FIRST;
 307  307                  goto again;
 308  308          }
 309  309          return (NULL);
 310  310  }
 311  311  
 312  312  static uint32_t
 313  313  mze_find_unused_cd(zap_t *zap, uint64_t hash)
 314  314  {
 315  315          mzap_ent_t mze_tofind;
 316  316          mzap_ent_t *mze;
 317  317          avl_index_t idx;
 318  318          avl_tree_t *avl = &zap->zap_m.zap_avl;
 319  319          uint32_t cd;
 320  320  
 321  321          ASSERT(zap->zap_ismicro);
 322  322          ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
 323  323  
 324  324          mze_tofind.mze_hash = hash;
 325  325          mze_tofind.mze_cd = 0;
 326  326  
 327  327          cd = 0;
 328  328          for (mze = avl_find(avl, &mze_tofind, &idx);
 329  329              mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
 330  330                  if (mze->mze_cd != cd)
 331  331                          break;
 332  332                  cd++;
 333  333          }
 334  334  
 335  335          return (cd);
 336  336  }
 337  337  
 338  338  static void
 339  339  mze_remove(zap_t *zap, mzap_ent_t *mze)
 340  340  {
 341  341          ASSERT(zap->zap_ismicro);
 342  342          ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
 343  343  
 344  344          avl_remove(&zap->zap_m.zap_avl, mze);
 345  345          kmem_free(mze, sizeof (mzap_ent_t));
 346  346  }
 347  347  
 348  348  static void
 349  349  mze_destroy(zap_t *zap)
 350  350  {
 351  351          mzap_ent_t *mze;
 352  352          void *avlcookie = NULL;
 353  353  
 354  354          while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
 355  355                  kmem_free(mze, sizeof (mzap_ent_t));
 356  356          avl_destroy(&zap->zap_m.zap_avl);
 357  357  }
 358  358  
 359  359  static zap_t *
 360  360  mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
 361  361  {
 362  362          zap_t *winner;
 363  363          zap_t *zap;
 364  364          int i;
 365  365  
 366  366          ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
 367  367  
 368  368          zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
 369  369          rw_init(&zap->zap_rwlock, 0, 0, 0);
 370  370          rw_enter(&zap->zap_rwlock, RW_WRITER);
 371  371          zap->zap_objset = os;
 372  372          zap->zap_object = obj;
 373  373          zap->zap_dbuf = db;
 374  374  
 375  375          if (*(uint64_t *)db->db_data != ZBT_MICRO) {
 376  376                  mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
 377  377                  zap->zap_f.zap_block_shift = highbit(db->db_size) - 1;
 378  378          } else {
 379  379                  zap->zap_ismicro = TRUE;
 380  380          }
 381  381  
 382  382          /*
 383  383           * Make sure that zap_ismicro is set before we let others see
 384  384           * it, because zap_lockdir() checks zap_ismicro without the lock
 385  385           * held.
 386  386           */
 387  387          winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict);
 388  388  
 389  389          if (winner != NULL) {
 390  390                  rw_exit(&zap->zap_rwlock);
 391  391                  rw_destroy(&zap->zap_rwlock);
 392  392                  if (!zap->zap_ismicro)
 393  393                          mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
 394  394                  kmem_free(zap, sizeof (zap_t));
 395  395                  return (winner);
 396  396          }
 397  397  
 398  398          if (zap->zap_ismicro) {
 399  399                  zap->zap_salt = zap->zap_m.zap_phys->mz_salt;
 400  400                  zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags;
 401  401                  zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
 402  402                  avl_create(&zap->zap_m.zap_avl, mze_compare,
 403  403                      sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
 404  404  
 405  405                  for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
 406  406                          mzap_ent_phys_t *mze =
 407  407                              &zap->zap_m.zap_phys->mz_chunk[i];
 408  408                          if (mze->mze_name[0]) {
 409  409                                  zap_name_t *zn;
 410  410  
 411  411                                  zap->zap_m.zap_num_entries++;
 412  412                                  zn = zap_name_alloc(zap, mze->mze_name,
 413  413                                      MT_EXACT);
 414  414                                  mze_insert(zap, i, zn->zn_hash);
 415  415                                  zap_name_free(zn);
 416  416                          }
 417  417                  }
 418  418          } else {
 419  419                  zap->zap_salt = zap->zap_f.zap_phys->zap_salt;
 420  420                  zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags;
 421  421  
 422  422                  ASSERT3U(sizeof (struct zap_leaf_header), ==,
 423  423                      2*ZAP_LEAF_CHUNKSIZE);
 424  424  
 425  425                  /*
 426  426                   * The embedded pointer table should not overlap the
 427  427                   * other members.
 428  428                   */
 429  429                  ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
 430  430                      &zap->zap_f.zap_phys->zap_salt);
 431  431  
 432  432                  /*
 433  433                   * The embedded pointer table should end at the end of
 434  434                   * the block
 435  435                   */
 436  436                  ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
 437  437                      1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
 438  438                      (uintptr_t)zap->zap_f.zap_phys, ==,
 439  439                      zap->zap_dbuf->db_size);
 440  440          }
 441  441          rw_exit(&zap->zap_rwlock);
 442  442          return (zap);
 443  443  }
 444  444  
 445  445  int
 446  446  zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
 447  447      krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
 448  448  {
 449  449          zap_t *zap;
 450  450          dmu_buf_t *db;
 451  451          krw_t lt;
 452  452          int err;
 453  453  
  
    | 
      ↓ open down ↓ | 
    420 lines elided | 
    
      ↑ open up ↑ | 
  
 454  454          *zapp = NULL;
 455  455  
 456  456          err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
 457  457          if (err)
 458  458                  return (err);
 459  459  
 460  460  #ifdef ZFS_DEBUG
 461  461          {
 462  462                  dmu_object_info_t doi;
 463  463                  dmu_object_info_from_db(db, &doi);
 464      -                ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
      464 +                ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
 465  465          }
 466  466  #endif
 467  467  
 468  468          zap = dmu_buf_get_user(db);
 469  469          if (zap == NULL)
 470  470                  zap = mzap_open(os, obj, db);
 471  471  
 472  472          /*
 473  473           * We're checking zap_ismicro without the lock held, in order to
 474  474           * tell what type of lock we want.  Once we have some sort of
 475  475           * lock, see if it really is the right type.  In practice this
 476  476           * can only be different if it was upgraded from micro to fat,
 477  477           * and micro wanted WRITER but fat only needs READER.
 478  478           */
 479  479          lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
 480  480          rw_enter(&zap->zap_rwlock, lt);
 481  481          if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
 482  482                  /* it was upgraded, now we only need reader */
 483  483                  ASSERT(lt == RW_WRITER);
 484  484                  ASSERT(RW_READER ==
 485  485                      (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
 486  486                  rw_downgrade(&zap->zap_rwlock);
 487  487                  lt = RW_READER;
 488  488          }
 489  489  
 490  490          zap->zap_objset = os;
 491  491  
 492  492          if (lt == RW_WRITER)
 493  493                  dmu_buf_will_dirty(db, tx);
 494  494  
 495  495          ASSERT3P(zap->zap_dbuf, ==, db);
 496  496  
 497  497          ASSERT(!zap->zap_ismicro ||
 498  498              zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
 499  499          if (zap->zap_ismicro && tx && adding &&
 500  500              zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
 501  501                  uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
 502  502                  if (newsz > MZAP_MAX_BLKSZ) {
 503  503                          dprintf("upgrading obj %llu: num_entries=%u\n",
 504  504                              obj, zap->zap_m.zap_num_entries);
 505  505                          *zapp = zap;
 506  506                          return (mzap_upgrade(zapp, tx, 0));
 507  507                  }
 508  508                  err = dmu_object_set_blocksize(os, obj, newsz, 0, tx);
 509  509                  ASSERT3U(err, ==, 0);
 510  510                  zap->zap_m.zap_num_chunks =
 511  511                      db->db_size / MZAP_ENT_LEN - 1;
 512  512          }
 513  513  
 514  514          *zapp = zap;
 515  515          return (0);
 516  516  }
 517  517  
 518  518  void
 519  519  zap_unlockdir(zap_t *zap)
 520  520  {
 521  521          rw_exit(&zap->zap_rwlock);
 522  522          dmu_buf_rele(zap->zap_dbuf, NULL);
 523  523  }
 524  524  
 525  525  static int
 526  526  mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
 527  527  {
 528  528          mzap_phys_t *mzp;
 529  529          int i, sz, nchunks;
 530  530          int err = 0;
 531  531          zap_t *zap = *zapp;
 532  532  
 533  533          ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
 534  534  
 535  535          sz = zap->zap_dbuf->db_size;
 536  536          mzp = kmem_alloc(sz, KM_SLEEP);
 537  537          bcopy(zap->zap_dbuf->db_data, mzp, sz);
 538  538          nchunks = zap->zap_m.zap_num_chunks;
 539  539  
 540  540          if (!flags) {
 541  541                  err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
 542  542                      1ULL << fzap_default_block_shift, 0, tx);
 543  543                  if (err) {
 544  544                          kmem_free(mzp, sz);
 545  545                          return (err);
 546  546                  }
 547  547          }
 548  548  
 549  549          dprintf("upgrading obj=%llu with %u chunks\n",
 550  550              zap->zap_object, nchunks);
 551  551          /* XXX destroy the avl later, so we can use the stored hash value */
 552  552          mze_destroy(zap);
 553  553  
 554  554          fzap_upgrade(zap, tx, flags);
 555  555  
 556  556          for (i = 0; i < nchunks; i++) {
 557  557                  mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
 558  558                  zap_name_t *zn;
 559  559                  if (mze->mze_name[0] == 0)
 560  560                          continue;
 561  561                  dprintf("adding %s=%llu\n",
 562  562                      mze->mze_name, mze->mze_value);
 563  563                  zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
 564  564                  err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx);
 565  565                  zap = zn->zn_zap;       /* fzap_add_cd() may change zap */
 566  566                  zap_name_free(zn);
 567  567                  if (err)
 568  568                          break;
 569  569          }
 570  570          kmem_free(mzp, sz);
 571  571          *zapp = zap;
 572  572          return (err);
 573  573  }
 574  574  
 575  575  static void
 576  576  mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
 577  577      dmu_tx_t *tx)
  
    | 
      ↓ open down ↓ | 
    103 lines elided | 
    
      ↑ open up ↑ | 
  
 578  578  {
 579  579          dmu_buf_t *db;
 580  580          mzap_phys_t *zp;
 581  581  
 582  582          VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
 583  583  
 584  584  #ifdef ZFS_DEBUG
 585  585          {
 586  586                  dmu_object_info_t doi;
 587  587                  dmu_object_info_from_db(db, &doi);
 588      -                ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
      588 +                ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
 589  589          }
 590  590  #endif
 591  591  
 592  592          dmu_buf_will_dirty(db, tx);
 593  593          zp = db->db_data;
 594  594          zp->mz_block_type = ZBT_MICRO;
 595  595          zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL;
 596  596          zp->mz_normflags = normflags;
 597  597          dmu_buf_rele(db, FTAG);
 598  598  
 599  599          if (flags != 0) {
 600  600                  zap_t *zap;
 601  601                  /* Only fat zap supports flags; upgrade immediately. */
 602  602                  VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER,
 603  603                      B_FALSE, B_FALSE, &zap));
 604  604                  VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags));
 605  605                  zap_unlockdir(zap);
 606  606          }
 607  607  }
 608  608  
 609  609  int
 610  610  zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
 611  611      dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 612  612  {
 613  613          return (zap_create_claim_norm(os, obj,
 614  614              0, ot, bonustype, bonuslen, tx));
 615  615  }
 616  616  
 617  617  int
 618  618  zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
 619  619      dmu_object_type_t ot,
 620  620      dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 621  621  {
 622  622          int err;
 623  623  
 624  624          err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
 625  625          if (err != 0)
 626  626                  return (err);
 627  627          mzap_create_impl(os, obj, normflags, 0, tx);
 628  628          return (0);
 629  629  }
 630  630  
 631  631  uint64_t
 632  632  zap_create(objset_t *os, dmu_object_type_t ot,
 633  633      dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 634  634  {
 635  635          return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
 636  636  }
 637  637  
 638  638  uint64_t
 639  639  zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
 640  640      dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 641  641  {
 642  642          uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
 643  643  
 644  644          mzap_create_impl(os, obj, normflags, 0, tx);
 645  645          return (obj);
 646  646  }
 647  647  
 648  648  uint64_t
 649  649  zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
 650  650      dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
 651  651      dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 652  652  {
 653  653          uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
 654  654  
 655  655          ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT &&
 656  656              leaf_blockshift <= SPA_MAXBLOCKSHIFT &&
 657  657              indirect_blockshift >= SPA_MINBLOCKSHIFT &&
 658  658              indirect_blockshift <= SPA_MAXBLOCKSHIFT);
 659  659  
 660  660          VERIFY(dmu_object_set_blocksize(os, obj,
 661  661              1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
 662  662  
 663  663          mzap_create_impl(os, obj, normflags, flags, tx);
 664  664          return (obj);
 665  665  }
 666  666  
 667  667  int
 668  668  zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
 669  669  {
 670  670          /*
 671  671           * dmu_object_free will free the object number and free the
 672  672           * data.  Freeing the data will cause our pageout function to be
 673  673           * called, which will destroy our data (zap_leaf_t's and zap_t).
 674  674           */
 675  675  
 676  676          return (dmu_object_free(os, zapobj, tx));
 677  677  }
 678  678  
 679  679  _NOTE(ARGSUSED(0))
 680  680  void
 681  681  zap_evict(dmu_buf_t *db, void *vzap)
 682  682  {
 683  683          zap_t *zap = vzap;
 684  684  
 685  685          rw_destroy(&zap->zap_rwlock);
 686  686  
 687  687          if (zap->zap_ismicro)
 688  688                  mze_destroy(zap);
 689  689          else
 690  690                  mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
 691  691  
 692  692          kmem_free(zap, sizeof (zap_t));
 693  693  }
 694  694  
 695  695  int
 696  696  zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
 697  697  {
 698  698          zap_t *zap;
 699  699          int err;
 700  700  
 701  701          err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
 702  702          if (err)
 703  703                  return (err);
 704  704          if (!zap->zap_ismicro) {
 705  705                  err = fzap_count(zap, count);
 706  706          } else {
 707  707                  *count = zap->zap_m.zap_num_entries;
 708  708          }
 709  709          zap_unlockdir(zap);
 710  710          return (err);
 711  711  }
 712  712  
 713  713  /*
 714  714   * zn may be NULL; if not specified, it will be computed if needed.
 715  715   * See also the comment above zap_entry_normalization_conflict().
 716  716   */
 717  717  static boolean_t
 718  718  mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
 719  719  {
 720  720          mzap_ent_t *other;
 721  721          int direction = AVL_BEFORE;
 722  722          boolean_t allocdzn = B_FALSE;
 723  723  
 724  724          if (zap->zap_normflags == 0)
 725  725                  return (B_FALSE);
 726  726  
 727  727  again:
 728  728          for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
 729  729              other && other->mze_hash == mze->mze_hash;
 730  730              other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
 731  731  
 732  732                  if (zn == NULL) {
 733  733                          zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
 734  734                              MT_FIRST);
 735  735                          allocdzn = B_TRUE;
 736  736                  }
 737  737                  if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
 738  738                          if (allocdzn)
 739  739                                  zap_name_free(zn);
 740  740                          return (B_TRUE);
 741  741                  }
 742  742          }
 743  743  
 744  744          if (direction == AVL_BEFORE) {
 745  745                  direction = AVL_AFTER;
 746  746                  goto again;
 747  747          }
 748  748  
 749  749          if (allocdzn)
 750  750                  zap_name_free(zn);
 751  751          return (B_FALSE);
 752  752  }
 753  753  
 754  754  /*
 755  755   * Routines for manipulating attributes.
 756  756   */
 757  757  
 758  758  int
 759  759  zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
 760  760      uint64_t integer_size, uint64_t num_integers, void *buf)
 761  761  {
 762  762          return (zap_lookup_norm(os, zapobj, name, integer_size,
 763  763              num_integers, buf, MT_EXACT, NULL, 0, NULL));
 764  764  }
 765  765  
 766  766  int
 767  767  zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
 768  768      uint64_t integer_size, uint64_t num_integers, void *buf,
 769  769      matchtype_t mt, char *realname, int rn_len,
 770  770      boolean_t *ncp)
 771  771  {
 772  772          zap_t *zap;
 773  773          int err;
 774  774          mzap_ent_t *mze;
 775  775          zap_name_t *zn;
 776  776  
 777  777          err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
 778  778          if (err)
 779  779                  return (err);
 780  780          zn = zap_name_alloc(zap, name, mt);
 781  781          if (zn == NULL) {
 782  782                  zap_unlockdir(zap);
 783  783                  return (ENOTSUP);
 784  784          }
 785  785  
 786  786          if (!zap->zap_ismicro) {
 787  787                  err = fzap_lookup(zn, integer_size, num_integers, buf,
 788  788                      realname, rn_len, ncp);
 789  789          } else {
 790  790                  mze = mze_find(zn);
 791  791                  if (mze == NULL) {
 792  792                          err = ENOENT;
 793  793                  } else {
 794  794                          if (num_integers < 1) {
 795  795                                  err = EOVERFLOW;
 796  796                          } else if (integer_size != 8) {
 797  797                                  err = EINVAL;
 798  798                          } else {
 799  799                                  *(uint64_t *)buf =
 800  800                                      MZE_PHYS(zap, mze)->mze_value;
 801  801                                  (void) strlcpy(realname,
 802  802                                      MZE_PHYS(zap, mze)->mze_name, rn_len);
 803  803                                  if (ncp) {
 804  804                                          *ncp = mzap_normalization_conflict(zap,
 805  805                                              zn, mze);
 806  806                                  }
 807  807                          }
 808  808                  }
 809  809          }
 810  810          zap_name_free(zn);
 811  811          zap_unlockdir(zap);
 812  812          return (err);
 813  813  }
 814  814  
 815  815  int
 816  816  zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
 817  817      int key_numints)
 818  818  {
 819  819          zap_t *zap;
 820  820          int err;
 821  821          zap_name_t *zn;
 822  822  
 823  823          err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
 824  824          if (err)
 825  825                  return (err);
 826  826          zn = zap_name_alloc_uint64(zap, key, key_numints);
 827  827          if (zn == NULL) {
 828  828                  zap_unlockdir(zap);
 829  829                  return (ENOTSUP);
 830  830          }
 831  831  
 832  832          fzap_prefetch(zn);
 833  833          zap_name_free(zn);
 834  834          zap_unlockdir(zap);
 835  835          return (err);
 836  836  }
 837  837  
 838  838  int
 839  839  zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
 840  840      int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
 841  841  {
 842  842          zap_t *zap;
 843  843          int err;
 844  844          zap_name_t *zn;
 845  845  
 846  846          err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
 847  847          if (err)
 848  848                  return (err);
 849  849          zn = zap_name_alloc_uint64(zap, key, key_numints);
 850  850          if (zn == NULL) {
 851  851                  zap_unlockdir(zap);
 852  852                  return (ENOTSUP);
 853  853          }
 854  854  
 855  855          err = fzap_lookup(zn, integer_size, num_integers, buf,
 856  856              NULL, 0, NULL);
 857  857          zap_name_free(zn);
 858  858          zap_unlockdir(zap);
 859  859          return (err);
 860  860  }
 861  861  
 862  862  int
 863  863  zap_contains(objset_t *os, uint64_t zapobj, const char *name)
 864  864  {
 865  865          int err = (zap_lookup_norm(os, zapobj, name, 0,
 866  866              0, NULL, MT_EXACT, NULL, 0, NULL));
 867  867          if (err == EOVERFLOW || err == EINVAL)
 868  868                  err = 0; /* found, but skipped reading the value */
 869  869          return (err);
 870  870  }
 871  871  
 872  872  int
 873  873  zap_length(objset_t *os, uint64_t zapobj, const char *name,
 874  874      uint64_t *integer_size, uint64_t *num_integers)
 875  875  {
 876  876          zap_t *zap;
 877  877          int err;
 878  878          mzap_ent_t *mze;
 879  879          zap_name_t *zn;
 880  880  
 881  881          err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
 882  882          if (err)
 883  883                  return (err);
 884  884          zn = zap_name_alloc(zap, name, MT_EXACT);
 885  885          if (zn == NULL) {
 886  886                  zap_unlockdir(zap);
 887  887                  return (ENOTSUP);
 888  888          }
 889  889          if (!zap->zap_ismicro) {
 890  890                  err = fzap_length(zn, integer_size, num_integers);
 891  891          } else {
 892  892                  mze = mze_find(zn);
 893  893                  if (mze == NULL) {
 894  894                          err = ENOENT;
 895  895                  } else {
 896  896                          if (integer_size)
 897  897                                  *integer_size = 8;
 898  898                          if (num_integers)
 899  899                                  *num_integers = 1;
 900  900                  }
 901  901          }
 902  902          zap_name_free(zn);
 903  903          zap_unlockdir(zap);
 904  904          return (err);
 905  905  }
 906  906  
 907  907  int
 908  908  zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
 909  909      int key_numints, uint64_t *integer_size, uint64_t *num_integers)
 910  910  {
 911  911          zap_t *zap;
 912  912          int err;
 913  913          zap_name_t *zn;
 914  914  
 915  915          err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
 916  916          if (err)
 917  917                  return (err);
 918  918          zn = zap_name_alloc_uint64(zap, key, key_numints);
 919  919          if (zn == NULL) {
 920  920                  zap_unlockdir(zap);
 921  921                  return (ENOTSUP);
 922  922          }
 923  923          err = fzap_length(zn, integer_size, num_integers);
 924  924          zap_name_free(zn);
 925  925          zap_unlockdir(zap);
 926  926          return (err);
 927  927  }
 928  928  
 929  929  static void
 930  930  mzap_addent(zap_name_t *zn, uint64_t value)
 931  931  {
 932  932          int i;
 933  933          zap_t *zap = zn->zn_zap;
 934  934          int start = zap->zap_m.zap_alloc_next;
 935  935          uint32_t cd;
 936  936  
 937  937          ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
 938  938  
 939  939  #ifdef ZFS_DEBUG
 940  940          for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
 941  941                  mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
 942  942                  ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
 943  943          }
 944  944  #endif
 945  945  
 946  946          cd = mze_find_unused_cd(zap, zn->zn_hash);
 947  947          /* given the limited size of the microzap, this can't happen */
 948  948          ASSERT(cd < zap_maxcd(zap));
 949  949  
 950  950  again:
 951  951          for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
 952  952                  mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
 953  953                  if (mze->mze_name[0] == 0) {
 954  954                          mze->mze_value = value;
 955  955                          mze->mze_cd = cd;
 956  956                          (void) strcpy(mze->mze_name, zn->zn_key_orig);
 957  957                          zap->zap_m.zap_num_entries++;
 958  958                          zap->zap_m.zap_alloc_next = i+1;
 959  959                          if (zap->zap_m.zap_alloc_next ==
 960  960                              zap->zap_m.zap_num_chunks)
 961  961                                  zap->zap_m.zap_alloc_next = 0;
 962  962                          mze_insert(zap, i, zn->zn_hash);
 963  963                          return;
 964  964                  }
 965  965          }
 966  966          if (start != 0) {
 967  967                  start = 0;
 968  968                  goto again;
 969  969          }
 970  970          ASSERT(!"out of entries!");
 971  971  }
 972  972  
 973  973  int
 974  974  zap_add(objset_t *os, uint64_t zapobj, const char *key,
 975  975      int integer_size, uint64_t num_integers,
 976  976      const void *val, dmu_tx_t *tx)
 977  977  {
 978  978          zap_t *zap;
 979  979          int err;
 980  980          mzap_ent_t *mze;
 981  981          const uint64_t *intval = val;
 982  982          zap_name_t *zn;
 983  983  
 984  984          err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
 985  985          if (err)
 986  986                  return (err);
 987  987          zn = zap_name_alloc(zap, key, MT_EXACT);
 988  988          if (zn == NULL) {
 989  989                  zap_unlockdir(zap);
 990  990                  return (ENOTSUP);
 991  991          }
 992  992          if (!zap->zap_ismicro) {
 993  993                  err = fzap_add(zn, integer_size, num_integers, val, tx);
 994  994                  zap = zn->zn_zap;       /* fzap_add() may change zap */
 995  995          } else if (integer_size != 8 || num_integers != 1 ||
 996  996              strlen(key) >= MZAP_NAME_LEN) {
 997  997                  err = mzap_upgrade(&zn->zn_zap, tx, 0);
 998  998                  if (err == 0)
 999  999                          err = fzap_add(zn, integer_size, num_integers, val, tx);
1000 1000                  zap = zn->zn_zap;       /* fzap_add() may change zap */
1001 1001          } else {
1002 1002                  mze = mze_find(zn);
1003 1003                  if (mze != NULL) {
1004 1004                          err = EEXIST;
1005 1005                  } else {
1006 1006                          mzap_addent(zn, *intval);
1007 1007                  }
1008 1008          }
1009 1009          ASSERT(zap == zn->zn_zap);
1010 1010          zap_name_free(zn);
1011 1011          if (zap != NULL)        /* may be NULL if fzap_add() failed */
1012 1012                  zap_unlockdir(zap);
1013 1013          return (err);
1014 1014  }
1015 1015  
1016 1016  int
1017 1017  zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1018 1018      int key_numints, int integer_size, uint64_t num_integers,
1019 1019      const void *val, dmu_tx_t *tx)
1020 1020  {
1021 1021          zap_t *zap;
1022 1022          int err;
1023 1023          zap_name_t *zn;
1024 1024  
1025 1025          err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
1026 1026          if (err)
1027 1027                  return (err);
1028 1028          zn = zap_name_alloc_uint64(zap, key, key_numints);
1029 1029          if (zn == NULL) {
1030 1030                  zap_unlockdir(zap);
1031 1031                  return (ENOTSUP);
1032 1032          }
1033 1033          err = fzap_add(zn, integer_size, num_integers, val, tx);
1034 1034          zap = zn->zn_zap;       /* fzap_add() may change zap */
1035 1035          zap_name_free(zn);
1036 1036          if (zap != NULL)        /* may be NULL if fzap_add() failed */
1037 1037                  zap_unlockdir(zap);
1038 1038          return (err);
1039 1039  }
1040 1040  
1041 1041  int
1042 1042  zap_update(objset_t *os, uint64_t zapobj, const char *name,
1043 1043      int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
1044 1044  {
1045 1045          zap_t *zap;
1046 1046          mzap_ent_t *mze;
1047 1047          uint64_t oldval;
1048 1048          const uint64_t *intval = val;
1049 1049          zap_name_t *zn;
1050 1050          int err;
1051 1051  
1052 1052  #ifdef ZFS_DEBUG
1053 1053          /*
1054 1054           * If there is an old value, it shouldn't change across the
1055 1055           * lockdir (eg, due to bprewrite's xlation).
1056 1056           */
1057 1057          if (integer_size == 8 && num_integers == 1)
1058 1058                  (void) zap_lookup(os, zapobj, name, 8, 1, &oldval);
1059 1059  #endif
1060 1060  
1061 1061          err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
1062 1062          if (err)
1063 1063                  return (err);
1064 1064          zn = zap_name_alloc(zap, name, MT_EXACT);
1065 1065          if (zn == NULL) {
1066 1066                  zap_unlockdir(zap);
1067 1067                  return (ENOTSUP);
1068 1068          }
1069 1069          if (!zap->zap_ismicro) {
1070 1070                  err = fzap_update(zn, integer_size, num_integers, val, tx);
1071 1071                  zap = zn->zn_zap;       /* fzap_update() may change zap */
1072 1072          } else if (integer_size != 8 || num_integers != 1 ||
1073 1073              strlen(name) >= MZAP_NAME_LEN) {
1074 1074                  dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
1075 1075                      zapobj, integer_size, num_integers, name);
1076 1076                  err = mzap_upgrade(&zn->zn_zap, tx, 0);
1077 1077                  if (err == 0)
1078 1078                          err = fzap_update(zn, integer_size, num_integers,
1079 1079                              val, tx);
1080 1080                  zap = zn->zn_zap;       /* fzap_update() may change zap */
1081 1081          } else {
1082 1082                  mze = mze_find(zn);
1083 1083                  if (mze != NULL) {
1084 1084                          ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval);
1085 1085                          MZE_PHYS(zap, mze)->mze_value = *intval;
1086 1086                  } else {
1087 1087                          mzap_addent(zn, *intval);
1088 1088                  }
1089 1089          }
1090 1090          ASSERT(zap == zn->zn_zap);
1091 1091          zap_name_free(zn);
1092 1092          if (zap != NULL)        /* may be NULL if fzap_upgrade() failed */
1093 1093                  zap_unlockdir(zap);
1094 1094          return (err);
1095 1095  }
1096 1096  
1097 1097  int
1098 1098  zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1099 1099      int key_numints,
1100 1100      int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
1101 1101  {
1102 1102          zap_t *zap;
1103 1103          zap_name_t *zn;
1104 1104          int err;
1105 1105  
1106 1106          err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
1107 1107          if (err)
1108 1108                  return (err);
1109 1109          zn = zap_name_alloc_uint64(zap, key, key_numints);
1110 1110          if (zn == NULL) {
1111 1111                  zap_unlockdir(zap);
1112 1112                  return (ENOTSUP);
1113 1113          }
1114 1114          err = fzap_update(zn, integer_size, num_integers, val, tx);
1115 1115          zap = zn->zn_zap;       /* fzap_update() may change zap */
1116 1116          zap_name_free(zn);
1117 1117          if (zap != NULL)        /* may be NULL if fzap_upgrade() failed */
1118 1118                  zap_unlockdir(zap);
1119 1119          return (err);
1120 1120  }
1121 1121  
1122 1122  int
1123 1123  zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
1124 1124  {
1125 1125          return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx));
1126 1126  }
1127 1127  
1128 1128  int
1129 1129  zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
1130 1130      matchtype_t mt, dmu_tx_t *tx)
1131 1131  {
1132 1132          zap_t *zap;
1133 1133          int err;
1134 1134          mzap_ent_t *mze;
1135 1135          zap_name_t *zn;
1136 1136  
1137 1137          err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
1138 1138          if (err)
1139 1139                  return (err);
1140 1140          zn = zap_name_alloc(zap, name, mt);
1141 1141          if (zn == NULL) {
1142 1142                  zap_unlockdir(zap);
1143 1143                  return (ENOTSUP);
1144 1144          }
1145 1145          if (!zap->zap_ismicro) {
1146 1146                  err = fzap_remove(zn, tx);
1147 1147          } else {
1148 1148                  mze = mze_find(zn);
1149 1149                  if (mze == NULL) {
1150 1150                          err = ENOENT;
1151 1151                  } else {
1152 1152                          zap->zap_m.zap_num_entries--;
1153 1153                          bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid],
1154 1154                              sizeof (mzap_ent_phys_t));
1155 1155                          mze_remove(zap, mze);
1156 1156                  }
1157 1157          }
1158 1158          zap_name_free(zn);
1159 1159          zap_unlockdir(zap);
1160 1160          return (err);
1161 1161  }
1162 1162  
1163 1163  int
1164 1164  zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1165 1165      int key_numints, dmu_tx_t *tx)
1166 1166  {
1167 1167          zap_t *zap;
1168 1168          int err;
1169 1169          zap_name_t *zn;
1170 1170  
1171 1171          err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
1172 1172          if (err)
1173 1173                  return (err);
1174 1174          zn = zap_name_alloc_uint64(zap, key, key_numints);
1175 1175          if (zn == NULL) {
1176 1176                  zap_unlockdir(zap);
1177 1177                  return (ENOTSUP);
1178 1178          }
1179 1179          err = fzap_remove(zn, tx);
1180 1180          zap_name_free(zn);
1181 1181          zap_unlockdir(zap);
1182 1182          return (err);
1183 1183  }
1184 1184  
1185 1185  /*
1186 1186   * Routines for iterating over the attributes.
1187 1187   */
1188 1188  
1189 1189  void
1190 1190  zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
1191 1191      uint64_t serialized)
1192 1192  {
1193 1193          zc->zc_objset = os;
1194 1194          zc->zc_zap = NULL;
1195 1195          zc->zc_leaf = NULL;
1196 1196          zc->zc_zapobj = zapobj;
1197 1197          zc->zc_serialized = serialized;
1198 1198          zc->zc_hash = 0;
1199 1199          zc->zc_cd = 0;
1200 1200  }
1201 1201  
1202 1202  void
1203 1203  zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
1204 1204  {
1205 1205          zap_cursor_init_serialized(zc, os, zapobj, 0);
1206 1206  }
1207 1207  
1208 1208  void
1209 1209  zap_cursor_fini(zap_cursor_t *zc)
1210 1210  {
1211 1211          if (zc->zc_zap) {
1212 1212                  rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1213 1213                  zap_unlockdir(zc->zc_zap);
1214 1214                  zc->zc_zap = NULL;
1215 1215          }
1216 1216          if (zc->zc_leaf) {
1217 1217                  rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
1218 1218                  zap_put_leaf(zc->zc_leaf);
1219 1219                  zc->zc_leaf = NULL;
1220 1220          }
1221 1221          zc->zc_objset = NULL;
1222 1222  }
1223 1223  
1224 1224  uint64_t
1225 1225  zap_cursor_serialize(zap_cursor_t *zc)
1226 1226  {
1227 1227          if (zc->zc_hash == -1ULL)
1228 1228                  return (-1ULL);
1229 1229          if (zc->zc_zap == NULL)
1230 1230                  return (zc->zc_serialized);
1231 1231          ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
1232 1232          ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
1233 1233  
1234 1234          /*
1235 1235           * We want to keep the high 32 bits of the cursor zero if we can, so
1236 1236           * that 32-bit programs can access this.  So usually use a small
1237 1237           * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
1238 1238           * of the cursor.
1239 1239           *
1240 1240           * [ collision differentiator | zap_hashbits()-bit hash value ]
1241 1241           */
1242 1242          return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
1243 1243              ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
1244 1244  }
1245 1245  
1246 1246  int
1247 1247  zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
1248 1248  {
1249 1249          int err;
1250 1250          avl_index_t idx;
1251 1251          mzap_ent_t mze_tofind;
1252 1252          mzap_ent_t *mze;
1253 1253  
1254 1254          if (zc->zc_hash == -1ULL)
1255 1255                  return (ENOENT);
1256 1256  
1257 1257          if (zc->zc_zap == NULL) {
1258 1258                  int hb;
1259 1259                  err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
1260 1260                      RW_READER, TRUE, FALSE, &zc->zc_zap);
1261 1261                  if (err)
1262 1262                          return (err);
1263 1263  
1264 1264                  /*
1265 1265                   * To support zap_cursor_init_serialized, advance, retrieve,
1266 1266                   * we must add to the existing zc_cd, which may already
1267 1267                   * be 1 due to the zap_cursor_advance.
1268 1268                   */
1269 1269                  ASSERT(zc->zc_hash == 0);
1270 1270                  hb = zap_hashbits(zc->zc_zap);
1271 1271                  zc->zc_hash = zc->zc_serialized << (64 - hb);
1272 1272                  zc->zc_cd += zc->zc_serialized >> hb;
1273 1273                  if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
1274 1274                          zc->zc_cd = 0;
1275 1275          } else {
1276 1276                  rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1277 1277          }
1278 1278          if (!zc->zc_zap->zap_ismicro) {
1279 1279                  err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
1280 1280          } else {
1281 1281                  err = ENOENT;
1282 1282  
1283 1283                  mze_tofind.mze_hash = zc->zc_hash;
1284 1284                  mze_tofind.mze_cd = zc->zc_cd;
1285 1285  
1286 1286                  mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
1287 1287                  if (mze == NULL) {
1288 1288                          mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
1289 1289                              idx, AVL_AFTER);
1290 1290                  }
1291 1291                  if (mze) {
1292 1292                          mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
1293 1293                          ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
1294 1294                          za->za_normalization_conflict =
1295 1295                              mzap_normalization_conflict(zc->zc_zap, NULL, mze);
1296 1296                          za->za_integer_length = 8;
1297 1297                          za->za_num_integers = 1;
1298 1298                          za->za_first_integer = mzep->mze_value;
1299 1299                          (void) strcpy(za->za_name, mzep->mze_name);
1300 1300                          zc->zc_hash = mze->mze_hash;
1301 1301                          zc->zc_cd = mze->mze_cd;
1302 1302                          err = 0;
1303 1303                  } else {
1304 1304                          zc->zc_hash = -1ULL;
1305 1305                  }
1306 1306          }
1307 1307          rw_exit(&zc->zc_zap->zap_rwlock);
1308 1308          return (err);
1309 1309  }
1310 1310  
1311 1311  void
1312 1312  zap_cursor_advance(zap_cursor_t *zc)
1313 1313  {
1314 1314          if (zc->zc_hash == -1ULL)
1315 1315                  return;
1316 1316          zc->zc_cd++;
1317 1317  }
1318 1318  
1319 1319  int
1320 1320  zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt)
1321 1321  {
1322 1322          int err = 0;
1323 1323          mzap_ent_t *mze;
1324 1324          zap_name_t *zn;
1325 1325  
1326 1326          if (zc->zc_zap == NULL) {
1327 1327                  err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
1328 1328                      RW_READER, TRUE, FALSE, &zc->zc_zap);
1329 1329                  if (err)
1330 1330                          return (err);
1331 1331          } else {
1332 1332                  rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1333 1333          }
1334 1334  
1335 1335          zn = zap_name_alloc(zc->zc_zap, name, mt);
1336 1336          if (zn == NULL) {
1337 1337                  rw_exit(&zc->zc_zap->zap_rwlock);
1338 1338                  return (ENOTSUP);
1339 1339          }
1340 1340  
1341 1341          if (!zc->zc_zap->zap_ismicro) {
1342 1342                  err = fzap_cursor_move_to_key(zc, zn);
1343 1343          } else {
1344 1344                  mze = mze_find(zn);
1345 1345                  if (mze == NULL) {
1346 1346                          err = ENOENT;
1347 1347                          goto out;
1348 1348                  }
1349 1349                  zc->zc_hash = mze->mze_hash;
1350 1350                  zc->zc_cd = mze->mze_cd;
1351 1351          }
1352 1352  
1353 1353  out:
1354 1354          zap_name_free(zn);
1355 1355          rw_exit(&zc->zc_zap->zap_rwlock);
1356 1356          return (err);
1357 1357  }
1358 1358  
1359 1359  int
1360 1360  zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
1361 1361  {
1362 1362          int err;
1363 1363          zap_t *zap;
1364 1364  
1365 1365          err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
1366 1366          if (err)
1367 1367                  return (err);
1368 1368  
1369 1369          bzero(zs, sizeof (zap_stats_t));
1370 1370  
1371 1371          if (zap->zap_ismicro) {
1372 1372                  zs->zs_blocksize = zap->zap_dbuf->db_size;
1373 1373                  zs->zs_num_entries = zap->zap_m.zap_num_entries;
1374 1374                  zs->zs_num_blocks = 1;
1375 1375          } else {
1376 1376                  fzap_get_stats(zap, zs);
1377 1377          }
1378 1378          zap_unlockdir(zap);
1379 1379          return (0);
1380 1380  }
1381 1381  
1382 1382  int
1383 1383  zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
1384 1384      uint64_t *towrite, uint64_t *tooverwrite)
1385 1385  {
1386 1386          zap_t *zap;
1387 1387          int err = 0;
1388 1388  
1389 1389  
1390 1390          /*
1391 1391           * Since, we don't have a name, we cannot figure out which blocks will
1392 1392           * be affected in this operation. So, account for the worst case :
1393 1393           * - 3 blocks overwritten: target leaf, ptrtbl block, header block
1394 1394           * - 4 new blocks written if adding:
1395 1395           *      - 2 blocks for possibly split leaves,
1396 1396           *      - 2 grown ptrtbl blocks
1397 1397           *
1398 1398           * This also accomodates the case where an add operation to a fairly
1399 1399           * large microzap results in a promotion to fatzap.
1400 1400           */
1401 1401          if (name == NULL) {
1402 1402                  *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
1403 1403                  return (err);
1404 1404          }
1405 1405  
1406 1406          /*
1407 1407           * We lock the zap with adding == FALSE. Because, if we pass
1408 1408           * the actual value of add, it could trigger a mzap_upgrade().
1409 1409           * At present we are just evaluating the possibility of this operation
1410 1410           * and hence we donot want to trigger an upgrade.
1411 1411           */
1412 1412          err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
1413 1413          if (err)
1414 1414                  return (err);
1415 1415  
1416 1416          if (!zap->zap_ismicro) {
1417 1417                  zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
1418 1418                  if (zn) {
1419 1419                          err = fzap_count_write(zn, add, towrite,
1420 1420                              tooverwrite);
1421 1421                          zap_name_free(zn);
1422 1422                  } else {
1423 1423                          /*
1424 1424                           * We treat this case as similar to (name == NULL)
1425 1425                           */
1426 1426                          *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
1427 1427                  }
1428 1428          } else {
1429 1429                  /*
1430 1430                   * We are here if (name != NULL) and this is a micro-zap.
1431 1431                   * We account for the header block depending on whether it
1432 1432                   * is freeable.
1433 1433                   *
1434 1434                   * Incase of an add-operation it is hard to find out
1435 1435                   * if this add will promote this microzap to fatzap.
1436 1436                   * Hence, we consider the worst case and account for the
1437 1437                   * blocks assuming this microzap would be promoted to a
1438 1438                   * fatzap.
1439 1439                   *
1440 1440                   * 1 block overwritten  : header block
1441 1441                   * 4 new blocks written : 2 new split leaf, 2 grown
1442 1442                   *                      ptrtbl blocks
1443 1443                   */
1444 1444                  if (dmu_buf_freeable(zap->zap_dbuf))
1445 1445                          *tooverwrite += SPA_MAXBLOCKSIZE;
1446 1446                  else
1447 1447                          *towrite += SPA_MAXBLOCKSIZE;
1448 1448  
1449 1449                  if (add) {
1450 1450                          *towrite += 4 * SPA_MAXBLOCKSIZE;
1451 1451                  }
1452 1452          }
1453 1453  
1454 1454          zap_unlockdir(zap);
1455 1455          return (err);
1456 1456  }
  
    | 
      ↓ open down ↓ | 
    858 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX