3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/dmu.h>
  26 #include <sys/dmu_impl.h>
  27 #include <sys/dmu_tx.h>
  28 #include <sys/dbuf.h>
  29 #include <sys/dnode.h>
  30 #include <sys/zfs_context.h>
  31 #include <sys/dmu_objset.h>
  32 #include <sys/dmu_traverse.h>
  33 #include <sys/dsl_dataset.h>
  34 #include <sys/dsl_dir.h>
  35 #include <sys/dsl_pool.h>
  36 #include <sys/dsl_synctask.h>
  37 #include <sys/dsl_prop.h>
  38 #include <sys/dmu_zfetch.h>
  39 #include <sys/zfs_ioctl.h>
  40 #include <sys/zap.h>
  41 #include <sys/zio_checksum.h>
  42 #include <sys/sa.h>
  43 #ifdef _KERNEL
  44 #include <sys/vmsystm.h>
  45 #include <sys/zfs_znode.h>
  46 #endif
  47 
  48 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
  49         {       byteswap_uint8_array,   TRUE,   "unallocated"           },
  50         {       zap_byteswap,           TRUE,   "object directory"      },
  51         {       byteswap_uint64_array,  TRUE,   "object array"          },
  52         {       byteswap_uint8_array,   TRUE,   "packed nvlist"         },
  53         {       byteswap_uint64_array,  TRUE,   "packed nvlist size"    },
  54         {       byteswap_uint64_array,  TRUE,   "bpobj"                 },
  55         {       byteswap_uint64_array,  TRUE,   "bpobj header"          },
  56         {       byteswap_uint64_array,  TRUE,   "SPA space map header"  },
  57         {       byteswap_uint64_array,  TRUE,   "SPA space map"         },
  58         {       byteswap_uint64_array,  TRUE,   "ZIL intent log"        },
  59         {       dnode_buf_byteswap,     TRUE,   "DMU dnode"             },
  60         {       dmu_objset_byteswap,    TRUE,   "DMU objset"            },
  61         {       byteswap_uint64_array,  TRUE,   "DSL directory"         },
  62         {       zap_byteswap,           TRUE,   "DSL directory child map"},
  63         {       zap_byteswap,           TRUE,   "DSL dataset snap map"  },
  64         {       zap_byteswap,           TRUE,   "DSL props"             },
  65         {       byteswap_uint64_array,  TRUE,   "DSL dataset"           },
  66         {       zfs_znode_byteswap,     TRUE,   "ZFS znode"             },
  67         {       zfs_oldacl_byteswap,    TRUE,   "ZFS V0 ACL"            },
  68         {       byteswap_uint8_array,   FALSE,  "ZFS plain file"        },
  69         {       zap_byteswap,           TRUE,   "ZFS directory"         },
  70         {       zap_byteswap,           TRUE,   "ZFS master node"       },
  71         {       zap_byteswap,           TRUE,   "ZFS delete queue"      },
  72         {       byteswap_uint8_array,   FALSE,  "zvol object"           },
  73         {       zap_byteswap,           TRUE,   "zvol prop"             },
  74         {       byteswap_uint8_array,   FALSE,  "other uint8[]"         },
  75         {       byteswap_uint64_array,  FALSE,  "other uint64[]"        },
  76         {       zap_byteswap,           TRUE,   "other ZAP"             },
  77         {       zap_byteswap,           TRUE,   "persistent error log"  },
  78         {       byteswap_uint8_array,   TRUE,   "SPA history"           },
  79         {       byteswap_uint64_array,  TRUE,   "SPA history offsets"   },
  80         {       zap_byteswap,           TRUE,   "Pool properties"       },
  81         {       zap_byteswap,           TRUE,   "DSL permissions"       },
  82         {       zfs_acl_byteswap,       TRUE,   "ZFS ACL"               },
  83         {       byteswap_uint8_array,   TRUE,   "ZFS SYSACL"            },
  84         {       byteswap_uint8_array,   TRUE,   "FUID table"            },
  85         {       byteswap_uint64_array,  TRUE,   "FUID table size"       },
  86         {       zap_byteswap,           TRUE,   "DSL dataset next clones"},
  87         {       zap_byteswap,           TRUE,   "scan work queue"       },
  88         {       zap_byteswap,           TRUE,   "ZFS user/group used"   },
  89         {       zap_byteswap,           TRUE,   "ZFS user/group quota"  },
  90         {       zap_byteswap,           TRUE,   "snapshot refcount tags"},
  91         {       zap_byteswap,           TRUE,   "DDT ZAP algorithm"     },
  92         {       zap_byteswap,           TRUE,   "DDT statistics"        },
  93         {       byteswap_uint8_array,   TRUE,   "System attributes"     },
  94         {       zap_byteswap,           TRUE,   "SA master node"        },
  95         {       zap_byteswap,           TRUE,   "SA attr registration"  },
  96         {       zap_byteswap,           TRUE,   "SA attr layouts"       },
  97         {       zap_byteswap,           TRUE,   "scan translations"     },
  98         {       byteswap_uint8_array,   FALSE,  "deduplicated block"    },
  99         {       zap_byteswap,           TRUE,   "DSL deadlist map"      },
 100         {       byteswap_uint64_array,  TRUE,   "DSL deadlist map hdr"  },
 101         {       zap_byteswap,           TRUE,   "DSL dir clones"        },
 102         {       byteswap_uint64_array,  TRUE,   "bpobj subobj"          },
 103 };
 104 
 105 int
 106 dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
 107     void *tag, dmu_buf_t **dbp, int flags)
 108 {
 109         dnode_t *dn;
 110         uint64_t blkid;
 111         dmu_buf_impl_t *db;
 112         int err;
 113         int db_flags = DB_RF_CANFAIL;
 114 
 115         if (flags & DMU_READ_NO_PREFETCH)
 116                 db_flags |= DB_RF_NOPREFETCH;
 117 
 118         err = dnode_hold(os, object, FTAG, &dn);
 119         if (err)
 120                 return (err);
 121         blkid = dbuf_whichblock(dn, offset);
 122         rw_enter(&dn->dn_struct_rwlock, RW_READER);
 123         db = dbuf_hold(dn, blkid, tag);
 124         rw_exit(&dn->dn_struct_rwlock);
 
 159                 error = EINVAL;
 160         } else {
 161                 dnode_setbonuslen(dn, newsize, tx);
 162                 error = 0;
 163         }
 164 
 165         DB_DNODE_EXIT(db);
 166         return (error);
 167 }
 168 
 169 int
 170 dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
 171 {
 172         dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 173         dnode_t *dn;
 174         int error;
 175 
 176         DB_DNODE_ENTER(db);
 177         dn = DB_DNODE(db);
 178 
 179         if (type > DMU_OT_NUMTYPES) {
 180                 error = EINVAL;
 181         } else if (dn->dn_bonus != db) {
 182                 error = EINVAL;
 183         } else {
 184                 dnode_setbonus_type(dn, type, tx);
 185                 error = 0;
 186         }
 187 
 188         DB_DNODE_EXIT(db);
 189         return (error);
 190 }
 191 
 192 dmu_object_type_t
 193 dmu_get_bonustype(dmu_buf_t *db_fake)
 194 {
 195         dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 196         dnode_t *dn;
 197         dmu_object_type_t type;
 198 
 199         DB_DNODE_ENTER(db);
 
1486 void
1487 dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
1488         dmu_tx_t *tx)
1489 {
1490         dnode_t *dn;
1491 
1492         /* XXX assumes dnode_hold will not get an i/o error */
1493         (void) dnode_hold(os, object, FTAG, &dn);
1494         ASSERT(compress < ZIO_COMPRESS_FUNCTIONS);
1495         dn->dn_compress = compress;
1496         dnode_setdirty(dn, tx);
1497         dnode_rele(dn, FTAG);
1498 }
1499 
1500 int zfs_mdcomp_disable = 0;
1501 
1502 void
1503 dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
1504 {
1505         dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
1506         boolean_t ismd = (level > 0 || dmu_ot[type].ot_metadata ||
1507             (wp & WP_SPILL));
1508         enum zio_checksum checksum = os->os_checksum;
1509         enum zio_compress compress = os->os_compress;
1510         enum zio_checksum dedup_checksum = os->os_dedup_checksum;
1511         boolean_t dedup;
1512         boolean_t dedup_verify = os->os_dedup_verify;
1513         int copies = os->os_copies;
1514 
1515         /*
1516          * Determine checksum setting.
1517          */
1518         if (ismd) {
1519                 /*
1520                  * Metadata always gets checksummed.  If the data
1521                  * checksum is multi-bit correctable, and it's not a
1522                  * ZBT-style checksum, then it's suitable for metadata
1523                  * as well.  Otherwise, the metadata checksum defaults
1524                  * to fletcher4.
1525                  */
1526                 if (zio_checksum_table[checksum].ci_correctable < 1 ||
 
 | 
 
 
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012 by Delphix. All rights reserved.
  24  */
  25 
  26 #include <sys/dmu.h>
  27 #include <sys/dmu_impl.h>
  28 #include <sys/dmu_tx.h>
  29 #include <sys/dbuf.h>
  30 #include <sys/dnode.h>
  31 #include <sys/zfs_context.h>
  32 #include <sys/dmu_objset.h>
  33 #include <sys/dmu_traverse.h>
  34 #include <sys/dsl_dataset.h>
  35 #include <sys/dsl_dir.h>
  36 #include <sys/dsl_pool.h>
  37 #include <sys/dsl_synctask.h>
  38 #include <sys/dsl_prop.h>
  39 #include <sys/dmu_zfetch.h>
  40 #include <sys/zfs_ioctl.h>
  41 #include <sys/zap.h>
  42 #include <sys/zio_checksum.h>
  43 #include <sys/sa.h>
  44 #ifdef _KERNEL
  45 #include <sys/vmsystm.h>
  46 #include <sys/zfs_znode.h>
  47 #endif
  48 
  49 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
  50         {       DMU_BSWAP_UINT8,        TRUE,   "unallocated"           },
  51         {       DMU_BSWAP_ZAP,          TRUE,   "object directory"      },
  52         {       DMU_BSWAP_UINT64,       TRUE,   "object array"          },
  53         {       DMU_BSWAP_UINT8,        TRUE,   "packed nvlist"         },
  54         {       DMU_BSWAP_UINT64,       TRUE,   "packed nvlist size"    },
  55         {       DMU_BSWAP_UINT64,       TRUE,   "bpobj"                 },
  56         {       DMU_BSWAP_UINT64,       TRUE,   "bpobj header"          },
  57         {       DMU_BSWAP_UINT64,       TRUE,   "SPA space map header"  },
  58         {       DMU_BSWAP_UINT64,       TRUE,   "SPA space map"         },
  59         {       DMU_BSWAP_UINT64,       TRUE,   "ZIL intent log"        },
  60         {       DMU_BSWAP_DNODE,        TRUE,   "DMU dnode"             },
  61         {       DMU_BSWAP_OBJSET,       TRUE,   "DMU objset"            },
  62         {       DMU_BSWAP_UINT64,       TRUE,   "DSL directory"         },
  63         {       DMU_BSWAP_ZAP,          TRUE,   "DSL directory child map"},
  64         {       DMU_BSWAP_ZAP,          TRUE,   "DSL dataset snap map"  },
  65         {       DMU_BSWAP_ZAP,          TRUE,   "DSL props"             },
  66         {       DMU_BSWAP_UINT64,       TRUE,   "DSL dataset"           },
  67         {       DMU_BSWAP_ZNODE,        TRUE,   "ZFS znode"             },
  68         {       DMU_BSWAP_OLDACL,       TRUE,   "ZFS V0 ACL"            },
  69         {       DMU_BSWAP_UINT8,        FALSE,  "ZFS plain file"        },
  70         {       DMU_BSWAP_ZAP,          TRUE,   "ZFS directory"         },
  71         {       DMU_BSWAP_ZAP,          TRUE,   "ZFS master node"       },
  72         {       DMU_BSWAP_ZAP,          TRUE,   "ZFS delete queue"      },
  73         {       DMU_BSWAP_UINT8,        FALSE,  "zvol object"           },
  74         {       DMU_BSWAP_ZAP,          TRUE,   "zvol prop"             },
  75         {       DMU_BSWAP_UINT8,        FALSE,  "other uint8[]"         },
  76         {       DMU_BSWAP_UINT64,       FALSE,  "other uint64[]"        },
  77         {       DMU_BSWAP_ZAP,          TRUE,   "other ZAP"             },
  78         {       DMU_BSWAP_ZAP,          TRUE,   "persistent error log"  },
  79         {       DMU_BSWAP_UINT8,        TRUE,   "SPA history"           },
  80         {       DMU_BSWAP_UINT64,       TRUE,   "SPA history offsets"   },
  81         {       DMU_BSWAP_ZAP,          TRUE,   "Pool properties"       },
  82         {       DMU_BSWAP_ZAP,          TRUE,   "DSL permissions"       },
  83         {       DMU_BSWAP_ACL,          TRUE,   "ZFS ACL"               },
  84         {       DMU_BSWAP_UINT8,        TRUE,   "ZFS SYSACL"            },
  85         {       DMU_BSWAP_UINT8,        TRUE,   "FUID table"            },
  86         {       DMU_BSWAP_UINT64,       TRUE,   "FUID table size"       },
  87         {       DMU_BSWAP_ZAP,          TRUE,   "DSL dataset next clones"},
  88         {       DMU_BSWAP_ZAP,          TRUE,   "scan work queue"       },
  89         {       DMU_BSWAP_ZAP,          TRUE,   "ZFS user/group used"   },
  90         {       DMU_BSWAP_ZAP,          TRUE,   "ZFS user/group quota"  },
  91         {       DMU_BSWAP_ZAP,          TRUE,   "snapshot refcount tags"},
  92         {       DMU_BSWAP_ZAP,          TRUE,   "DDT ZAP algorithm"     },
  93         {       DMU_BSWAP_ZAP,          TRUE,   "DDT statistics"        },
  94         {       DMU_BSWAP_UINT8,        TRUE,   "System attributes"     },
  95         {       DMU_BSWAP_ZAP,          TRUE,   "SA master node"        },
  96         {       DMU_BSWAP_ZAP,          TRUE,   "SA attr registration"  },
  97         {       DMU_BSWAP_ZAP,          TRUE,   "SA attr layouts"       },
  98         {       DMU_BSWAP_ZAP,          TRUE,   "scan translations"     },
  99         {       DMU_BSWAP_UINT8,        FALSE,  "deduplicated block"    },
 100         {       DMU_BSWAP_ZAP,          TRUE,   "DSL deadlist map"      },
 101         {       DMU_BSWAP_UINT64,       TRUE,   "DSL deadlist map hdr"  },
 102         {       DMU_BSWAP_ZAP,          TRUE,   "DSL dir clones"        },
 103         {       DMU_BSWAP_UINT64,       TRUE,   "bpobj subobj"          }
 104 };
 105 
 106 const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
 107         {       byteswap_uint8_array,   "uint8"         },
 108         {       byteswap_uint16_array,  "uint16"        },
 109         {       byteswap_uint32_array,  "uint32"        },
 110         {       byteswap_uint64_array,  "uint64"        },
 111         {       zap_byteswap,           "zap"           },
 112         {       dnode_buf_byteswap,     "dnode"         },
 113         {       dmu_objset_byteswap,    "objset"        },
 114         {       zfs_znode_byteswap,     "znode"         },
 115         {       zfs_oldacl_byteswap,    "oldacl"        },
 116         {       zfs_acl_byteswap,       "acl"           }
 117 };
 118 
 119 int
 120 dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
 121     void *tag, dmu_buf_t **dbp, int flags)
 122 {
 123         dnode_t *dn;
 124         uint64_t blkid;
 125         dmu_buf_impl_t *db;
 126         int err;
 127         int db_flags = DB_RF_CANFAIL;
 128 
 129         if (flags & DMU_READ_NO_PREFETCH)
 130                 db_flags |= DB_RF_NOPREFETCH;
 131 
 132         err = dnode_hold(os, object, FTAG, &dn);
 133         if (err)
 134                 return (err);
 135         blkid = dbuf_whichblock(dn, offset);
 136         rw_enter(&dn->dn_struct_rwlock, RW_READER);
 137         db = dbuf_hold(dn, blkid, tag);
 138         rw_exit(&dn->dn_struct_rwlock);
 
 173                 error = EINVAL;
 174         } else {
 175                 dnode_setbonuslen(dn, newsize, tx);
 176                 error = 0;
 177         }
 178 
 179         DB_DNODE_EXIT(db);
 180         return (error);
 181 }
 182 
 183 int
 184 dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
 185 {
 186         dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 187         dnode_t *dn;
 188         int error;
 189 
 190         DB_DNODE_ENTER(db);
 191         dn = DB_DNODE(db);
 192 
 193         if (!DMU_OT_IS_VALID(type)) {
 194                 error = EINVAL;
 195         } else if (dn->dn_bonus != db) {
 196                 error = EINVAL;
 197         } else {
 198                 dnode_setbonus_type(dn, type, tx);
 199                 error = 0;
 200         }
 201 
 202         DB_DNODE_EXIT(db);
 203         return (error);
 204 }
 205 
 206 dmu_object_type_t
 207 dmu_get_bonustype(dmu_buf_t *db_fake)
 208 {
 209         dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 210         dnode_t *dn;
 211         dmu_object_type_t type;
 212 
 213         DB_DNODE_ENTER(db);
 
1500 void
1501 dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
1502         dmu_tx_t *tx)
1503 {
1504         dnode_t *dn;
1505 
1506         /* XXX assumes dnode_hold will not get an i/o error */
1507         (void) dnode_hold(os, object, FTAG, &dn);
1508         ASSERT(compress < ZIO_COMPRESS_FUNCTIONS);
1509         dn->dn_compress = compress;
1510         dnode_setdirty(dn, tx);
1511         dnode_rele(dn, FTAG);
1512 }
1513 
1514 int zfs_mdcomp_disable = 0;
1515 
1516 void
1517 dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
1518 {
1519         dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
1520         boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) ||
1521             (wp & WP_SPILL));
1522         enum zio_checksum checksum = os->os_checksum;
1523         enum zio_compress compress = os->os_compress;
1524         enum zio_checksum dedup_checksum = os->os_dedup_checksum;
1525         boolean_t dedup;
1526         boolean_t dedup_verify = os->os_dedup_verify;
1527         int copies = os->os_copies;
1528 
1529         /*
1530          * Determine checksum setting.
1531          */
1532         if (ismd) {
1533                 /*
1534                  * Metadata always gets checksummed.  If the data
1535                  * checksum is multi-bit correctable, and it's not a
1536                  * ZBT-style checksum, then it's suitable for metadata
1537                  * as well.  Otherwise, the metadata checksum defaults
1538                  * to fletcher4.
1539                  */
1540                 if (zio_checksum_table[checksum].ci_correctable < 1 ||
 
 |