Print this page
701 UNMAP support for COMSTAR
Contributed by: Sumit Gupta <sumit.gupta@nexenta.com>
Reviewed by: Garrett D'Amore <garrett@nexenta.com>
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
Reviewed by: George Wilson <gwilson@zfsmail.com>

*** 18,31 **** * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ - /* Portions Copyright 2010 Robert Milkowski */ - /* * ZFS volume emulation driver. * * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. * Volumes are accessed through the symbolic links named: --- 18,33 ---- * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * + * Portions Copyright 2010 Robert Milkowski + * + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* * ZFS volume emulation driver. * * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. * Volumes are accessed through the symbolic links named:
*** 340,349 **** --- 342,369 ---- error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); ASSERT(error == 0); } /* + * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we + * implement DKIOCFREE/free-long-range. + * + * When byteswap is set, the whole record is first passed through + * byteswap_uint64_array() in place; lr_offset and lr_length are read + * only after that. The return value is whatever dmu_free_long_range() + * returns for that range of ZVOL_OBJ in the zvol's objset. + */ + static int + zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap) + { + uint64_t offset, length; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + offset = lr->lr_offset; + length = lr->lr_length; + + return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length)); + } + + /* * Replay a TX_WRITE ZIL transaction that didn't get committed * after a system failure */ static int zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
*** 389,399 **** return (ENOTSUP); } /* * Callback vectors for replaying records. ! * Only TX_WRITE is needed for zvol. */ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* 0 no such transaction type */ zvol_replay_err, /* TX_CREATE */ zvol_replay_err, /* TX_MKDIR */ --- 409,419 ---- return (ENOTSUP); } /* * Callback vectors for replaying records. ! * Only TX_WRITE and TX_TRUNCATE are needed for zvol. */ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* 0 no such transaction type */ zvol_replay_err, /* TX_CREATE */ zvol_replay_err, /* TX_MKDIR */
*** 402,412 **** zvol_replay_err, /* TX_REMOVE */ zvol_replay_err, /* TX_RMDIR */ zvol_replay_err, /* TX_LINK */ zvol_replay_err, /* TX_RENAME */ zvol_replay_write, /* TX_WRITE */ ! zvol_replay_err, /* TX_TRUNCATE */ zvol_replay_err, /* TX_SETATTR */ zvol_replay_err, /* TX_ACL */ zvol_replay_err, /* TX_CREATE_ACL */ zvol_replay_err, /* TX_CREATE_ATTR */ zvol_replay_err, /* TX_CREATE_ACL_ATTR */ --- 422,432 ---- zvol_replay_err, /* TX_REMOVE */ zvol_replay_err, /* TX_RMDIR */ zvol_replay_err, /* TX_LINK */ zvol_replay_err, /* TX_RENAME */ zvol_replay_write, /* TX_WRITE */ ! zvol_replay_truncate, /* TX_TRUNCATE */ zvol_replay_err, /* TX_SETATTR */ zvol_replay_err, /* TX_ACL */ zvol_replay_err, /* TX_CREATE_ACL */ zvol_replay_err, /* TX_CREATE_ATTR */ zvol_replay_err, /* TX_CREATE_ACL_ATTR */
*** 1510,1520 **** --- 1530,1565 ---- /* * END entry points to allow external callers access to the volume. */ /* + * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE. + * + * Does nothing when zil_replaying() reports true for this zilog and tx. + * Otherwise creates a TX_TRUNCATE itx sized for an lr_truncate_t, fills + * in ZVOL_OBJ as lr_foid together with the given off and len, stores + * sync in itx_sync, and hands the itx to zil_itx_assign() for tx. + */ + static void + zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len, + boolean_t sync) + { + itx_t *itx; + lr_truncate_t *lr; + zilog_t *zilog = zv->zv_zilog; + + if (zil_replaying(zilog, tx)) + return; + + itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); + lr = (lr_truncate_t *)&itx->itx_lr; + lr->lr_foid = ZVOL_OBJ; + lr->lr_offset = off; + lr->lr_length = len; + + itx->itx_sync = sync; + zil_itx_assign(zilog, itx, tx); + } + + /* * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). + * Also a dirtbag dkio ioctl for unmap/free-block functionality. */ /*ARGSUSED*/ int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) {
*** 1629,1638 **** --- 1674,1742 ---- RL_WRITER); error = zvol_dump_fini(zv); zfs_range_unlock(rl); break; + case DKIOCFREE: + { + dkioc_free_t df; + dmu_tx_t *tx; + + if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) { + error = EFAULT; + break; + } + + /* + * Apply Postel's Law to length-checking. If they overshoot, + * just blank out until the end, if there's a need to blank + * out anything. + * + * NOTE(review): df.df_start + df.df_length is formed from the + * copied-in request; if that sum can wrap, the clamp below is + * skipped - confirm the field widths and any caller-side bounds. + */ + if (df.df_start >= zv->zv_volsize) + break; /* No need to do anything... */ + if (df.df_start + df.df_length > zv->zv_volsize) + df.df_length = DMU_OBJECT_END; + + rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length, + RL_WRITER); + tx = dmu_tx_create(zv->zv_objset); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error != 0) { + dmu_tx_abort(tx); + } else { + /* + * Log the free first, then perform it, then commit tx. + * + * NOTE(review): dmu_free_long_range() is issued before + * dmu_tx_commit(tx) - confirm it may be called while + * this tx is still assigned and uncommitted. + */ + zvol_log_truncate(zv, tx, df.df_start, + df.df_length, B_TRUE); + error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, + df.df_start, df.df_length); + dmu_tx_commit(tx); + } + + zfs_range_unlock(rl); + + if (error == 0) { + /* + * If the write-cache is disabled or 'sync' property + * is set to 'always' then treat this as a synchronous + * operation (i.e. commit to zil). + */ + if (!(zv->zv_flags & ZVOL_WCE) || + (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) + zil_commit(zv->zv_zilog, ZVOL_OBJ); + + /* + * If the caller set DF_WAIT_SYNC in df_flags, don't + * return until txg_wait_synced() on this objset's + * pool has returned. + */ + if (df.df_flags & DF_WAIT_SYNC) { + txg_wait_synced( + dmu_objset_pool(zv->zv_objset), 0); + } + } + break; + } + default: error = ENOTTY; break; }