Print this page
6494 ASSERT supported zio_types for file and disk vdevs
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Albert Lee <trisk@omniti.com>
NEX-4229 Panic destroying the pool using file backing store on FS with nbmand=on
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-3958 CLONE - Port NEX-3957 TRIM on file-backed vdevs is broken
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
usr/src/uts/common/io/scsi/targets/sd.c
usr/src/uts/common/sys/scsi/targets/sddef.h
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/zfs/vdev_file.c
+++ new/usr/src/uts/common/fs/zfs/vdev_file.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
24 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 25 */
25 26
26 27 #include <sys/zfs_context.h>
27 28 #include <sys/spa.h>
28 29 #include <sys/spa_impl.h>
29 30 #include <sys/vdev_file.h>
30 31 #include <sys/vdev_impl.h>
31 32 #include <sys/zio.h>
32 33 #include <sys/fs/zfs.h>
33 34 #include <sys/fm/fs/zfs.h>
35 +#include <sys/fcntl.h>
36 +#include <sys/vnode.h>
37 +#include <sys/dkioc_free_util.h>
34 38 #include <sys/abd.h>
35 39
36 40 /*
37 41 * Virtual device vector for files.
38 42 */
39 43
/*
 * Hold callback for file vdevs (installed in vdev_file_ops below).
 * It takes no reference; it only asserts that the vdev has a path.
 */
40 44 static void
41 45 vdev_file_hold(vdev_t *vd)
42 46 {
43 47 ASSERT(vd->vdev_path != NULL);
44 48 }
45 49
/*
 * Release callback for file vdevs (installed in vdev_file_ops below).
 * It drops nothing; it only asserts that the vdev has a path.
 */
46 50 static void
47 51 vdev_file_rele(vdev_t *vd)
48 52 {
49 53 ASSERT(vd->vdev_path != NULL);
50 54 }
51 55
/*
 * Open the file backing this vdev or, when it is already open during a
 * reopen, just refresh its size.
 *
 * vd        - vdev to open; vd->vdev_path must be an absolute path.
 * psize     - out: file size reported by VOP_GETATTR (AT_SIZE).
 * max_psize - out: set to the same value as *psize.
 * ashift    - out: always SPA_MINBLOCKSHIFT.
 *
 * Returns 0 on success. Failures: EINVAL (no path, or path not absolute;
 * vs_aux = VDEV_AUX_BAD_LABEL), the vn_openat() error, ENODEV (only when
 * _KERNEL is defined and the vnode is not VREG), or the VOP_GETATTR()
 * error; the last three set vs_aux = VDEV_AUX_OPEN_FAILED.
 */
52 56 static int
53 57 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
54 58 uint64_t *ashift)
55 59 {
56 60 vdev_file_t *vf;
57 61 vnode_t *vp;
58 62 vattr_t vattr;
59 63 int error;
60 64
61 65 /*
62 66 * We must have a pathname, and it must be absolute.
63 67 */
64 68 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
65 69 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
66 70 return (SET_ERROR(EINVAL));
67 71 }
68 72
69 73 /*
70 74 * Reopen the device if it's not currently open. Otherwise,
71 75 * just update the physical size of the device.
72 76 */
73 77 if (vd->vdev_tsd != NULL) {
74 78 ASSERT(vd->vdev_reopening);
75 79 vf = vd->vdev_tsd;
76 80 goto skip_open;
77 81 }
78 82
	/*
	 * vdev_tsd is set before the open is attempted, so every error
	 * return below leaves the vdev_file_t attached to the vdev.
	 * NOTE(review): presumably the caller runs vdev_file_close() after
	 * a failed open, which frees it (and closes the vnode if one was
	 * stored) -- confirm against the caller.
	 */
79 83 vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
80 84
81 85 /*
82 86 * We always open the files from the root of the global zone, even if
83 87 * we're in a local zone. If the user has gotten to this point, the
84 88 * administrator has already decided that the pool should be available
85 89 * to local zone users, so the underlying devices should be as well.
86 90 */
87 91 ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
	/*
	 * The leading slash is skipped and rootdir is passed as the start
	 * vnode, so the lookup is relative to rootdir.
	 */
88 92 error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE,
89 93 spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1);
90 94
91 95 if (error) {
92 96 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
93 97 return (error);
94 98 }
95 99
96 100 vf->vf_vnode = vp;
97 101
98 102 #ifdef _KERNEL
99 103 /*
100 104 * Make sure it's a regular file.
101 105 */
102 106 if (vp->v_type != VREG) {
103 107 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
104 108 return (SET_ERROR(ENODEV));
105 109 }
106 110 #endif
107 111
108 112 skip_open:
109 113 /*
110 114 * Determine the physical size of the file.
111 115 */
112 116 vattr.va_mask = AT_SIZE;
113 117 error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL);
114 118 if (error) {
115 119 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
116 120 return (error);
117 121 }
118 122
|
↓ open down ↓ |
75 lines elided |
↑ open up ↑ |
	/* Both size outputs get the file size; ashift is the SPA minimum. */
119 123 *max_psize = *psize = vattr.va_size;
120 124 *ashift = SPA_MINBLOCKSHIFT;
121 125
122 126 return (0);
123 127 }
124 128
/*
 * Close the file backing this vdev and free its vdev_file_t.
 *
 * Does nothing while the vdev is being reopened or when no vdev_file_t is
 * attached. Otherwise, if a vnode was stored, it is passed to VOP_PUTPAGE
 * with B_INVAL, closed and released (the VOP_PUTPAGE and VOP_CLOSE results
 * are ignored). Finally vdev_delayed_close is cleared, the vdev_file_t is
 * freed and vdev_tsd is reset to NULL.
 */
125 129 static void
126 130 vdev_file_close(vdev_t *vd)
127 131 {
128 132 vdev_file_t *vf = vd->vdev_tsd;
	/* Zero-initialised caller context; see the comment at VOP_CLOSE. */
133 + caller_context_t ct = {0};
129 134
130 135 if (vd->vdev_reopening || vf == NULL)
131 136 return;
132 137
	/* vf_vnode is NULL when vdev_file_open() failed before storing it. */
133 138 if (vf->vf_vnode != NULL) {
134 139 (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
140 + /*
141 + * We need to supply caller context with PID zero to fop_close
142 + * in order to clean properly a share reservation which might
143 + * have been created by vdev_file_open if nbmand was "on" for
144 + * underlying filesystem. Mismatched PIDs in create and delete
145 + * reservation calls would lead to orphaned reservation.
146 + */
135 147 (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0,
136 - kcred, NULL);
148 + kcred, &ct);
137 149 VN_RELE(vf->vf_vnode);
138 150 }
139 151
140 152 vd->vdev_delayed_close = B_FALSE;
141 153 kmem_free(vf, sizeof (vdev_file_t));
142 154 vd->vdev_tsd = NULL;
143 155 }
144 156
145 157 /*
146 158 * Implements the interrupt side for file vdev types. This routine will be
147 159 * called when the I/O completes allowing us to transfer the I/O to the
148 160 * interrupt taskqs. For consistency, the code structure mimics disk vdev
149 161 * types.
150 162 */
151 163 static void
152 164 vdev_file_io_intr(buf_t *bp)
153 165 {
	/*
	 * bp is the vb_buf embedded in a vdev_buf_t allocated by
	 * vdev_file_io_start(). The cast assumes vb_buf is the first
	 * member -- TODO confirm against the vdev_buf_t declaration.
	 */
154 166 vdev_buf_t *vb = (vdev_buf_t *)bp;
155 167 zio_t *zio = vb->vb_io;
156 168
	/*
	 * Any buf error is reported to the zio as EIO. A transfer that
	 * reported no error but left a residual is reported as ENOSPC.
	 */
157 169 zio->io_error = (geterror(bp) != 0 ? EIO : 0);
158 170 if (zio->io_error == 0 && bp->b_resid != 0)
159 171 zio->io_error = SET_ERROR(ENOSPC);
160 172
	/*
	 * Hand back the buffer borrowed in vdev_file_io_start(). Reads use
	 * the _copy variant, presumably to copy the data into the abd.
	 */
161 173 if (zio->io_type == ZIO_TYPE_READ) {
162 174 abd_return_buf_copy(zio->io_abd, bp->b_un.b_addr, zio->io_size);
163 175 } else {
164 176 abd_return_buf(zio->io_abd, bp->b_un.b_addr, zio->io_size);
165 177 }
166 178
	/* Freeing vb also frees bp; neither may be used after this point. */
167 179 kmem_free(vb, sizeof (vdev_buf_t));
168 180 zio_delay_interrupt(zio);
169 181 }
170 182
/*
 * Taskq callback dispatched by vdev_file_io_start(): performs the read or
 * write described by the buf with vn_rdwr() and then completes the buf.
 *
 * arg - the buf_t prepared by vdev_file_io_start(); b_private holds the
 *       vnode, b_lblkno the offset in disk-block units.
 */
171 183 static void
172 184 vdev_file_io_strategy(void *arg)
173 185 {
174 186 buf_t *bp = arg;
175 187 vnode_t *vp = bp->b_private;
176 188 ssize_t resid;
177 189 int error;
178 190
	/* ldbtob() turns the block number back into a byte offset. */
179 191 error = vn_rdwr((bp->b_flags & B_READ) ? UIO_READ : UIO_WRITE,
180 192 vp, bp->b_un.b_addr, bp->b_bcount, ldbtob(bp->b_lblkno),
181 193 UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
182 194
	/*
	 * On success record the residual count; on failure record the
	 * error. biodone() is called either way, which is expected to
	 * invoke b_iodone (vdev_file_io_intr).
	 */
183 195 if (error == 0) {
184 196 bp->b_resid = resid;
185 197 biodone(bp);
186 198 } else {
187 199 bioerror(bp, error);
188 200 biodone(bp);
189 201 }
190 202 }
191 203
/*
 * Start an I/O on a file vdev.
 *
 * ZIO_TYPE_IOCTL requests are handled inline: DKIOCFLUSHWRITECACHE maps to
 * VOP_FSYNC, DKIOCFREE frees each non-empty extent with
 * VOP_SPACE(F_FREESP), and any other command fails with ENOTSUP. The zio
 * is then passed to zio_execute(), or to zio_interrupt() with ENXIO when
 * the vdev is not readable.
 *
 * Reads and writes are wrapped in a vdev_buf_t and dispatched to
 * system_taskq, where vdev_file_io_strategy() performs the transfer and
 * vdev_file_io_intr() completes the zio.
 */
192 204 static void
193 205 vdev_file_io_start(zio_t *zio)
194 206 {
195 207 vdev_t *vd = zio->io_vd;
196 208 vdev_file_t *vf = vd->vdev_tsd;
197 209 vdev_buf_t *vb;
198 210 buf_t *bp;
199 211
200 212 if (zio->io_type == ZIO_TYPE_IOCTL) {
201 213 /* XXPOLICY */
202 214 if (!vdev_readable(vd)) {
|
↓ open down ↓ |
56 lines elided |
↑ open up ↑ |
203 215 zio->io_error = SET_ERROR(ENXIO);
204 216 zio_interrupt(zio);
205 217 return;
206 218 }
207 219
208 220 switch (zio->io_cmd) {
209 221 case DKIOCFLUSHWRITECACHE:
210 222 zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
211 223 kcred, NULL);
212 224 break;
225 + case DKIOCFREE:
226 + {
	/*
	 * The extent list for this command arrives in zio->io_private.
	 * Zero-length extents are skipped, each start is offset by
	 * dfl_offset, and the loop stops at the first extent that fails.
	 * io_error is only written on failure here.
	 * NOTE(review): i is an int while the type of dfl_num_exts is not
	 * visible here -- confirm there is no signed/unsigned mismatch.
	 */
227 + dkioc_free_list_t *dfl = zio->io_private;
228 +
229 + ASSERT(dfl != NULL);
230 + for (int i = 0; i < dfl->dfl_num_exts; i++) {
231 + struct flock64 flck;
232 + int error;
233 +
234 + if (dfl->dfl_exts[i].dfle_length == 0)
235 + continue;
236 +
237 + bzero(&flck, sizeof (flck));
238 + flck.l_type = F_FREESP;
239 + flck.l_start = dfl->dfl_exts[i].dfle_start +
240 + dfl->dfl_offset;
241 + flck.l_len = dfl->dfl_exts[i].dfle_length;
242 +
243 + error = VOP_SPACE(vf->vf_vnode,
244 + F_FREESP, &flck, 0, 0, kcred, NULL);
245 + if (error != 0) {
246 + zio->io_error = SET_ERROR(error);
247 + break;
248 + }
249 + }
250 + break;
251 + }
213 252 default:
214 253 zio->io_error = SET_ERROR(ENOTSUP);
215 254 }
216 255
217 256 zio_execute(zio);
218 257 return;
219 258 }
220 259
221 260 ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
222 261 zio->io_target_timestamp = zio_handle_io_delay(zio);
223 262
	/*
	 * The vdev_buf_t couples the buf with its zio. It is freed in
	 * vdev_file_io_intr() once the transfer has completed.
	 */
224 263 vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
225 264
226 265 vb->vb_io = zio;
227 266 bp = &vb->vb_buf;
228 267
229 268 bioinit(bp);
230 269 bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
231 270 bp->b_bcount = zio->io_size;
232 271
	/*
	 * Borrow a buffer of io_size bytes from the abd. Writes use the
	 * _copy variant, presumably so the buffer starts with the abd
	 * contents. It is returned in vdev_file_io_intr().
	 */
233 272 if (zio->io_type == ZIO_TYPE_READ) {
234 273 bp->b_un.b_addr =
235 274 abd_borrow_buf(zio->io_abd, zio->io_size);
236 275 } else {
237 276 bp->b_un.b_addr =
238 277 abd_borrow_buf_copy(zio->io_abd, zio->io_size);
239 278 }
240 279
	/*
	 * The byte offset is stored in disk-block units;
	 * vdev_file_io_strategy() converts it back with ldbtob().
	 */
241 280 bp->b_lblkno = lbtodb(zio->io_offset);
242 281 bp->b_bufsize = zio->io_size;
243 282 bp->b_private = vf->vf_vnode;
244 283 bp->b_iodone = (int (*)())vdev_file_io_intr;
245 284
	/* The dispatch must succeed: a zero return fails the VERIFY. */
246 285 VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, bp,
247 286 TQ_SLEEP), !=, 0);
248 287 }
249 288
/*
 * I/O completion callback for file vdevs (installed in vdev_file_ops
 * below). Intentionally empty: the zio argument is unused.
 */
250 289 /* ARGSUSED */
251 290 static void
252 291 vdev_file_io_done(zio_t *zio)
253 292 {
254 293 }
255 294
/*
 * Operations table for file vdevs. The initializer is positional, so the
 * meaning of each slot (including the two NULL entries) is fixed by the
 * member order of vdev_ops_t, which is declared outside this file.
 */
256 295 vdev_ops_t vdev_file_ops = {
257 296 vdev_file_open,
258 297 vdev_file_close,
259 298 vdev_default_asize,
260 299 vdev_file_io_start,
261 300 vdev_file_io_done,
262 301 NULL,
263 302 vdev_file_hold,
264 303 vdev_file_rele,
265 304 NULL,
266 305 VDEV_TYPE_FILE, /* name of this vdev type */
267 306 B_TRUE /* leaf vdev */
268 307 };
269 308
270 309 /*
271 310 * From userland we access disks just like files.
272 311 */
273 312 #ifndef _KERNEL
274 313
/*
 * Operations table for disk vdevs when _KERNEL is not defined. It uses the
 * same callbacks as vdev_file_ops; only the type name (VDEV_TYPE_DISK)
 * differs.
 */
275 314 vdev_ops_t vdev_disk_ops = {
276 315 vdev_file_open,
277 316 vdev_file_close,
278 317 vdev_default_asize,
279 318 vdev_file_io_start,
280 319 vdev_file_io_done,
281 320 NULL,
282 321 vdev_file_hold,
283 322 vdev_file_rele,
284 323 NULL,
285 324 VDEV_TYPE_DISK, /* name of this vdev type */
286 325 B_TRUE /* leaf vdev */
287 326 };
288 327
289 328 #endif
|
↓ open down ↓ |
67 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX