Print this page
NEX-15262 xdf panics if it fails to attach the device
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-15262 xdf panics if it fails to attach the device
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-14823 xdf devices attach hybrid VTOC/EFI label
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-14791 rework PV-HVM disk device handling
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-14565 port upstream Xen-related fixes
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
7777 Expose xdf minor nodes when in PV-HVM mode
Reviewed by: Jeremy Jones <jeremy@delphix.com>
Reviewed by: Basil Crow <basil.crow@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
8020 Fix iostat on the EC2 instances
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Dan McDonald <danmcd@omniti.com>
re #13140 rb4270 hvm_sd module missing dependencies on scsi and cmlb
re #13166 rb4270 Check for Xen HVM even if CPUID signature returns Microsoft Hv
re #13187 rb4270 Fix Xen HVM related warnings
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/xen/io/xdf.c
+++ new/usr/src/uts/common/xen/io/xdf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
29 29 * Copyright 2017 Nexenta Systems, Inc.
30 30 */
31 31
/*
 * xdf.c - Xen Virtual Block Device Driver
 * TODO:
 *	- support alternate block size (currently only DEV_BSIZE supported)
 *	- revalidate geometry for removable devices
 *
 * This driver exports disk device nodes, accepts IO requests from those
 * nodes, and services those requests by talking to a backend device
 * in another domain.
 *
 * Communication with the backend device is done via a ringbuffer (which is
 * managed via xvdi interfaces) and dma memory (which is managed via ddi
 * interfaces).
 *
 * Communication with the backend device is dependent upon establishing a
 * connection to the backend device.  This connection process involves
 * reading device configuration information from xenbus and publishing
 * some frontend runtime configuration parameters via the xenbus (for
 * consumption by the backend).  Once we've published runtime configuration
 * information via the xenbus, the backend device can enter the connected
 * state and we'll enter the XD_CONNECTED state.  But before we can allow
 * random IO to begin, we need to do IO to the backend device to determine
 * the device label and if flush operations are supported.  Once this is
 * done we enter the XD_READY state and can process any IO operations.
 *
 * We receive notifications of xenbus state changes for the backend device
 * (aka, the "other end") via the xdf_oe_change() callback.  This callback
 * is single threaded, meaning that we can't receive new notification of
 * other end state changes while we're processing an outstanding
 * notification of an other end state change.  Therefore we can't do any
 * blocking operations from the xdf_oe_change() callback.  This is why we
 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
 * IO to get us from the XD_CONNECTED to the XD_READY state.  All IO
 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
 * through xdf_lb_rdwr(), which is a synchronous IO interface.  IOs
 * generated by the xdf_ready_tq_thread thread have priority over all
 * other IO requests.
 *
 * We also communicate with the backend device via the xenbus "media-req"
 * (XBP_MEDIA_REQ) property.  For more information on this see the
 * comments in blkif.h.
 */
74 74
75 75 #include <io/xdf.h>
76 76
77 77 #include <sys/conf.h>
78 78 #include <sys/dkio.h>
79 79 #include <sys/promif.h>
80 80 #include <sys/sysmacros.h>
81 81 #include <sys/kstat.h>
82 82 #include <sys/mach_mmu.h>
83 83 #ifdef XPV_HVM_DRIVER
84 84 #include <sys/xpv_support.h>
85 85 #else /* !XPV_HVM_DRIVER */
86 86 #include <sys/evtchn_impl.h>
87 87 #endif /* !XPV_HVM_DRIVER */
88 88 #include <sys/sunndi.h>
89 89 #include <public/io/xenbus.h>
90 90 #include <xen/sys/xenbus_impl.h>
91 91 #include <sys/scsi/generic/inquiry.h>
92 92 #include <xen/io/blkif_impl.h>
93 93 #include <sys/fdio.h>
94 94 #include <sys/cdio.h>
95 95
/*
 * DEBUG_EVAL(x) lets a debug-only expression be written inline without
 * wrapping it in '#ifdef DEBUG'.  On DEBUG builds it expands to (x); on
 * all other builds it expands to nothing, so x is never evaluated.
 */
#ifndef DEBUG
#define	DEBUG_EVAL(x)
#else	/* DEBUG */
#define	DEBUG_EVAL(x)	(x)
#endif	/* DEBUG */
105 105
/*
 * Timing constants.  The trailing annotations give the intended duration
 * in seconds.
 * NOTE(review): the raw values match those durations only if the unit is
 * microseconds (despite "MSEC" in the first name) - confirm at the use
 * sites.
 */
#define	XDF_DRAIN_MSEC_DELAY	(50*1000)	/* 00.05 sec per retry */
#define	XDF_DRAIN_RETRY_COUNT	200		/* 200 retries = 10.00 sec */
#define	XDF_STATE_TIMEOUT	(30*1000*1000)	/* 30.00 sec */
109 109
#define	INVALID_DOMID		((domid_t)-1)

/* values stored in v_req_t.v_flush_diskcache */
#define	FLUSH_DISKCACHE		0x1
#define	WRITE_BARRIER		0x2

/* block to write to cause a cache flush */
#define	DEFAULT_FLUSH_BLOCK	156

/*
 * Which flush mechanism applies to a device: both require
 * xdf_feature_barrier; xdf_flush_supported selects between a write
 * barrier (not set) and a disk cache flush (set).
 */
#define	USE_WRITE_BARRIER(vdp)						\
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp)					\
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)

/*
 * A write barrier request is a non-read buf whose data pointer is the
 * device's own cached copy of the flush block.
 */
#define	IS_WRITE_BARRIER(vdp, bp)					\
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) &&			\
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))

/*
 * A cache flush request is a zero-length non-read buf.
 * NOTE: this macro takes only 'bp' but expands a reference to 'vdp', so
 * it can only be used where a variable named 'vdp' is in scope.
 */
#define	IS_FLUSH_DISKCACHE(bp)						\
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))
123 123
/*
 * A vreq is done when its current DMA window has completed and either
 * it is a disk cache flush request or that window was the last one.
 */
#define	VREQ_DONE(vreq)							\
	VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) &&	\
	    (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) ||	\
	    (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws)))

/* The vreq associated with a buf is kept in the buf's av_back pointer. */
#define	BP_VREQ(bp)		((v_req_t *)((bp)->av_back))
#define	BP_VREQ_SET(bp, vreq)	(((bp)->av_back = (buf_t *)(vreq)))
131 131
132 132 extern int do_polled_io;
133 133
134 134 /* run-time tunables that we don't want the compiler to optimize away */
135 135 volatile int xdf_debug = 0;
136 136 volatile boolean_t xdf_barrier_flush_disable = B_FALSE;
137 137
138 138 /* per module globals */
139 139 major_t xdf_major;
140 140 static void *xdf_ssp;
141 141 static kmem_cache_t *xdf_vreq_cache;
142 142 static kmem_cache_t *xdf_gs_cache;
143 143 static int xdf_maxphys = XB_MAXPHYS;
144 144 static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
145 145 static int xdf_fbrewrites; /* flush block re-write count */
146 146
147 147 /* misc public functions */
148 148 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
149 149 int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
150 150
151 151 /* misc private functions */
152 152 static void xdf_io_start(xdf_t *);
153 153 static void xdf_devid_setup(xdf_t *);
154 154
155 155 /* callbacks from commmon label */
156 156 static cmlb_tg_ops_t xdf_lb_ops = {
157 157 TG_DK_OPS_VERSION_1,
158 158 xdf_lb_rdwr,
159 159 xdf_lb_getinfo
160 160 };
161 161
162 162 /*
163 163 * I/O buffer DMA attributes
164 164 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
165 165 */
166 166 static ddi_dma_attr_t xb_dma_attr = {
167 167 DMA_ATTR_V0,
168 168 (uint64_t)0, /* lowest address */
169 169 (uint64_t)0xffffffffffffffff, /* highest usable address */
170 170 (uint64_t)0xffffff, /* DMA counter limit max */
171 171 (uint64_t)XB_BSIZE, /* alignment in bytes */
172 172 XB_BSIZE - 1, /* bitmap of burst sizes */
173 173 XB_BSIZE, /* min transfer */
174 174 (uint64_t)XB_MAX_XFER, /* maximum transfer */
175 175 (uint64_t)PAGEOFFSET, /* 1 page segment length */
176 176 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */
177 177 XB_BSIZE, /* granularity */
178 178 0, /* flags (reserved) */
179 179 };
180 180
181 181 static ddi_device_acc_attr_t xc_acc_attr = {
182 182 DDI_DEVICE_ATTR_V0,
183 183 DDI_NEVERSWAP_ACC,
184 184 DDI_STRICTORDER_ACC
185 185 };
186 186
187 187 static void
188 188 xdf_timeout_handler(void *arg)
189 189 {
190 190 xdf_t *vdp = arg;
191 191
192 192 mutex_enter(&vdp->xdf_dev_lk);
193 193 vdp->xdf_timeout_id = 0;
194 194 mutex_exit(&vdp->xdf_dev_lk);
195 195
196 196 /* new timeout thread could be re-scheduled */
197 197 xdf_io_start(vdp);
198 198 }
199 199
200 200 /*
201 201 * callback func when DMA/GTE resources is available
202 202 *
203 203 * Note: we only register one callback function to grant table subsystem
204 204 * since we only have one 'struct gnttab_free_callback' in xdf_t.
205 205 */
206 206 static int
207 207 xdf_dmacallback(caddr_t arg)
208 208 {
209 209 xdf_t *vdp = (xdf_t *)arg;
210 210 ASSERT(vdp != NULL);
211 211
212 212 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
213 213 vdp->xdf_addr));
214 214
215 215 ddi_trigger_softintr(vdp->xdf_softintr_id);
216 216 return (DDI_DMA_CALLBACK_DONE);
217 217 }
218 218
219 219 static ge_slot_t *
220 220 gs_get(xdf_t *vdp, int isread)
221 221 {
222 222 grant_ref_t gh;
223 223 ge_slot_t *gs;
224 224
225 225 /* try to alloc GTEs needed in this slot, first */
226 226 if (gnttab_alloc_grant_references(
227 227 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
228 228 if (vdp->xdf_gnt_callback.next == NULL) {
229 229 SETDMACBON(vdp);
230 230 gnttab_request_free_callback(
231 231 &vdp->xdf_gnt_callback,
232 232 (void (*)(void *))xdf_dmacallback,
233 233 (void *)vdp,
234 234 BLKIF_MAX_SEGMENTS_PER_REQUEST);
235 235 }
236 236 return (NULL);
237 237 }
238 238
239 239 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
240 240 if (gs == NULL) {
241 241 gnttab_free_grant_references(gh);
242 242 if (vdp->xdf_timeout_id == 0)
243 243 /* restart I/O after one second */
244 244 vdp->xdf_timeout_id =
245 245 timeout(xdf_timeout_handler, vdp, hz);
246 246 return (NULL);
247 247 }
248 248
249 249 /* init gs_slot */
250 250 gs->gs_oeid = vdp->xdf_peer;
251 251 gs->gs_isread = isread;
252 252 gs->gs_ghead = gh;
253 253 gs->gs_ngrefs = 0;
254 254
255 255 return (gs);
256 256 }
257 257
258 258 static void
259 259 gs_free(ge_slot_t *gs)
260 260 {
261 261 int i;
262 262
263 263 /* release all grant table entry resources used in this slot */
264 264 for (i = 0; i < gs->gs_ngrefs; i++)
265 265 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0);
266 266 gnttab_free_grant_references(gs->gs_ghead);
267 267 list_remove(&gs->gs_vreq->v_gs, gs);
268 268 kmem_cache_free(xdf_gs_cache, gs);
269 269 }
270 270
271 271 static grant_ref_t
272 272 gs_grant(ge_slot_t *gs, mfn_t mfn)
273 273 {
274 274 grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead);
275 275
276 276 ASSERT(gr != -1);
277 277 ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
278 278 gs->gs_ge[gs->gs_ngrefs++] = gr;
279 279 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread);
280 280
281 281 return (gr);
282 282 }
283 283
284 284 /*
285 285 * Alloc a vreq for this bp
286 286 * bp->av_back contains the pointer to the vreq upon return
287 287 */
288 288 static v_req_t *
289 289 vreq_get(xdf_t *vdp, buf_t *bp)
290 290 {
291 291 v_req_t *vreq = NULL;
292 292
293 293 ASSERT(BP_VREQ(bp) == NULL);
294 294
295 295 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
296 296 if (vreq == NULL) {
297 297 if (vdp->xdf_timeout_id == 0)
298 298 /* restart I/O after one second */
299 299 vdp->xdf_timeout_id =
300 300 timeout(xdf_timeout_handler, vdp, hz);
301 301 return (NULL);
302 302 }
303 303 bzero(vreq, sizeof (v_req_t));
304 304 list_create(&vreq->v_gs, sizeof (ge_slot_t),
305 305 offsetof(ge_slot_t, gs_vreq_link));
306 306 vreq->v_buf = bp;
307 307 vreq->v_status = VREQ_INIT;
308 308 vreq->v_runq = B_FALSE;
309 309 BP_VREQ_SET(bp, vreq);
310 310 /* init of other fields in vreq is up to the caller */
311 311
312 312 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
313 313
314 314 return (vreq);
315 315 }
316 316
317 317 static void
318 318 vreq_free(xdf_t *vdp, v_req_t *vreq)
319 319 {
320 320 buf_t *bp = vreq->v_buf;
321 321
322 322 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
323 323 ASSERT(BP_VREQ(bp) == vreq);
324 324
325 325 list_remove(&vdp->xdf_vreq_act, vreq);
326 326
327 327 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
328 328 goto done;
329 329
330 330 switch (vreq->v_status) {
331 331 case VREQ_DMAWIN_DONE:
332 332 case VREQ_GS_ALLOCED:
333 333 case VREQ_DMABUF_BOUND:
334 334 (void) ddi_dma_unbind_handle(vreq->v_dmahdl);
335 335 /*FALLTHRU*/
336 336 case VREQ_DMAMEM_ALLOCED:
337 337 if (!ALIGNED_XFER(bp)) {
338 338 ASSERT(vreq->v_abuf != NULL);
339 339 if (!IS_ERROR(bp) && IS_READ(bp))
340 340 bcopy(vreq->v_abuf, bp->b_un.b_addr,
341 341 bp->b_bcount);
342 342 ddi_dma_mem_free(&vreq->v_align);
343 343 }
344 344 /*FALLTHRU*/
345 345 case VREQ_MEMDMAHDL_ALLOCED:
346 346 if (!ALIGNED_XFER(bp))
347 347 ddi_dma_free_handle(&vreq->v_memdmahdl);
348 348 /*FALLTHRU*/
349 349 case VREQ_DMAHDL_ALLOCED:
350 350 ddi_dma_free_handle(&vreq->v_dmahdl);
351 351 break;
352 352 default:
353 353 break;
354 354 }
355 355 done:
356 356 ASSERT(!vreq->v_runq);
357 357 list_destroy(&vreq->v_gs);
358 358 kmem_cache_free(xdf_vreq_cache, vreq);
359 359 }
360 360
361 361 /*
362 362 * Snarf new data if our flush block was re-written
363 363 */
364 364 static void
365 365 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
366 366 {
367 367 int nblks;
368 368 boolean_t mapin;
369 369
370 370 if (IS_WRITE_BARRIER(vdp, bp))
371 371 return; /* write was a flush write */
372 372
373 373 mapin = B_FALSE;
374 374 nblks = bp->b_bcount >> DEV_BSHIFT;
375 375 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
376 376 xdf_fbrewrites++;
377 377 if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
378 378 mapin = B_TRUE;
379 379 bp_mapin(bp);
380 380 }
381 381 bcopy(bp->b_un.b_addr +
382 382 ((xdf_flush_block - blkno) << DEV_BSHIFT),
383 383 vdp->xdf_cache_flush_block, DEV_BSIZE);
384 384 if (mapin)
385 385 bp_mapout(bp);
386 386 }
387 387 }
388 388
389 389 /*
390 390 * Initalize the DMA and grant table resources for the buf
391 391 */
392 392 static int
393 393 vreq_setup(xdf_t *vdp, v_req_t *vreq)
394 394 {
395 395 int rc;
396 396 ddi_dma_attr_t dmaattr;
397 397 uint_t ndcs, ndws;
398 398 ddi_dma_handle_t dh;
399 399 ddi_dma_handle_t mdh;
400 400 ddi_dma_cookie_t dc;
401 401 ddi_acc_handle_t abh;
402 402 caddr_t aba;
403 403 ge_slot_t *gs;
404 404 size_t bufsz;
405 405 off_t off;
406 406 size_t sz;
407 407 buf_t *bp = vreq->v_buf;
408 408 int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
409 409 DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
410 410
411 411 switch (vreq->v_status) {
412 412 case VREQ_INIT:
413 413 if (IS_FLUSH_DISKCACHE(bp)) {
414 414 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
415 415 DPRINTF(DMA_DBG, ("xdf@%s: "
416 416 "get ge_slotfailed\n", vdp->xdf_addr));
417 417 return (DDI_FAILURE);
418 418 }
419 419 vreq->v_blkno = 0;
420 420 vreq->v_nslots = 1;
421 421 vreq->v_flush_diskcache = FLUSH_DISKCACHE;
422 422 vreq->v_status = VREQ_GS_ALLOCED;
423 423 gs->gs_vreq = vreq;
424 424 list_insert_head(&vreq->v_gs, gs);
425 425 return (DDI_SUCCESS);
426 426 }
427 427
428 428 if (IS_WRITE_BARRIER(vdp, bp))
429 429 vreq->v_flush_diskcache = WRITE_BARRIER;
430 430 vreq->v_blkno = bp->b_blkno +
431 431 (diskaddr_t)(uintptr_t)bp->b_private;
432 432 /* See if we wrote new data to our flush block */
433 433 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
434 434 check_fbwrite(vdp, bp, vreq->v_blkno);
435 435 vreq->v_status = VREQ_INIT_DONE;
436 436 /*FALLTHRU*/
437 437
438 438 case VREQ_INIT_DONE:
439 439 /*
440 440 * alloc DMA handle
441 441 */
442 442 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
443 443 xdf_dmacallback, (caddr_t)vdp, &dh);
444 444 if (rc != DDI_SUCCESS) {
445 445 SETDMACBON(vdp);
446 446 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
447 447 vdp->xdf_addr));
448 448 return (DDI_FAILURE);
449 449 }
450 450
451 451 vreq->v_dmahdl = dh;
452 452 vreq->v_status = VREQ_DMAHDL_ALLOCED;
453 453 /*FALLTHRU*/
454 454
455 455 case VREQ_DMAHDL_ALLOCED:
456 456 /*
457 457 * alloc dma handle for 512-byte aligned buf
458 458 */
459 459 if (!ALIGNED_XFER(bp)) {
460 460 /*
461 461 * XXPV: we need to temporarily enlarge the seg
462 462 * boundary and s/g length to work round CR6381968
463 463 */
464 464 dmaattr = xb_dma_attr;
465 465 dmaattr.dma_attr_seg = (uint64_t)-1;
466 466 dmaattr.dma_attr_sgllen = INT_MAX;
467 467 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
468 468 xdf_dmacallback, (caddr_t)vdp, &mdh);
469 469 if (rc != DDI_SUCCESS) {
470 470 SETDMACBON(vdp);
471 471 DPRINTF(DMA_DBG, ("xdf@%s: "
472 472 "unaligned buf DMAhandle alloc failed\n",
473 473 vdp->xdf_addr));
474 474 return (DDI_FAILURE);
475 475 }
476 476 vreq->v_memdmahdl = mdh;
477 477 vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
478 478 }
479 479 /*FALLTHRU*/
480 480
481 481 case VREQ_MEMDMAHDL_ALLOCED:
482 482 /*
483 483 * alloc 512-byte aligned buf
484 484 */
485 485 if (!ALIGNED_XFER(bp)) {
486 486 if (bp->b_flags & (B_PAGEIO | B_PHYS))
487 487 bp_mapin(bp);
488 488 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
489 489 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
490 490 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
491 491 &aba, &bufsz, &abh);
492 492 if (rc != DDI_SUCCESS) {
493 493 SETDMACBON(vdp);
494 494 DPRINTF(DMA_DBG, ("xdf@%s: "
495 495 "DMA mem allocation failed\n",
496 496 vdp->xdf_addr));
497 497 return (DDI_FAILURE);
498 498 }
499 499
500 500 vreq->v_abuf = aba;
501 501 vreq->v_align = abh;
502 502 vreq->v_status = VREQ_DMAMEM_ALLOCED;
503 503
504 504 ASSERT(bufsz >= bp->b_bcount);
505 505 if (!IS_READ(bp))
506 506 bcopy(bp->b_un.b_addr, vreq->v_abuf,
507 507 bp->b_bcount);
508 508 }
509 509 /*FALLTHRU*/
510 510
511 511 case VREQ_DMAMEM_ALLOCED:
512 512 /*
513 513 * dma bind
514 514 */
515 515 if (ALIGNED_XFER(bp)) {
516 516 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
517 517 dma_flags, xdf_dmacallback, (caddr_t)vdp,
518 518 &dc, &ndcs);
519 519 } else {
520 520 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
521 521 NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
522 522 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
523 523 }
524 524 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
525 525 /* get num of dma windows */
526 526 if (rc == DDI_DMA_PARTIAL_MAP) {
527 527 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
528 528 ASSERT(rc == DDI_SUCCESS);
529 529 } else {
530 530 ndws = 1;
531 531 }
532 532 } else {
533 533 SETDMACBON(vdp);
534 534 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
535 535 vdp->xdf_addr));
536 536 return (DDI_FAILURE);
537 537 }
538 538
539 539 vreq->v_dmac = dc;
540 540 vreq->v_dmaw = 0;
541 541 vreq->v_ndmacs = ndcs;
542 542 vreq->v_ndmaws = ndws;
543 543 vreq->v_nslots = ndws;
544 544 vreq->v_status = VREQ_DMABUF_BOUND;
545 545 /*FALLTHRU*/
546 546
547 547 case VREQ_DMABUF_BOUND:
548 548 /*
549 549 * get ge_slot, callback is set upon failure from gs_get(),
550 550 * if not set previously
551 551 */
552 552 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
553 553 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
554 554 vdp->xdf_addr));
555 555 return (DDI_FAILURE);
556 556 }
557 557
558 558 vreq->v_status = VREQ_GS_ALLOCED;
559 559 gs->gs_vreq = vreq;
560 560 list_insert_head(&vreq->v_gs, gs);
561 561 break;
562 562
563 563 case VREQ_GS_ALLOCED:
564 564 /* nothing need to be done */
565 565 break;
566 566
567 567 case VREQ_DMAWIN_DONE:
568 568 /*
569 569 * move to the next dma window
570 570 */
571 571 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);
572 572
573 573 /* get a ge_slot for this DMA window */
574 574 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
575 575 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
576 576 vdp->xdf_addr));
577 577 return (DDI_FAILURE);
578 578 }
579 579
580 580 vreq->v_dmaw++;
581 581 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
582 582 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS);
583 583 vreq->v_status = VREQ_GS_ALLOCED;
584 584 gs->gs_vreq = vreq;
585 585 list_insert_head(&vreq->v_gs, gs);
586 586 break;
587 587
588 588 default:
589 589 return (DDI_FAILURE);
590 590 }
591 591
592 592 return (DDI_SUCCESS);
593 593 }
594 594
595 595 static int
596 596 xdf_cmlb_attach(xdf_t *vdp)
597 597 {
598 598 dev_info_t *dip = vdp->xdf_dip;
599 599
600 600 return (cmlb_attach(dip, &xdf_lb_ops,
601 601 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
602 602 XD_IS_RM(vdp), B_TRUE,
603 603 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
604 604 0, vdp->xdf_vd_lbl, NULL));
605 605 }
606 606
607 607 static void
608 608 xdf_io_err(buf_t *bp, int err, size_t resid)
609 609 {
610 610 bioerror(bp, err);
611 611 if (resid == 0)
612 612 bp->b_resid = bp->b_bcount;
613 613 biodone(bp);
614 614 }
615 615
616 616 static void
617 617 xdf_kstat_enter(xdf_t *vdp, buf_t *bp)
618 618 {
619 619 v_req_t *vreq = BP_VREQ(bp);
620 620
621 621 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
622 622
623 623 if (vdp->xdf_xdev_iostat == NULL)
624 624 return;
625 625 if ((vreq != NULL) && vreq->v_runq) {
626 626 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
627 627 } else {
628 628 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
629 629 }
630 630 }
631 631
632 632 static void
633 633 xdf_kstat_exit(xdf_t *vdp, buf_t *bp)
634 634 {
635 635 v_req_t *vreq = BP_VREQ(bp);
636 636
637 637 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
638 638
639 639 if (vdp->xdf_xdev_iostat == NULL)
640 640 return;
641 641
642 642 if ((vreq != NULL) && vreq->v_runq) {
643 643 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
644 644 } else {
645 645 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
646 646 }
647 647
648 648 if (bp->b_flags & B_READ) {
649 649 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->reads++;
650 650 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nread += bp->b_bcount;
651 651 } else if (bp->b_flags & B_WRITE) {
652 652 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->writes++;
653 653 KSTAT_IO_PTR(vdp->xdf_xdev_iostat)->nwritten += bp->b_bcount;
654 654 }
655 655 }
656 656
657 657 static void
658 658 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp)
659 659 {
660 660 v_req_t *vreq = BP_VREQ(bp);
661 661
662 662 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
663 663 ASSERT(!vreq->v_runq);
664 664
665 665 vreq->v_runq = B_TRUE;
666 666 if (vdp->xdf_xdev_iostat == NULL)
667 667 return;
668 668 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
669 669 }
670 670
671 671 static void
672 672 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp)
673 673 {
674 674 v_req_t *vreq = BP_VREQ(bp);
675 675
676 676 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
677 677 ASSERT(vreq->v_runq);
678 678
679 679 vreq->v_runq = B_FALSE;
680 680 if (vdp->xdf_xdev_iostat == NULL)
681 681 return;
682 682 kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
683 683 }
684 684
685 685 int
686 686 xdf_kstat_create(dev_info_t *dip)
687 687 {
688 688 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
689 689 kstat_t *kstat;
690 690 buf_t *bp;
691 691
692 692 if ((kstat = kstat_create("xdf", ddi_get_instance(dip), NULL, "disk",
693 693 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL)
694 694 return (-1);
695 695
696 696 /* See comment about locking in xdf_kstat_delete(). */
697 697 mutex_enter(&vdp->xdf_iostat_lk);
698 698 mutex_enter(&vdp->xdf_dev_lk);
699 699
700 700 /* only one kstat can exist at a time */
701 701 if (vdp->xdf_xdev_iostat != NULL) {
702 702 mutex_exit(&vdp->xdf_dev_lk);
703 703 mutex_exit(&vdp->xdf_iostat_lk);
704 704 kstat_delete(kstat);
705 705 return (-1);
706 706 }
707 707
708 708 vdp->xdf_xdev_iostat = kstat;
709 709 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
710 710 kstat_install(vdp->xdf_xdev_iostat);
711 711
712 712 /*
713 713 * Now that we've created a kstat, we need to update the waitq and
714 714 * runq counts for the kstat to reflect our current state.
715 715 *
716 716 * For a buf_t structure to be on the runq, it must have a ring
717 717 * buffer slot associated with it. To get a ring buffer slot the
718 718 * buf must first have a v_req_t and a ge_slot_t associated with it.
719 719 * Then when it is granted a ring buffer slot, v_runq will be set to
720 720 * true.
721 721 *
722 722 * For a buf_t structure to be on the waitq, it must not be on the
723 723 * runq. So to find all the buf_t's that should be on waitq, we
724 724 * walk the active buf list and add any buf_t's which aren't on the
725 725 * runq to the waitq.
726 726 */
727 727 bp = vdp->xdf_f_act;
728 728 while (bp != NULL) {
729 729 xdf_kstat_enter(vdp, bp);
730 730 bp = bp->av_forw;
731 731 }
732 732 if (vdp->xdf_ready_tq_bp != NULL)
733 733 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp);
734 734
735 735 mutex_exit(&vdp->xdf_dev_lk);
736 736 mutex_exit(&vdp->xdf_iostat_lk);
737 737 return (0);
738 738 }
739 739
740 740 void
741 741 xdf_kstat_delete(dev_info_t *dip)
742 742 {
743 743 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
744 744 kstat_t *kstat;
745 745 buf_t *bp;
746 746
747 747 /*
748 748 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
749 749 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
750 750 * and the contents of the our kstat. xdf_iostat_lk is used
751 751 * to protect the allocation and freeing of the actual kstat.
752 752 * xdf_dev_lk can't be used for this purpose because kstat
753 753 * readers use it to access the contents of the kstat and
754 754 * hence it can't be held when calling kstat_delete().
755 755 */
756 756 mutex_enter(&vdp->xdf_iostat_lk);
757 757 mutex_enter(&vdp->xdf_dev_lk);
758 758
759 759 if (vdp->xdf_xdev_iostat == NULL) {
760 760 mutex_exit(&vdp->xdf_dev_lk);
761 761 mutex_exit(&vdp->xdf_iostat_lk);
762 762 return;
763 763 }
764 764
765 765 /*
766 766 * We're about to destroy the kstat structures, so it isn't really
767 767 * necessary to update the runq and waitq counts. But, since this
768 768 * isn't a hot code path we can afford to be a little pedantic and
769 769 * go ahead and decrement the runq and waitq kstat counters to zero
770 770 * before free'ing them. This helps us ensure that we've gotten all
771 771 * our accounting correct.
772 772 *
773 773 * For an explanation of how we determine which buffers go on the
774 774 * runq vs which go on the waitq, see the comments in
775 775 * xdf_kstat_create().
776 776 */
777 777 bp = vdp->xdf_f_act;
778 778 while (bp != NULL) {
779 779 xdf_kstat_exit(vdp, bp);
780 780 bp = bp->av_forw;
781 781 }
782 782 if (vdp->xdf_ready_tq_bp != NULL)
783 783 xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);
784 784
785 785 kstat = vdp->xdf_xdev_iostat;
786 786 vdp->xdf_xdev_iostat = NULL;
787 787 mutex_exit(&vdp->xdf_dev_lk);
788 788 kstat_delete(kstat);
789 789 mutex_exit(&vdp->xdf_iostat_lk);
790 790 }
791 791
792 792 /*
793 793 * Add an IO requests onto the active queue.
794 794 *
795 795 * We have to detect IOs generated by xdf_ready_tq_thread. These IOs
796 796 * are used to establish a connection to the backend, so they receive
797 797 * priority over all other IOs. Since xdf_ready_tq_thread only does
798 798 * synchronous IO, there can only be one xdf_ready_tq_thread request at any
799 799 * given time and we record the buf associated with that request in
800 800 * xdf_ready_tq_bp.
801 801 */
802 802 static void
803 803 xdf_bp_push(xdf_t *vdp, buf_t *bp)
804 804 {
805 805 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
806 806 ASSERT(bp->av_forw == NULL);
807 807
808 808 xdf_kstat_enter(vdp, bp);
809 809
810 810 if (curthread == vdp->xdf_ready_tq_thread) {
811 811 /* new IO requests from the ready thread */
812 812 ASSERT(vdp->xdf_ready_tq_bp == NULL);
813 813 vdp->xdf_ready_tq_bp = bp;
814 814 return;
815 815 }
816 816
817 817 /* this is normal IO request */
818 818 ASSERT(bp != vdp->xdf_ready_tq_bp);
819 819
820 820 if (vdp->xdf_f_act == NULL) {
821 821 /* this is only only IO on the active queue */
822 822 ASSERT(vdp->xdf_l_act == NULL);
823 823 ASSERT(vdp->xdf_i_act == NULL);
824 824 vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp;
825 825 return;
826 826 }
827 827
828 828 /* add this IO to the tail of the active queue */
829 829 vdp->xdf_l_act->av_forw = bp;
830 830 vdp->xdf_l_act = bp;
831 831 if (vdp->xdf_i_act == NULL)
832 832 vdp->xdf_i_act = bp;
833 833 }
834 834
835 835 static void
836 836 xdf_bp_pop(xdf_t *vdp, buf_t *bp)
837 837 {
838 838 buf_t *bp_iter;
839 839
840 840 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
841 841 ASSERT(VREQ_DONE(BP_VREQ(bp)));
842 842
843 843 if (vdp->xdf_ready_tq_bp == bp) {
844 844 /* we're done with a ready thread IO request */
845 845 ASSERT(bp->av_forw == NULL);
846 846 vdp->xdf_ready_tq_bp = NULL;
847 847 return;
848 848 }
849 849
850 850 /* we're done with a normal IO request */
851 851 ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act));
852 852 ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act));
853 853 ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act)));
854 854 ASSERT(vdp->xdf_f_act != vdp->xdf_i_act);
855 855
856 856 if (bp == vdp->xdf_f_act) {
857 857 /* This IO was at the head of our active queue. */
858 858 vdp->xdf_f_act = bp->av_forw;
859 859 if (bp == vdp->xdf_l_act)
860 860 vdp->xdf_l_act = NULL;
861 861 } else {
862 862 /* There IO finished before some other pending IOs. */
863 863 bp_iter = vdp->xdf_f_act;
864 864 while (bp != bp_iter->av_forw) {
865 865 bp_iter = bp_iter->av_forw;
866 866 ASSERT(VREQ_DONE(BP_VREQ(bp_iter)));
867 867 ASSERT(bp_iter != vdp->xdf_i_act);
868 868 }
869 869 bp_iter->av_forw = bp->av_forw;
870 870 if (bp == vdp->xdf_l_act)
871 871 vdp->xdf_l_act = bp_iter;
872 872 }
873 873 bp->av_forw = NULL;
874 874 }
875 875
876 876 static buf_t *
877 877 xdf_bp_next(xdf_t *vdp)
878 878 {
879 879 v_req_t *vreq;
880 880 buf_t *bp;
881 881
882 882 if (vdp->xdf_state == XD_CONNECTED) {
883 883 /*
884 884 * If we're in the XD_CONNECTED state, we only service IOs
885 885 * from the xdf_ready_tq_thread thread.
886 886 */
887 887 if ((bp = vdp->xdf_ready_tq_bp) == NULL)
888 888 return (NULL);
889 889 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
890 890 return (bp);
891 891 return (NULL);
892 892 }
893 893
894 894 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */
895 895 if (vdp->xdf_state != XD_READY)
896 896 return (NULL);
897 897
898 898 ASSERT(vdp->xdf_ready_tq_bp == NULL);
899 899 for (;;) {
900 900 if ((bp = vdp->xdf_i_act) == NULL)
901 901 return (NULL);
902 902 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
903 903 return (bp);
904 904
905 905 /* advance the active buf index pointer */
906 906 vdp->xdf_i_act = bp->av_forw;
907 907 }
908 908 }
909 909
910 910 static void
911 911 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr)
912 912 {
913 913 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id;
914 914 v_req_t *vreq = gs->gs_vreq;
915 915 buf_t *bp = vreq->v_buf;
916 916
917 917 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
918 918 ASSERT(BP_VREQ(bp) == vreq);
919 919
920 920 gs_free(gs);
921 921
922 922 if (bioerr != 0)
923 923 bioerror(bp, bioerr);
924 924 ASSERT(vreq->v_nslots > 0);
925 925 if (--vreq->v_nslots > 0)
926 926 return;
927 927
928 928 /* remove this IO from our active queue */
929 929 xdf_bp_pop(vdp, bp);
930 930
931 931 ASSERT(vreq->v_runq);
932 932 xdf_kstat_exit(vdp, bp);
933 933 vreq->v_runq = B_FALSE;
934 934 vreq_free(vdp, vreq);
935 935
936 936 if (IS_ERROR(bp)) {
937 937 xdf_io_err(bp, geterror(bp), 0);
938 938 } else if (bp->b_resid != 0) {
939 939 /* Partial transfers are an error */
940 940 xdf_io_err(bp, EIO, bp->b_resid);
941 941 } else {
942 942 biodone(bp);
943 943 }
944 944 }
945 945
946 946 /*
947 947 * xdf interrupt handler
948 948 */
949 949 static uint_t
950 950 xdf_intr_locked(xdf_t *vdp)
951 951 {
952 952 xendev_ring_t *xbr;
953 953 blkif_response_t *resp;
954 954 int bioerr;
955 955 uint64_t id;
956 956 uint8_t op;
957 957 uint16_t status;
958 958 ddi_acc_handle_t acchdl;
959 959
960 960 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
961 961
962 962 if ((xbr = vdp->xdf_xb_ring) == NULL)
963 963 return (DDI_INTR_UNCLAIMED);
964 964
965 965 acchdl = vdp->xdf_xb_ring_hdl;
966 966
967 967 /*
968 968 * complete all requests which have a response
969 969 */
970 970 while (resp = xvdi_ring_get_response(xbr)) {
971 971 id = ddi_get64(acchdl, &resp->id);
972 972 op = ddi_get8(acchdl, &resp->operation);
973 973 status = ddi_get16(acchdl, (uint16_t *)&resp->status);
974 974 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
975 975 op, id, status));
976 976
977 977 if (status != BLKIF_RSP_OKAY) {
978 978 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s",
979 979 vdp->xdf_addr,
980 980 (op == BLKIF_OP_READ) ? "reading" : "writing"));
981 981 bioerr = EIO;
982 982 } else {
983 983 bioerr = 0;
984 984 }
985 985
986 986 xdf_io_fini(vdp, id, bioerr);
987 987 }
988 988 return (DDI_INTR_CLAIMED);
989 989 }
990 990
991 991 /*
992 992 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and
993 993 * block at a lower pil.
994 994 */
995 995 static uint_t
996 996 xdf_intr(caddr_t arg)
997 997 {
998 998 xdf_t *vdp = (xdf_t *)arg;
999 999 int rv;
1000 1000
1001 1001 mutex_enter(&vdp->xdf_dev_lk);
1002 1002 rv = xdf_intr_locked(vdp);
1003 1003 mutex_exit(&vdp->xdf_dev_lk);
1004 1004
1005 1005 if (!do_polled_io)
1006 1006 xdf_io_start(vdp);
1007 1007
1008 1008 return (rv);
1009 1009 }
1010 1010
1011 1011 static void
1012 1012 xdf_ring_push(xdf_t *vdp)
1013 1013 {
1014 1014 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1015 1015
1016 1016 if (vdp->xdf_xb_ring == NULL)
1017 1017 return;
1018 1018
1019 1019 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) {
1020 1020 DPRINTF(IO_DBG, (
1021 1021 "xdf@%s: xdf_ring_push: sent request(s) to backend\n",
1022 1022 vdp->xdf_addr));
1023 1023 }
1024 1024
1025 1025 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN)
1026 1026 xvdi_notify_oe(vdp->xdf_dip);
1027 1027 }
1028 1028
1029 1029 static int
1030 1030 xdf_ring_drain_locked(xdf_t *vdp)
1031 1031 {
1032 1032 int pollc, rv = 0;
1033 1033
1034 1034 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1035 1035
1036 1036 if (xdf_debug & SUSRES_DBG)
1037 1037 xen_printf("xdf_ring_drain: start\n");
1038 1038
1039 1039 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) {
1040 1040 if (vdp->xdf_xb_ring == NULL)
1041 1041 goto out;
1042 1042
1043 1043 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
1044 1044 (void) xdf_intr_locked(vdp);
1045 1045 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring))
1046 1046 goto out;
1047 1047 xdf_ring_push(vdp);
1048 1048
1049 1049 /* file-backed devices can be slow */
1050 1050 mutex_exit(&vdp->xdf_dev_lk);
1051 1051 #ifdef XPV_HVM_DRIVER
1052 1052 (void) HYPERVISOR_yield();
1053 1053 #endif /* XPV_HVM_DRIVER */
1054 1054 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY));
1055 1055 mutex_enter(&vdp->xdf_dev_lk);
1056 1056 }
1057 1057 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr);
1058 1058
1059 1059 out:
1060 1060 if (vdp->xdf_xb_ring != NULL) {
1061 1061 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) ||
1062 1062 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
1063 1063 rv = EIO;
1064 1064 }
1065 1065 if (xdf_debug & SUSRES_DBG)
1066 1066 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n",
1067 1067 vdp->xdf_addr, rv);
1068 1068 return (rv);
1069 1069 }
1070 1070
1071 1071 static int
1072 1072 xdf_ring_drain(xdf_t *vdp)
1073 1073 {
1074 1074 int rv;
1075 1075 mutex_enter(&vdp->xdf_dev_lk);
1076 1076 rv = xdf_ring_drain_locked(vdp);
1077 1077 mutex_exit(&vdp->xdf_dev_lk);
1078 1078 return (rv);
1079 1079 }
1080 1080
1081 1081 /*
1082 1082 * Destroy all v_req_t, grant table entries, and our ring buffer.
1083 1083 */
1084 1084 static void
1085 1085 xdf_ring_destroy(xdf_t *vdp)
1086 1086 {
1087 1087 v_req_t *vreq;
1088 1088 buf_t *bp;
1089 1089 ge_slot_t *gs;
1090 1090
1091 1091 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1092 1092 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1093 1093
1094 1094 if ((vdp->xdf_state != XD_INIT) &&
1095 1095 (vdp->xdf_state != XD_CONNECTED) &&
1096 1096 (vdp->xdf_state != XD_READY)) {
1097 1097 ASSERT(vdp->xdf_xb_ring == NULL);
1098 1098 ASSERT(vdp->xdf_xb_ring_hdl == NULL);
1099 1099 ASSERT(vdp->xdf_peer == INVALID_DOMID);
1100 1100 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN);
1101 1101 ASSERT(list_is_empty(&vdp->xdf_vreq_act));
1102 1102 return;
1103 1103 }
1104 1104
1105 1105 /*
1106 1106 * We don't want to receive async notifications from the backend
1107 1107 * when it finishes processing ring entries.
1108 1108 */
1109 1109 #ifdef XPV_HVM_DRIVER
1110 1110 ec_unbind_evtchn(vdp->xdf_evtchn);
1111 1111 #else /* !XPV_HVM_DRIVER */
1112 1112 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
1113 1113 #endif /* !XPV_HVM_DRIVER */
1114 1114
1115 1115 /*
1116 1116 * Drain any requests in the ring. We need to do this before we
1117 1117 * can free grant table entries, because if active ring entries
1118 1118 * point to grants, then the backend could be trying to access
1119 1119 * those grants.
1120 1120 */
1121 1121 (void) xdf_ring_drain_locked(vdp);
1122 1122
1123 1123 /* We're done talking to the backend so free up our event channel */
1124 1124 xvdi_free_evtchn(vdp->xdf_dip);
1125 1125 vdp->xdf_evtchn = INVALID_EVTCHN;
1126 1126
1127 1127 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) {
1128 1128 bp = vreq->v_buf;
1129 1129 ASSERT(BP_VREQ(bp) == vreq);
1130 1130
1131 1131 /* Free up any grant table entries associaed with this IO */
1132 1132 while ((gs = list_head(&vreq->v_gs)) != NULL)
1133 1133 gs_free(gs);
1134 1134
1135 1135 /* If this IO was on the runq, move it back to the waitq. */
1136 1136 if (vreq->v_runq)
1137 1137 xdf_kstat_runq_to_waitq(vdp, bp);
1138 1138
1139 1139 /*
1140 1140 * Reset any buf IO state since we're going to re-issue the
1141 1141 * IO when we reconnect.
1142 1142 */
1143 1143 vreq_free(vdp, vreq);
1144 1144 BP_VREQ_SET(bp, NULL);
1145 1145 bioerror(bp, 0);
1146 1146 }
1147 1147
1148 1148 /* reset the active queue index pointer */
1149 1149 vdp->xdf_i_act = vdp->xdf_f_act;
1150 1150
1151 1151 /* Destroy the ring */
1152 1152 xvdi_free_ring(vdp->xdf_xb_ring);
1153 1153 vdp->xdf_xb_ring = NULL;
1154 1154 vdp->xdf_xb_ring_hdl = NULL;
1155 1155 vdp->xdf_peer = INVALID_DOMID;
1156 1156 }
1157 1157
1158 1158 void
1159 1159 xdfmin(struct buf *bp)
1160 1160 {
1161 1161 if (bp->b_bcount > xdf_maxphys)
1162 1162 bp->b_bcount = xdf_maxphys;
1163 1163 }
1164 1164
1165 1165 /*
1166 1166 * Check if we have a pending "eject" media request.
1167 1167 */
1168 1168 static int
1169 1169 xdf_eject_pending(xdf_t *vdp)
1170 1170 {
1171 1171 dev_info_t *dip = vdp->xdf_dip;
1172 1172 char *xsname, *str;
1173 1173
1174 1174 if (!vdp->xdf_media_req_supported)
1175 1175 return (B_FALSE);
1176 1176
1177 1177 if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1178 1178 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0))
1179 1179 return (B_FALSE);
1180 1180
1181 1181 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
1182 1182 strfree(str);
1183 1183 return (B_FALSE);
1184 1184 }
1185 1185 strfree(str);
1186 1186 return (B_TRUE);
1187 1187 }
1188 1188
1189 1189 /*
1190 1190 * Generate a media request.
1191 1191 */
1192 1192 static int
1193 1193 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required)
1194 1194 {
1195 1195 dev_info_t *dip = vdp->xdf_dip;
1196 1196 char *xsname;
1197 1197
1198 1198 /*
1199 1199 * we can't be holding xdf_dev_lk because xenbus_printf() can
1200 1200 * block while waiting for a PIL 1 interrupt message. this
1201 1201 * would cause a deadlock with xdf_intr() which needs to grab
1202 1202 * xdf_dev_lk as well and runs at PIL 5.
1203 1203 */
1204 1204 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1205 1205 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1206 1206
1207 1207 if ((xsname = xvdi_get_xsname(dip)) == NULL)
1208 1208 return (ENXIO);
1209 1209
1210 1210 /* Check if we support media requests */
1211 1211 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported)
1212 1212 return (ENOTTY);
1213 1213
1214 1214 /* If an eject is pending then don't allow any new requests */
1215 1215 if (xdf_eject_pending(vdp))
1216 1216 return (ENXIO);
1217 1217
1218 1218 /* Make sure that there is media present */
1219 1219 if (media_required && (vdp->xdf_xdev_nblocks == 0))
1220 1220 return (ENXIO);
1221 1221
1222 1222 /* We only allow operations when the device is ready and connected */
1223 1223 if (vdp->xdf_state != XD_READY)
1224 1224 return (EIO);
1225 1225
1226 1226 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0)
1227 1227 return (EIO);
1228 1228
1229 1229 return (0);
1230 1230 }
1231 1231
1232 1232 /*
1233 1233 * populate a single blkif_request_t w/ a buf
1234 1234 */
1235 1235 static void
1236 1236 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
1237 1237 {
1238 1238 grant_ref_t gr;
1239 1239 uint8_t fsect, lsect;
1240 1240 size_t bcnt;
1241 1241 paddr_t dma_addr;
1242 1242 off_t blk_off;
1243 1243 dev_info_t *dip = vdp->xdf_dip;
1244 1244 blkif_vdev_t vdev = xvdi_get_vdevnum(dip);
1245 1245 v_req_t *vreq = BP_VREQ(bp);
1246 1246 uint64_t blkno = vreq->v_blkno;
1247 1247 uint_t ndmacs = vreq->v_ndmacs;
1248 1248 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
1249 1249 int seg = 0;
1250 1250 int isread = IS_READ(bp);
1251 1251 ge_slot_t *gs = list_head(&vreq->v_gs);
1252 1252
1253 1253 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1254 1254 ASSERT(vreq->v_status == VREQ_GS_ALLOCED);
1255 1255
1256 1256 if (isread)
1257 1257 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
1258 1258 else {
1259 1259 switch (vreq->v_flush_diskcache) {
1260 1260 case FLUSH_DISKCACHE:
1261 1261 ddi_put8(acchdl, &rreq->operation,
1262 1262 BLKIF_OP_FLUSH_DISKCACHE);
1263 1263 ddi_put16(acchdl, &rreq->handle, vdev);
1264 1264 ddi_put64(acchdl, &rreq->id,
1265 1265 (uint64_t)(uintptr_t)(gs));
1266 1266 ddi_put8(acchdl, &rreq->nr_segments, 0);
1267 1267 vreq->v_status = VREQ_DMAWIN_DONE;
1268 1268 return;
1269 1269 case WRITE_BARRIER:
1270 1270 ddi_put8(acchdl, &rreq->operation,
1271 1271 BLKIF_OP_WRITE_BARRIER);
1272 1272 break;
1273 1273 default:
1274 1274 if (!vdp->xdf_wce)
1275 1275 ddi_put8(acchdl, &rreq->operation,
1276 1276 BLKIF_OP_WRITE_BARRIER);
1277 1277 else
1278 1278 ddi_put8(acchdl, &rreq->operation,
1279 1279 BLKIF_OP_WRITE);
1280 1280 break;
1281 1281 }
1282 1282 }
1283 1283
1284 1284 ddi_put16(acchdl, &rreq->handle, vdev);
1285 1285 ddi_put64(acchdl, &rreq->sector_number, blkno);
1286 1286 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs));
1287 1287
1288 1288 /*
1289 1289 * loop until all segments are populated or no more dma cookie in buf
1290 1290 */
1291 1291 for (;;) {
1292 1292 /*
1293 1293 * Each segment of a blkif request can transfer up to
1294 1294 * one 4K page of data.
1295 1295 */
1296 1296 bcnt = vreq->v_dmac.dmac_size;
1297 1297 dma_addr = vreq->v_dmac.dmac_laddress;
1298 1298 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
1299 1299 fsect = blk_off >> XB_BSHIFT;
1300 1300 lsect = fsect + (bcnt >> XB_BSHIFT) - 1;
1301 1301
1302 1302 ASSERT(bcnt <= PAGESIZE);
1303 1303 ASSERT((bcnt % XB_BSIZE) == 0);
1304 1304 ASSERT((blk_off & XB_BMASK) == 0);
1305 1305 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
1306 1306 lsect < XB_MAX_SEGLEN / XB_BSIZE);
1307 1307
1308 1308 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT);
1309 1309 ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
1310 1310 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
1311 1311 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);
1312 1312
1313 1313 DPRINTF(IO_DBG, (
1314 1314 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n",
1315 1315 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off));
1316 1316 DPRINTF(IO_DBG, (
1317 1317 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n",
1318 1318 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr));
1319 1319
1320 1320 blkno += (bcnt >> XB_BSHIFT);
1321 1321 seg++;
1322 1322 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
1323 1323 if (--ndmacs) {
1324 1324 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
1325 1325 continue;
1326 1326 }
1327 1327
1328 1328 vreq->v_status = VREQ_DMAWIN_DONE;
1329 1329 vreq->v_blkno = blkno;
1330 1330 break;
1331 1331 }
1332 1332 ddi_put8(acchdl, &rreq->nr_segments, seg);
1333 1333 DPRINTF(IO_DBG, (
1334 1334 "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n",
1335 1335 vdp->xdf_addr, rreq->id));
1336 1336 }
1337 1337
1338 1338 static void
1339 1339 xdf_io_start(xdf_t *vdp)
1340 1340 {
1341 1341 struct buf *bp;
1342 1342 v_req_t *vreq;
1343 1343 blkif_request_t *rreq;
1344 1344 boolean_t rreqready = B_FALSE;
1345 1345
1346 1346 mutex_enter(&vdp->xdf_dev_lk);
1347 1347
1348 1348 /*
1349 1349 * Populate the ring request(s). Loop until there is no buf to
1350 1350 * transfer or no free slot available in I/O ring.
1351 1351 */
1352 1352 for (;;) {
1353 1353 /* don't start any new IO if we're suspending */
1354 1354 if (vdp->xdf_suspending)
1355 1355 break;
1356 1356 if ((bp = xdf_bp_next(vdp)) == NULL)
1357 1357 break;
1358 1358
1359 1359 /* if the buf doesn't already have a vreq, allocate one */
1360 1360 if (((vreq = BP_VREQ(bp)) == NULL) &&
1361 1361 ((vreq = vreq_get(vdp, bp)) == NULL))
1362 1362 break;
1363 1363
1364 1364 /* alloc DMA/GTE resources */
1365 1365 if (vreq_setup(vdp, vreq) != DDI_SUCCESS)
1366 1366 break;
1367 1367
1368 1368 /* get next blkif_request in the ring */
1369 1369 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL)
1370 1370 break;
1371 1371 bzero(rreq, sizeof (blkif_request_t));
1372 1372 rreqready = B_TRUE;
1373 1373
1374 1374 /* populate blkif_request with this buf */
1375 1375 xdf_process_rreq(vdp, bp, rreq);
1376 1376
1377 1377 /*
1378 1378 * This buffer/vreq pair is has been allocated a ring buffer
1379 1379 * resources, so if it isn't already in our runq, add it.
1380 1380 */
1381 1381 if (!vreq->v_runq)
1382 1382 xdf_kstat_waitq_to_runq(vdp, bp);
1383 1383 }
1384 1384
1385 1385 /* Send the request(s) to the backend */
1386 1386 if (rreqready)
1387 1387 xdf_ring_push(vdp);
1388 1388
1389 1389 mutex_exit(&vdp->xdf_dev_lk);
1390 1390 }
1391 1391
1392 1392
1393 1393 /* check if partition is open, -1 - check all partitions on the disk */
1394 1394 static boolean_t
1395 1395 xdf_isopen(xdf_t *vdp, int partition)
1396 1396 {
1397 1397 int i;
1398 1398 ulong_t parbit;
1399 1399 boolean_t rval = B_FALSE;
1400 1400
1401 1401 ASSERT((partition == -1) ||
1402 1402 ((partition >= 0) || (partition < XDF_PEXT)));
1403 1403
1404 1404 if (partition == -1)
1405 1405 parbit = (ulong_t)-1;
1406 1406 else
1407 1407 parbit = 1 << partition;
1408 1408
1409 1409 for (i = 0; i < OTYPCNT; i++) {
1410 1410 if (vdp->xdf_vd_open[i] & parbit)
1411 1411 rval = B_TRUE;
1412 1412 }
1413 1413
1414 1414 return (rval);
1415 1415 }
1416 1416
1417 1417 /*
1418 1418 * The connection should never be closed as long as someone is holding
1419 1419 * us open, there is pending IO, or someone is waiting waiting for a
1420 1420 * connection.
1421 1421 */
1422 1422 static boolean_t
1423 1423 xdf_busy(xdf_t *vdp)
1424 1424 {
1425 1425 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1426 1426
1427 1427 if ((vdp->xdf_xb_ring != NULL) &&
1428 1428 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
1429 1429 ASSERT(vdp->xdf_state != XD_CLOSED);
1430 1430 return (B_TRUE);
1431 1431 }
1432 1432
1433 1433 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) {
1434 1434 ASSERT(vdp->xdf_state != XD_CLOSED);
1435 1435 return (B_TRUE);
1436 1436 }
1437 1437
1438 1438 if (xdf_isopen(vdp, -1)) {
1439 1439 ASSERT(vdp->xdf_state != XD_CLOSED);
1440 1440 return (B_TRUE);
1441 1441 }
1442 1442
1443 1443 if (vdp->xdf_connect_req > 0) {
1444 1444 ASSERT(vdp->xdf_state != XD_CLOSED);
1445 1445 return (B_TRUE);
1446 1446 }
1447 1447
1448 1448 return (B_FALSE);
1449 1449 }
1450 1450
1451 1451 static void
1452 1452 xdf_set_state(xdf_t *vdp, xdf_state_t new_state)
1453 1453 {
1454 1454 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1455 1455 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1456 1456 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n",
1457 1457 vdp->xdf_addr, vdp->xdf_state, new_state));
1458 1458 vdp->xdf_state = new_state;
1459 1459 cv_broadcast(&vdp->xdf_dev_cv);
1460 1460 }
1461 1461
1462 1462 static void
1463 1463 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet)
1464 1464 {
1465 1465 dev_info_t *dip = vdp->xdf_dip;
1466 1466 boolean_t busy;
1467 1467
1468 1468 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1469 1469 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1470 1470 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED));
1471 1471
1472 1472 /* Check if we're already there. */
1473 1473 if (vdp->xdf_state == new_state)
1474 1474 return;
1475 1475
1476 1476 mutex_enter(&vdp->xdf_dev_lk);
1477 1477 busy = xdf_busy(vdp);
1478 1478
1479 1479 /* If we're already closed then there's nothing todo. */
1480 1480 if (vdp->xdf_state == XD_CLOSED) {
1481 1481 ASSERT(!busy);
1482 1482 xdf_set_state(vdp, new_state);
1483 1483 mutex_exit(&vdp->xdf_dev_lk);
1484 1484 return;
1485 1485 }
1486 1486
1487 1487 #ifdef DEBUG
1488 1488 /* UhOh. Warn the user that something bad has happened. */
1489 1489 if (!quiet && busy && (vdp->xdf_state == XD_READY) &&
1490 1490 (vdp->xdf_xdev_nblocks != 0)) {
1491 1491 cmn_err(CE_WARN, "xdf@%s: disconnected while in use",
1492 1492 vdp->xdf_addr);
1493 1493 }
1494 1494 #endif /* DEBUG */
1495 1495
1496 1496 xdf_ring_destroy(vdp);
1497 1497
1498 1498 /* If we're busy then we can only go into the unknown state */
1499 1499 xdf_set_state(vdp, (busy) ? XD_UNKNOWN : new_state);
1500 1500 mutex_exit(&vdp->xdf_dev_lk);
1501 1501
1502 1502 /* if we're closed now, let the other end know */
1503 1503 if (vdp->xdf_state == XD_CLOSED)
1504 1504 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
1505 1505 }
1506 1506
1507 1507
1508 1508 /*
1509 1509 * Kick-off connect process
1510 1510 * Status should be XD_UNKNOWN or XD_CLOSED
1511 1511 * On success, status will be changed to XD_INIT
1512 1512 * On error, it will be changed to XD_UNKNOWN
1513 1513 */
1514 1514 static int
1515 1515 xdf_setstate_init(xdf_t *vdp)
1516 1516 {
1517 1517 dev_info_t *dip = vdp->xdf_dip;
1518 1518 xenbus_transaction_t xbt;
1519 1519 grant_ref_t gref;
1520 1520 char *xsname, *str;
1521 1521 int rv;
1522 1522
1523 1523 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1524 1524 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1525 1525 ASSERT((vdp->xdf_state == XD_UNKNOWN) ||
1526 1526 (vdp->xdf_state == XD_CLOSED));
1527 1527
1528 1528 DPRINTF(DDI_DBG,
1529 1529 ("xdf@%s: starting connection process\n", vdp->xdf_addr));
1530 1530
1531 1531 /*
1532 1532 * If an eject is pending then don't allow a new connection.
1533 1533 * (Only the backend can clear media request eject request.)
1534 1534 */
1535 1535 if (xdf_eject_pending(vdp))
1536 1536 return (DDI_FAILURE);
1537 1537
1538 1538 if ((xsname = xvdi_get_xsname(dip)) == NULL)
1539 1539 goto errout;
1540 1540
1541 1541 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID)
1542 1542 goto errout;
1543 1543
1544 1544 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising);
1545 1545
1546 1546 /*
1547 1547 * Sanity check for the existance of the xenbus device-type property.
1548 1548 * This property might not exist if our xenbus device nodes were
1549 1549 * force destroyed while we were still connected to the backend.
1550 1550 */
1551 1551 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0)
1552 1552 goto errout;
1553 1553 strfree(str);
1554 1554
1555 1555 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS)
1556 1556 goto errout;
1557 1557
1558 1558 vdp->xdf_evtchn = xvdi_get_evtchn(dip);
1559 1559 #ifdef XPV_HVM_DRIVER
1560 1560 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
1561 1561 #else /* !XPV_HVM_DRIVER */
1562 1562 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
1563 1563 DDI_SUCCESS) {
1564 1564 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: "
1565 1565 "failed to add intr handler", vdp->xdf_addr);
1566 1566 goto errout1;
1567 1567 }
1568 1568 #endif /* !XPV_HVM_DRIVER */
1569 1569
1570 1570 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
1571 1571 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
1572 1572 DDI_SUCCESS) {
1573 1573 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
1574 1574 vdp->xdf_addr);
1575 1575 goto errout2;
1576 1576 }
1577 1577 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */
1578 1578
1579 1579 /*
1580 1580 * Write into xenstore the info needed by backend
1581 1581 */
1582 1582 trans_retry:
1583 1583 if (xenbus_transaction_start(&xbt)) {
1584 1584 cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
1585 1585 vdp->xdf_addr);
1586 1586 xvdi_fatal_error(dip, EIO, "connect transaction init");
1587 1587 goto fail_trans;
1588 1588 }
1589 1589
1590 1590 /*
1591 1591 * XBP_PROTOCOL is written by the domain builder in the case of PV
1592 1592 * domains. However, it is not written for HVM domains, so let's
1593 1593 * write it here.
1594 1594 */
1595 1595 if (((rv = xenbus_printf(xbt, xsname,
1596 1596 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) ||
1597 1597 ((rv = xenbus_printf(xbt, xsname,
1598 1598 XBP_RING_REF, "%u", gref)) != 0) ||
1599 1599 ((rv = xenbus_printf(xbt, xsname,
1600 1600 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) ||
1601 1601 ((rv = xenbus_printf(xbt, xsname,
1602 1602 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) ||
1603 1603 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) {
1604 1604 (void) xenbus_transaction_end(xbt, 1);
1605 1605 xvdi_fatal_error(dip, rv, "connect transaction setup");
1606 1606 goto fail_trans;
1607 1607 }
1608 1608
1609 1609 /* kick-off connect process */
1610 1610 if (rv = xenbus_transaction_end(xbt, 0)) {
1611 1611 if (rv == EAGAIN)
1612 1612 goto trans_retry;
1613 1613 xvdi_fatal_error(dip, rv, "connect transaction commit");
1614 1614 goto fail_trans;
1615 1615 }
1616 1616
1617 1617 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1618 1618 mutex_enter(&vdp->xdf_dev_lk);
1619 1619 xdf_set_state(vdp, XD_INIT);
1620 1620 mutex_exit(&vdp->xdf_dev_lk);
1621 1621
1622 1622 return (DDI_SUCCESS);
1623 1623
1624 1624 fail_trans:
1625 1625 xvdi_free_ring(vdp->xdf_xb_ring);
1626 1626 errout2:
1627 1627 #ifdef XPV_HVM_DRIVER
1628 1628 ec_unbind_evtchn(vdp->xdf_evtchn);
1629 1629 #else /* !XPV_HVM_DRIVER */
1630 1630 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
1631 1631 #endif /* !XPV_HVM_DRIVER */
1632 1632 errout1:
1633 1633 xvdi_free_evtchn(dip);
1634 1634 vdp->xdf_evtchn = INVALID_EVTCHN;
1635 1635 errout:
1636 1636 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1637 1637 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend",
1638 1638 vdp->xdf_addr);
1639 1639 return (DDI_FAILURE);
1640 1640 }
1641 1641
1642 1642 int
1643 1643 xdf_get_flush_block(xdf_t *vdp)
1644 1644 {
1645 1645 /*
1646 1646 * Get a DEV_BSIZE aligned bufer
1647 1647 */
1648 1648 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP);
1649 1649 vdp->xdf_cache_flush_block =
1650 1650 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem),
1651 1651 (int)vdp->xdf_xdev_secsize);
1652 1652
1653 1653 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
1654 1654 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0)
1655 1655 return (DDI_FAILURE);
1656 1656 return (DDI_SUCCESS);
1657 1657 }
1658 1658
1659 1659 static void
1660 1660 xdf_setstate_ready(void *arg)
1661 1661 {
1662 1662 xdf_t *vdp = (xdf_t *)arg;
1663 1663 dev_info_t *dip = vdp->xdf_dip;
1664 1664
1665 1665 vdp->xdf_ready_tq_thread = curthread;
1666 1666
1667 1667 /* Create minor nodes now when we are almost connected */
1668 1668 mutex_enter(&vdp->xdf_dev_lk);
1669 1669 if (vdp->xdf_cmlb_reattach) {
1670 1670 vdp->xdf_cmlb_reattach = B_FALSE;
1671 1671 mutex_exit(&vdp->xdf_dev_lk);
1672 1672 if (xdf_cmlb_attach(vdp) != 0) {
1673 1673 cmn_err(CE_WARN,
1674 1674 "xdf@%s: cmlb attach failed",
1675 1675 ddi_get_name_addr(dip));
1676 1676 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1677 1677 return;
1678 1678 }
1679 1679 mutex_enter(&vdp->xdf_dev_lk);
1680 1680 }
1681 1681
1682 1682 /* If we're not still trying to get to the ready state, then bail. */
1683 1683 if (vdp->xdf_state != XD_CONNECTED) {
1684 1684 mutex_exit(&vdp->xdf_dev_lk);
1685 1685 return;
1686 1686 }
1687 1687 mutex_exit(&vdp->xdf_dev_lk);
1688 1688
1689 1689 /*
1690 1690 * If backend has feature-barrier, see if it supports disk
1691 1691 * cache flush op.
1692 1692 */
1693 1693 vdp->xdf_flush_supported = B_FALSE;
1694 1694 if (vdp->xdf_feature_barrier) {
1695 1695 /*
1696 1696 * Pretend we already know flush is supported so probe
1697 1697 * will attempt the correct op.
1698 1698 */
1699 1699 vdp->xdf_flush_supported = B_TRUE;
1700 1700 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
1701 1701 vdp->xdf_flush_supported = B_TRUE;
1702 1702 } else {
1703 1703 vdp->xdf_flush_supported = B_FALSE;
1704 1704 /*
1705 1705 * If the other end does not support the cache flush op
1706 1706 * then we must use a barrier-write to force disk
1707 1707 * cache flushing. Barrier writes require that a data
1708 1708 * block actually be written.
1709 1709 * Cache a block to barrier-write when we are
1710 1710 * asked to perform a flush.
1711 1711 * XXX - would it be better to just copy 1 block
1712 1712 * (512 bytes) from whatever write we did last
1713 1713 * and rewrite that block?
1714 1714 */
1715 1715 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) {
1716 1716 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1717 1717 return;
1718 1718 }
1719 1719 }
1720 1720 }
1721 1721
1722 1722 mutex_enter(&vdp->xdf_cb_lk);
1723 1723 mutex_enter(&vdp->xdf_dev_lk);
1724 1724 if (vdp->xdf_state == XD_CONNECTED)
1725 1725 xdf_set_state(vdp, XD_READY);
1726 1726 mutex_exit(&vdp->xdf_dev_lk);
1727 1727
1728 1728 /* Restart any currently queued up io */
1729 1729 xdf_io_start(vdp);
1730 1730
1731 1731 mutex_exit(&vdp->xdf_cb_lk);
1732 1732 }
1733 1733
1734 1734 /*
1735 1735 * synthetic geometry
1736 1736 */
1737 1737 #define XDF_NSECTS 256
1738 1738 #define XDF_NHEADS 16
1739 1739
1740 1740 static void
1741 1741 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp)
1742 1742 {
1743 1743 xdf_t *vdp;
1744 1744 uint_t ncyl;
1745 1745
1746 1746 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
1747 1747
1748 1748 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);
1749 1749
1750 1750 bzero(geomp, sizeof (*geomp));
1751 1751 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
1752 1752 geomp->g_acyl = 0;
1753 1753 geomp->g_nhead = XDF_NHEADS;
1754 1754 geomp->g_nsect = XDF_NSECTS;
1755 1755 geomp->g_secsize = vdp->xdf_xdev_secsize;
1756 1756 geomp->g_capacity = vdp->xdf_xdev_nblocks;
1757 1757 geomp->g_intrlv = 0;
1758 1758 geomp->g_rpm = 7200;
1759 1759 }
1760 1760
1761 1761 /*
1762 1762 * Finish other initialization after we've connected to backend
1763 1763 * Status should be XD_INIT before calling this routine
1764 1764 * On success, status should be changed to XD_CONNECTED.
1765 1765 * On error, status should stay XD_INIT
1766 1766 */
1767 1767 static int
1768 1768 xdf_setstate_connected(xdf_t *vdp)
1769 1769 {
1770 1770 dev_info_t *dip = vdp->xdf_dip;
1771 1771 cmlb_geom_t pgeom;
1772 1772 diskaddr_t nblocks = 0;
1773 1773 uint_t secsize = 0;
1774 1774 char *oename, *xsname, *str;
1775 1775 uint_t dinfo;
1776 1776
1777 1777 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1778 1778 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1779 1779 ASSERT(vdp->xdf_state == XD_INIT);
1780 1780
1781 1781 if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1782 1782 ((oename = xvdi_get_oename(dip)) == NULL))
1783 1783 return (DDI_FAILURE);
1784 1784
1785 1785 /* Make sure the other end is XenbusStateConnected */
1786 1786 if (xenbus_read_driver_state(oename) != XenbusStateConnected)
1787 1787 return (DDI_FAILURE);
1788 1788
1789 1789 /* Determine if feature barrier is supported by backend */
1790 1790 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB)))
1791 1791 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported",
1792 1792 vdp->xdf_addr);
1793 1793
1794 1794 /*
1795 1795 * Probe backend. Read the device size into xdf_xdev_nblocks
1796 1796 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE
1797 1797 * flags in xdf_dinfo. If the emulated device type is "cdrom",
1798 1798 * we always set VDISK_CDROM, regardless of if it's present in
1799 1799 * the xenbus info parameter.
1800 1800 */
1801 1801 if (xenbus_gather(XBT_NULL, oename,
1802 1802 XBP_SECTORS, "%"SCNu64, &nblocks,
1803 1803 XBP_SECTOR_SIZE, "%u", &secsize,
1804 1804 XBP_INFO, "%u", &dinfo,
1805 1805 NULL) != 0) {
1806 1806 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
1807 1807 "cannot read backend info", vdp->xdf_addr);
1808 1808 return (DDI_FAILURE);
1809 1809 }
1810 1810 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
1811 1811 cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
1812 1812 vdp->xdf_addr);
1813 1813 return (DDI_FAILURE);
1814 1814 }
1815 1815 if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
1816 1816 dinfo |= VDISK_CDROM;
1817 1817 strfree(str);
1818 1818
1819 1819 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE)))
1820 1820 secsize = DEV_BSIZE;
1821 1821 vdp->xdf_xdev_nblocks = nblocks;
1822 1822 vdp->xdf_xdev_secsize = secsize;
1823 1823 #ifdef _ILP32
1824 1824 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
1825 1825 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
1826 1826 "backend disk device too large with %llu blocks for"
1827 1827 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks);
1828 1828 xvdi_fatal_error(dip, EFBIG, "reading backend info");
1829 1829 return (DDI_FAILURE);
1830 1830 }
1831 1831 #endif
1832 1832
1833 1833 /*
1834 1834 * If the physical geometry for a fixed disk has been explicity
1835 1835 * set then make sure that the specified physical geometry isn't
1836 1836 * larger than the device we connected to.
1837 1837 */
1838 1838 if (vdp->xdf_pgeom_fixed &&
1839 1839 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) {
1840 1840 cmn_err(CE_WARN,
1841 1841 "xdf@%s: connect failed, fixed geometry too large",
1842 1842 vdp->xdf_addr);
1843 1843 return (DDI_FAILURE);
1844 1844 }
1845 1845
1846 1846 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP);
1847 1847
1848 1848 /* mark vbd is ready for I/O */
1849 1849 mutex_enter(&vdp->xdf_dev_lk);
1850 1850 xdf_set_state(vdp, XD_CONNECTED);
1851 1851
1852 1852 /* check if the cmlb label should be updated */
1853 1853 xdf_synthetic_pgeom(dip, &pgeom);
1854 1854 if ((vdp->xdf_dinfo != dinfo) ||
1855 1855 (!vdp->xdf_pgeom_fixed &&
1856 1856 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) {
1857 1857 vdp->xdf_cmlb_reattach = B_TRUE;
1858 1858
1859 1859 vdp->xdf_dinfo = dinfo;
1860 1860 if (!vdp->xdf_pgeom_fixed)
1861 1861 vdp->xdf_pgeom = pgeom;
1862 1862 }
1863 1863
1864 1864 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) {
1865 1865 if (vdp->xdf_xdev_nblocks == 0) {
1866 1866 vdp->xdf_mstate = DKIO_EJECTED;
1867 1867 cv_broadcast(&vdp->xdf_mstate_cv);
1868 1868 } else {
1869 1869 vdp->xdf_mstate = DKIO_INSERTED;
1870 1870 cv_broadcast(&vdp->xdf_mstate_cv);
1871 1871 }
1872 1872 } else {
1873 1873 if (vdp->xdf_mstate != DKIO_NONE) {
1874 1874 vdp->xdf_mstate = DKIO_NONE;
1875 1875 cv_broadcast(&vdp->xdf_mstate_cv);
1876 1876 }
1877 1877 }
1878 1878
1879 1879 mutex_exit(&vdp->xdf_dev_lk);
1880 1880
1881 1881 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr,
1882 1882 (uint64_t)vdp->xdf_xdev_nblocks);
1883 1883
1884 1884 /* Restart any currently queued up io */
1885 1885 xdf_io_start(vdp);
1886 1886
1887 1887 /*
1888 1888 * To get to the ready state we have to do IO to the backend device,
1889 1889 * but we can't initiate IO from the other end change callback thread
1890 1890 * (which is the current context we're executing in.) This is because
1891 1891 * if the other end disconnects while we're doing IO from the callback
1892 1892 * thread, then we can't receive that disconnect event and we hang
1893 1893 * waiting for an IO that can never complete.
1894 1894 */
1895 1895 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp,
1896 1896 DDI_SLEEP);
1897 1897
1898 1898 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
1899 1899 return (DDI_SUCCESS);
1900 1900 }
1901 1901
1902 1902 /*ARGSUSED*/
1903 1903 static void
1904 1904 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
1905 1905 {
1906 1906 XenbusState new_state = *(XenbusState *)impl_data;
1907 1907 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
1908 1908
1909 1909 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
1910 1910 vdp->xdf_addr, new_state));
1911 1911
1912 1912 mutex_enter(&vdp->xdf_cb_lk);
1913 1913
1914 1914 /* We assume that this callback is single threaded */
1915 1915 ASSERT(vdp->xdf_oe_change_thread == NULL);
1916 1916 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread);
1917 1917
1918 1918 /* ignore any backend state changes if we're suspending/suspended */
1919 1919 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) {
1920 1920 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
1921 1921 mutex_exit(&vdp->xdf_cb_lk);
1922 1922 return;
1923 1923 }
1924 1924
1925 1925 switch (new_state) {
1926 1926 case XenbusStateUnknown:
1927 1927 case XenbusStateInitialising:
1928 1928 case XenbusStateInitWait:
1929 1929 case XenbusStateInitialised:
1930 1930 if (vdp->xdf_state == XD_INIT)
1931 1931 break;
1932 1932
1933 1933 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1934 1934 if (xdf_setstate_init(vdp) != DDI_SUCCESS)
1935 1935 break;
1936 1936 ASSERT(vdp->xdf_state == XD_INIT);
1937 1937 break;
1938 1938
1939 1939 case XenbusStateConnected:
1940 1940 if ((vdp->xdf_state == XD_CONNECTED) ||
1941 1941 (vdp->xdf_state == XD_READY))
1942 1942 break;
1943 1943
1944 1944 if (vdp->xdf_state != XD_INIT) {
1945 1945 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1946 1946 if (xdf_setstate_init(vdp) != DDI_SUCCESS)
1947 1947 break;
1948 1948 ASSERT(vdp->xdf_state == XD_INIT);
1949 1949 }
1950 1950
1951 1951 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) {
1952 1952 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1953 1953 break;
1954 1954 }
1955 1955 ASSERT(vdp->xdf_state == XD_CONNECTED);
1956 1956 break;
1957 1957
1958 1958 case XenbusStateClosing:
1959 1959 if (xdf_isopen(vdp, -1)) {
1960 1960 cmn_err(CE_NOTE,
1961 1961 "xdf@%s: hot-unplug failed, still in use",
1962 1962 vdp->xdf_addr);
1963 1963 break;
1964 1964 }
1965 1965 /*FALLTHROUGH*/
1966 1966 case XenbusStateClosed:
1967 1967 xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
1968 1968 break;
1969 1969 }
1970 1970
1971 1971 /* notify anybody waiting for oe state change */
1972 1972 cv_broadcast(&vdp->xdf_dev_cv);
1973 1973 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
1974 1974 mutex_exit(&vdp->xdf_cb_lk);
1975 1975 }
1976 1976
1977 1977 static int
1978 1978 xdf_connect_locked(xdf_t *vdp, boolean_t wait)
1979 1979 {
1980 1980 int rv, timeouts = 0, reset = 20;
1981 1981
1982 1982 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1983 1983 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1984 1984
1985 1985 /* we can't connect once we're in the closed state */
1986 1986 if (vdp->xdf_state == XD_CLOSED)
1987 1987 return (XD_CLOSED);
1988 1988
1989 1989 vdp->xdf_connect_req++;
1990 1990 while (vdp->xdf_state != XD_READY) {
1991 1991 mutex_exit(&vdp->xdf_dev_lk);
1992 1992
1993 1993 /* only one thread at a time can be the connection thread */
1994 1994 if (vdp->xdf_connect_thread == NULL)
1995 1995 vdp->xdf_connect_thread = curthread;
1996 1996
1997 1997 if (vdp->xdf_connect_thread == curthread) {
1998 1998 if ((timeouts > 0) && ((timeouts % reset) == 0)) {
1999 1999 /*
2000 2000 * If we haven't establised a connection
2001 2001 * within the reset time, then disconnect
2002 2002 * so we can try again, and double the reset
2003 2003 * time. The reset time starts at 2 sec.
2004 2004 */
2005 2005 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
2006 2006 reset *= 2;
2007 2007 }
2008 2008 if (vdp->xdf_state == XD_UNKNOWN)
2009 2009 (void) xdf_setstate_init(vdp);
2010 2010 if (vdp->xdf_state == XD_INIT)
2011 2011 (void) xdf_setstate_connected(vdp);
2012 2012 }
2013 2013
2014 2014 mutex_enter(&vdp->xdf_dev_lk);
2015 2015 if (!wait || (vdp->xdf_state == XD_READY))
2016 2016 goto out;
2017 2017
2018 2018 mutex_exit((&vdp->xdf_cb_lk));
2019 2019 if (vdp->xdf_connect_thread != curthread) {
2020 2020 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk);
2021 2021 } else {
2022 2022 /* delay for 0.1 sec */
2023 2023 rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv,
2024 2024 &vdp->xdf_dev_lk, drv_usectohz(100*1000),
2025 2025 TR_CLOCK_TICK);
2026 2026 if (rv == -1)
2027 2027 timeouts++;
2028 2028 }
2029 2029 mutex_exit((&vdp->xdf_dev_lk));
2030 2030 mutex_enter((&vdp->xdf_cb_lk));
2031 2031 mutex_enter((&vdp->xdf_dev_lk));
2032 2032 if (rv == 0)
2033 2033 goto out;
2034 2034 }
2035 2035
2036 2036 out:
2037 2037 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
2038 2038 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
2039 2039
2040 2040 if (vdp->xdf_connect_thread == curthread) {
2041 2041 /*
2042 2042 * wake up someone else so they can become the connection
2043 2043 * thread.
2044 2044 */
2045 2045 cv_signal(&vdp->xdf_dev_cv);
2046 2046 vdp->xdf_connect_thread = NULL;
2047 2047 }
2048 2048
2049 2049 /* Try to lock the media */
2050 2050 mutex_exit((&vdp->xdf_dev_lk));
2051 2051 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
2052 2052 mutex_enter((&vdp->xdf_dev_lk));
2053 2053
2054 2054 vdp->xdf_connect_req--;
2055 2055 return (vdp->xdf_state);
2056 2056 }
2057 2057
2058 2058 static uint_t
2059 2059 xdf_iorestart(caddr_t arg)
2060 2060 {
2061 2061 xdf_t *vdp = (xdf_t *)arg;
2062 2062
2063 2063 ASSERT(vdp != NULL);
2064 2064
2065 2065 mutex_enter(&vdp->xdf_dev_lk);
2066 2066 ASSERT(ISDMACBON(vdp));
2067 2067 SETDMACBOFF(vdp);
2068 2068 mutex_exit(&vdp->xdf_dev_lk);
2069 2069
2070 2070 xdf_io_start(vdp);
2071 2071
2072 2072 return (DDI_INTR_CLAIMED);
2073 2073 }
2074 2074
2075 2075 #ifdef XPV_HVM_DRIVER
2076 2076
2077 2077 typedef struct xdf_hvm_entry {
2078 2078 list_node_t xdf_he_list;
2079 2079 char *xdf_he_path;
2080 2080 dev_info_t *xdf_he_dip;
2081 2081 } xdf_hvm_entry_t;
2082 2082
2083 2083 static list_t xdf_hvm_list;
2084 2084 static kmutex_t xdf_hvm_list_lock;
2085 2085
2086 2086 static xdf_hvm_entry_t *
2087 2087 i_xdf_hvm_find(const char *path, dev_info_t *dip)
2088 2088 {
2089 2089 xdf_hvm_entry_t *i;
2090 2090
2091 2091 ASSERT((path != NULL) || (dip != NULL));
2092 2092 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));
2093 2093
2094 2094 i = list_head(&xdf_hvm_list);
2095 2095 while (i != NULL) {
2096 2096 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
2097 2097 i = list_next(&xdf_hvm_list, i);
2098 2098 continue;
2099 2099 }
2100 2100 if ((dip != NULL) && (i->xdf_he_dip != dip)) {
2101 2101 i = list_next(&xdf_hvm_list, i);
2102 2102 continue;
2103 2103 }
2104 2104 break;
2105 2105 }
2106 2106 return (i);
2107 2107 }
2108 2108
2109 2109 dev_info_t *
2110 2110 xdf_hvm_hold(const char *path)
2111 2111 {
2112 2112 xdf_hvm_entry_t *i;
2113 2113 dev_info_t *dip;
2114 2114
2115 2115 mutex_enter(&xdf_hvm_list_lock);
2116 2116 i = i_xdf_hvm_find(path, NULL);
2117 2117 if (i == NULL) {
2118 2118 mutex_exit(&xdf_hvm_list_lock);
2119 2119 return (B_FALSE);
2120 2120 }
2121 2121 ndi_hold_devi(dip = i->xdf_he_dip);
2122 2122 mutex_exit(&xdf_hvm_list_lock);
2123 2123 return (dip);
2124 2124 }
2125 2125
2126 2126 static void
2127 2127 xdf_hvm_add(dev_info_t *dip)
2128 2128 {
2129 2129 xdf_hvm_entry_t *i;
2130 2130 char *path;
2131 2131
2132 2132 /* figure out the path for the dip */
2133 2133 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2134 2134 (void) ddi_pathname(dip, path);
2135 2135
2136 2136 i = kmem_alloc(sizeof (*i), KM_SLEEP);
2137 2137 i->xdf_he_dip = dip;
2138 2138 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);
2139 2139
2140 2140 mutex_enter(&xdf_hvm_list_lock);
2141 2141 ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
2142 2142 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
2143 2143 list_insert_head(&xdf_hvm_list, i);
2144 2144 mutex_exit(&xdf_hvm_list_lock);
2145 2145
2146 2146 kmem_free(path, MAXPATHLEN);
2147 2147 }
2148 2148
2149 2149 static void
2150 2150 xdf_hvm_rm(dev_info_t *dip)
2151 2151 {
2152 2152 xdf_hvm_entry_t *i;
2153 2153
2154 2154 mutex_enter(&xdf_hvm_list_lock);
2155 2155 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
2156 2156 list_remove(&xdf_hvm_list, i);
2157 2157 mutex_exit(&xdf_hvm_list_lock);
2158 2158
2159 2159 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
2160 2160 kmem_free(i, sizeof (*i));
2161 2161 }
2162 2162
2163 2163 static void
2164 2164 xdf_hvm_init(void)
2165 2165 {
2166 2166 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
2167 2167 offsetof(xdf_hvm_entry_t, xdf_he_list));
2168 2168 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
2169 2169 }
2170 2170
2171 2171 static void
2172 2172 xdf_hvm_fini(void)
2173 2173 {
2174 2174 ASSERT(list_head(&xdf_hvm_list) == NULL);
2175 2175 list_destroy(&xdf_hvm_list);
2176 2176 mutex_destroy(&xdf_hvm_list_lock);
2177 2177 }
2178 2178
2179 2179 boolean_t
2180 2180 xdf_hvm_connect(dev_info_t *dip)
2181 2181 {
2182 2182 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2183 2183 char *oename, *str;
2184 2184 int rv;
2185 2185
2186 2186 mutex_enter(&vdp->xdf_cb_lk);
2187 2187
2188 2188 /*
2189 2189 * Before try to establish a connection we need to wait for the
2190 2190 * backend hotplug scripts to have run. Once they are run the
2191 2191 * "<oename>/hotplug-status" property will be set to "connected".
2192 2192 */
2193 2193 for (;;) {
2194 2194 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
2195 2195
2196 2196 /*
2197 2197 * Get the xenbus path to the backend device. Note that
2198 2198 * we can't cache this path (and we look it up on each pass
2199 2199 * through this loop) because it could change during
2200 2200 * suspend, resume, and migration operations.
2201 2201 */
2202 2202 if ((oename = xvdi_get_oename(dip)) == NULL) {
2203 2203 mutex_exit(&vdp->xdf_cb_lk);
2204 2204 return (B_FALSE);
2205 2205 }
2206 2206
2207 2207 str = NULL;
2208 2208 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) &&
2209 2209 (strcmp(str, XBV_HP_STATUS_CONN) == 0))
2210 2210 break;
2211 2211
2212 2212 if (str != NULL)
2213 2213 strfree(str);
2214 2214
2215 2215 /* wait for an update to "<oename>/hotplug-status" */
2216 2216 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) {
2217 2217 /* we got interrupted by a signal */
2218 2218 mutex_exit(&vdp->xdf_cb_lk);
2219 2219 return (B_FALSE);
2220 2220 }
2221 2221 }
2222 2222
2223 2223 /* Good news. The backend hotplug scripts have been run. */
2224 2224 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
2225 2225 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0);
2226 2226 strfree(str);
2227 2227
2228 2228 /*
2229 2229 * If we're emulating a cd device and if the backend doesn't support
2230 2230 * media request opreations, then we're not going to bother trying
2231 2231 * to establish a connection for a couple reasons. First off, media
2232 2232 * requests support is required to support operations like eject and
2233 2233 * media locking. Second, other backend platforms like Linux don't
2234 2234 * support hvm pv cdrom access. They don't even have a backend pv
2235 2235 * driver for cdrom device nodes, so we don't want to block forever
2236 2236 * waiting for a connection to a backend driver that doesn't exist.
2237 2237 */
2238 2238 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) {
2239 2239 mutex_exit(&vdp->xdf_cb_lk);
2240 2240 return (B_FALSE);
2241 2241 }
2242 2242
2243 2243 mutex_enter(&vdp->xdf_dev_lk);
2244 2244 rv = xdf_connect_locked(vdp, B_TRUE);
2245 2245 mutex_exit(&vdp->xdf_dev_lk);
2246 2246 mutex_exit(&vdp->xdf_cb_lk);
2247 2247
2248 2248 return ((rv == XD_READY) ? B_TRUE : B_FALSE);
2249 2249 }
2250 2250
2251 2251 int
2252 2252 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
2253 2253 {
2254 2254 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2255 2255
2256 2256 /* sanity check the requested physical geometry */
2257 2257 mutex_enter(&vdp->xdf_dev_lk);
2258 2258 if ((geomp->g_secsize != XB_BSIZE) ||
2259 2259 (geomp->g_capacity == 0)) {
2260 2260 mutex_exit(&vdp->xdf_dev_lk);
2261 2261 return (EINVAL);
2262 2262 }
2263 2263
2264 2264 /*
2265 2265 * If we've already connected to the backend device then make sure
2266 2266 * we're not defining a physical geometry larger than our backend
2267 2267 * device.
2268 2268 */
2269 2269 if ((vdp->xdf_xdev_nblocks != 0) &&
2270 2270 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
2271 2271 mutex_exit(&vdp->xdf_dev_lk);
2272 2272 return (EINVAL);
2273 2273 }
2274 2274
2275 2275 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom));
2276 2276 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl;
2277 2277 vdp->xdf_pgeom.g_acyl = geomp->g_acyl;
2278 2278 vdp->xdf_pgeom.g_nhead = geomp->g_nhead;
2279 2279 vdp->xdf_pgeom.g_nsect = geomp->g_nsect;
2280 2280 vdp->xdf_pgeom.g_secsize = geomp->g_secsize;
2281 2281 vdp->xdf_pgeom.g_capacity = geomp->g_capacity;
2282 2282 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv;
2283 2283 vdp->xdf_pgeom.g_rpm = geomp->g_rpm;
2284 2284
2285 2285 vdp->xdf_pgeom_fixed = B_TRUE;
2286 2286 mutex_exit(&vdp->xdf_dev_lk);
2287 2287
2288 2288 /* force a re-validation */
2289 2289 cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
2290 2290
2291 2291 return (0);
2292 2292 }
2293 2293
2294 2294 boolean_t
2295 2295 xdf_is_cd(dev_info_t *dip)
2296 2296 {
2297 2297 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2298 2298 boolean_t rv;
2299 2299
2300 2300 mutex_enter(&vdp->xdf_cb_lk);
2301 2301 rv = XD_IS_CD(vdp);
2302 2302 mutex_exit(&vdp->xdf_cb_lk);
2303 2303 return (rv);
2304 2304 }
2305 2305
2306 2306 boolean_t
2307 2307 xdf_is_rm(dev_info_t *dip)
2308 2308 {
2309 2309 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2310 2310 boolean_t rv;
2311 2311
2312 2312 mutex_enter(&vdp->xdf_cb_lk);
2313 2313 rv = XD_IS_RM(vdp);
2314 2314 mutex_exit(&vdp->xdf_cb_lk);
2315 2315 return (rv);
2316 2316 }
2317 2317
2318 2318 boolean_t
2319 2319 xdf_media_req_supported(dev_info_t *dip)
2320 2320 {
2321 2321 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2322 2322 boolean_t rv;
2323 2323
2324 2324 mutex_enter(&vdp->xdf_cb_lk);
2325 2325 rv = vdp->xdf_media_req_supported;
2326 2326 mutex_exit(&vdp->xdf_cb_lk);
2327 2327 return (rv);
2328 2328 }
2329 2329
2330 2330 #endif /* XPV_HVM_DRIVER */
2331 2331
2332 2332 static int
2333 2333 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp)
2334 2334 {
2335 2335 xdf_t *vdp;
2336 2336 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
2337 2337
2338 2338 if (vdp == NULL)
2339 2339 return (ENXIO);
2340 2340
2341 2341 mutex_enter(&vdp->xdf_dev_lk);
2342 2342 *capp = vdp->xdf_pgeom.g_capacity;
2343 2343 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp));
2344 2344 mutex_exit(&vdp->xdf_dev_lk);
2345 2345 return (0);
2346 2346 }
2347 2347
2348 2348 static int
2349 2349 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
2350 2350 {
2351 2351 xdf_t *vdp;
2352 2352
2353 2353 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
2354 2354 return (ENXIO);
2355 2355 *geomp = vdp->xdf_pgeom;
2356 2356 return (0);
2357 2357 }
2358 2358
2359 2359 /*
2360 2360 * No real HBA, no geometry available from it
2361 2361 */
2362 2362 /*ARGSUSED*/
2363 2363 static int
2364 2364 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp)
2365 2365 {
2366 2366 return (EINVAL);
2367 2367 }
2368 2368
2369 2369 static int
2370 2370 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep)
2371 2371 {
2372 2372 xdf_t *vdp;
2373 2373
2374 2374 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))))
2375 2375 return (ENXIO);
2376 2376
2377 2377 if (XD_IS_RO(vdp))
2378 2378 tgattributep->media_is_writable = 0;
2379 2379 else
2380 2380 tgattributep->media_is_writable = 1;
2381 2381 tgattributep->media_is_rotational = 0;
2382 2382 return (0);
2383 2383 }
2384 2384
2385 2385 /* ARGSUSED3 */
2386 2386 int
2387 2387 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
2388 2388 {
2389 2389 int instance;
2390 2390 xdf_t *vdp;
2391 2391
2392 2392 instance = ddi_get_instance(dip);
2393 2393
2394 2394 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
2395 2395 return (ENXIO);
2396 2396
2397 2397 switch (cmd) {
2398 2398 case TG_GETPHYGEOM:
2399 2399 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg));
2400 2400 case TG_GETVIRTGEOM:
2401 2401 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg));
2402 2402 case TG_GETCAPACITY:
2403 2403 return (xdf_lb_getcap(dip, (diskaddr_t *)arg));
2404 2404 case TG_GETBLOCKSIZE:
2405 2405 mutex_enter(&vdp->xdf_cb_lk);
2406 2406 *(uint32_t *)arg = vdp->xdf_xdev_secsize;
2407 2407 mutex_exit(&vdp->xdf_cb_lk);
2408 2408 return (0);
2409 2409 case TG_GETATTR:
2410 2410 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg));
2411 2411 default:
2412 2412 return (ENOTTY);
2413 2413 }
2414 2414 }
2415 2415
2416 2416 /* ARGSUSED5 */
2417 2417 int
2418 2418 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp,
2419 2419 diskaddr_t start, size_t reqlen, void *tg_cookie)
2420 2420 {
2421 2421 xdf_t *vdp;
2422 2422 struct buf *bp;
2423 2423 int err = 0;
2424 2424
2425 2425 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
2426 2426
2427 2427 /* We don't allow IO from the oe_change callback thread */
2428 2428 ASSERT(curthread != vdp->xdf_oe_change_thread);
2429 2429
2430 2430 /*
2431 2431 * Having secsize of 0 means that device isn't connected yet.
2432 2432 * FIXME This happens for CD devices, and there's nothing we
2433 2433 * can do about it at the moment.
2434 2434 */
2435 2435 if (vdp->xdf_xdev_secsize == 0)
2436 2436 return (EIO);
2437 2437
2438 2438 if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE))
2439 2439 >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
2440 2440 return (EINVAL);
2441 2441
2442 2442 bp = getrbuf(KM_SLEEP);
2443 2443 if (cmd == TG_READ)
2444 2444 bp->b_flags = B_BUSY | B_READ;
2445 2445 else
2446 2446 bp->b_flags = B_BUSY | B_WRITE;
2447 2447
2448 2448 bp->b_un.b_addr = bufp;
2449 2449 bp->b_bcount = reqlen;
2450 2450 bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE);
2451 2451 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */
2452 2452
2453 2453 mutex_enter(&vdp->xdf_dev_lk);
2454 2454 xdf_bp_push(vdp, bp);
2455 2455 mutex_exit(&vdp->xdf_dev_lk);
2456 2456 xdf_io_start(vdp);
2457 2457 if (curthread == vdp->xdf_ready_tq_thread)
2458 2458 (void) xdf_ring_drain(vdp);
2459 2459 err = biowait(bp);
2460 2460 ASSERT(bp->b_flags & B_DONE);
2461 2461 freerbuf(bp);
2462 2462 return (err);
2463 2463 }
2464 2464
2465 2465 /*
2466 2466 * Lock the current media. Set the media state to "lock".
2467 2467 * (Media locks are only respected by the backend driver.)
2468 2468 */
2469 2469 static int
2470 2470 xdf_ioctl_mlock(xdf_t *vdp)
2471 2471 {
2472 2472 int rv;
2473 2473 mutex_enter(&vdp->xdf_cb_lk);
2474 2474 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
2475 2475 mutex_exit(&vdp->xdf_cb_lk);
2476 2476 return (rv);
2477 2477 }
2478 2478
2479 2479 /*
2480 2480 * Release a media lock. Set the media state to "none".
2481 2481 */
2482 2482 static int
2483 2483 xdf_ioctl_munlock(xdf_t *vdp)
2484 2484 {
2485 2485 int rv;
2486 2486 mutex_enter(&vdp->xdf_cb_lk);
2487 2487 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE);
2488 2488 mutex_exit(&vdp->xdf_cb_lk);
2489 2489 return (rv);
2490 2490 }
2491 2491
2492 2492 /*
2493 2493 * Eject the current media. Ignores any media locks. (Media locks
2494 2494 * are only for benifit of the the backend.)
2495 2495 */
2496 2496 static int
2497 2497 xdf_ioctl_eject(xdf_t *vdp)
2498 2498 {
2499 2499 int rv;
2500 2500
2501 2501 mutex_enter(&vdp->xdf_cb_lk);
2502 2502 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) {
2503 2503 mutex_exit(&vdp->xdf_cb_lk);
2504 2504 return (rv);
2505 2505 }
2506 2506
2507 2507 /*
2508 2508 * We've set the media requests xenbus parameter to eject, so now
2509 2509 * disconnect from the backend, wait for the backend to clear
2510 2510 * the media requets xenbus paramter, and then we can reconnect
2511 2511 * to the backend.
2512 2512 */
2513 2513 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
2514 2514 mutex_enter(&vdp->xdf_dev_lk);
2515 2515 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) {
2516 2516 mutex_exit(&vdp->xdf_dev_lk);
2517 2517 mutex_exit(&vdp->xdf_cb_lk);
2518 2518 return (EIO);
2519 2519 }
2520 2520 mutex_exit(&vdp->xdf_dev_lk);
2521 2521 mutex_exit(&vdp->xdf_cb_lk);
2522 2522 return (0);
2523 2523 }
2524 2524
2525 2525 /*
2526 2526 * Watch for media state changes. This can be an insertion of a device
2527 2527 * (triggered by a 'xm block-configure' request in another domain) or
2528 2528 * the ejection of a device (triggered by a local "eject" operation).
2529 2529 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I).
2530 2530 */
2531 2531 static int
2532 2532 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate)
2533 2533 {
2534 2534 enum dkio_state prev_state;
2535 2535
2536 2536 mutex_enter(&vdp->xdf_cb_lk);
2537 2537 prev_state = vdp->xdf_mstate;
2538 2538
2539 2539 if (vdp->xdf_mstate == mstate) {
2540 2540 while (vdp->xdf_mstate == prev_state) {
2541 2541 if (cv_wait_sig(&vdp->xdf_mstate_cv,
2542 2542 &vdp->xdf_cb_lk) == 0) {
2543 2543 mutex_exit(&vdp->xdf_cb_lk);
2544 2544 return (EINTR);
2545 2545 }
2546 2546 }
2547 2547 }
2548 2548
2549 2549 if ((prev_state != DKIO_INSERTED) &&
2550 2550 (vdp->xdf_mstate == DKIO_INSERTED)) {
2551 2551 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
2552 2552 mutex_exit(&vdp->xdf_cb_lk);
2553 2553 return (0);
2554 2554 }
2555 2555
2556 2556 mutex_exit(&vdp->xdf_cb_lk);
2557 2557 return (0);
2558 2558 }
2559 2559
2560 2560 /*ARGSUSED*/
2561 2561 static int
2562 2562 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
2563 2563 int *rvalp)
2564 2564 {
2565 2565 minor_t minor = getminor(dev);
2566 2566 int part = XDF_PART(minor);
2567 2567 xdf_t *vdp;
2568 2568 int rv;
2569 2569
2570 2570 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) ||
2571 2571 (!xdf_isopen(vdp, part)))
2572 2572 return (ENXIO);
2573 2573
2574 2574 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n",
2575 2575 vdp->xdf_addr, cmd, cmd));
2576 2576
2577 2577 switch (cmd) {
2578 2578 default:
2579 2579 return (ENOTTY);
2580 2580 case DKIOCG_PHYGEOM:
2581 2581 case DKIOCG_VIRTGEOM:
2582 2582 case DKIOCGGEOM:
2583 2583 case DKIOCSGEOM:
2584 2584 case DKIOCGAPART:
2585 2585 case DKIOCSAPART:
2586 2586 case DKIOCGVTOC:
2587 2587 case DKIOCSVTOC:
2588 2588 case DKIOCPARTINFO:
2589 2589 case DKIOCGEXTVTOC:
2590 2590 case DKIOCSEXTVTOC:
2591 2591 case DKIOCEXTPARTINFO:
2592 2592 case DKIOCGMBOOT:
2593 2593 case DKIOCSMBOOT:
2594 2594 case DKIOCGETEFI:
2595 2595 case DKIOCSETEFI:
2596 2596 case DKIOCSETEXTPART:
2597 2597 case DKIOCPARTITION:
2598 2598 rv = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
2599 2599 rvalp, NULL);
2600 2600 if (rv != 0)
2601 2601 return (rv);
2602 2602 /*
2603 2603 * If we're labelling the disk, we have to update the geometry
2604 2604 * in the cmlb data structures, and we also have to write a new
2605 2605 * devid to the disk. Note that writing an EFI label currently
2606 2606 * requires 4 ioctls, and devid setup will fail on all but the
2607 2607 * last.
2608 2608 */
2609 2609 if (cmd == DKIOCSEXTVTOC || cmd == DKIOCSVTOC ||
2610 2610 cmd == DKIOCSETEFI) {
2611 2611 rv = cmlb_validate(vdp->xdf_vd_lbl, 0, 0);
2612 2612 if (rv == 0) {
2613 2613 xdf_devid_setup(vdp);
2614 2614 } else {
2615 2615 cmn_err(CE_WARN,
2616 2616 "xdf@%s, labeling failed on validate",
2617 2617 vdp->xdf_addr);
2618 2618 }
2619 2619 }
2620 2620 return (rv);
2621 2621 case FDEJECT:
2622 2622 case DKIOCEJECT:
2623 2623 case CDROMEJECT:
2624 2624 return (xdf_ioctl_eject(vdp));
2625 2625 case DKIOCLOCK:
2626 2626 return (xdf_ioctl_mlock(vdp));
2627 2627 case DKIOCUNLOCK:
2628 2628 return (xdf_ioctl_munlock(vdp));
2629 2629 case CDROMREADOFFSET: {
2630 2630 int offset = 0;
2631 2631 if (!XD_IS_CD(vdp))
2632 2632 return (ENOTTY);
2633 2633 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode))
2634 2634 return (EFAULT);
2635 2635 return (0);
2636 2636 }
2637 2637 case DKIOCGMEDIAINFO: {
2638 2638 struct dk_minfo media_info;
2639 2639
2640 2640 media_info.dki_lbsize = vdp->xdf_xdev_secsize;
2641 2641 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
2642 2642 if (XD_IS_CD(vdp))
2643 2643 media_info.dki_media_type = DK_CDROM;
2644 2644 else
2645 2645 media_info.dki_media_type = DK_FIXED_DISK;
2646 2646
2647 2647 if (ddi_copyout(&media_info, (void *)arg,
2648 2648 sizeof (struct dk_minfo), mode))
2649 2649 return (EFAULT);
2650 2650 return (0);
2651 2651 }
2652 2652 case DKIOCINFO: {
2653 2653 struct dk_cinfo info;
2654 2654
2655 2655 /* controller information */
2656 2656 if (XD_IS_CD(vdp))
2657 2657 info.dki_ctype = DKC_CDROM;
2658 2658 else
2659 2659 info.dki_ctype = DKC_VBD;
2660 2660
2661 2661 info.dki_cnum = 0;
2662 2662 (void) strncpy((char *)(&info.dki_cname), "xdf", 8);
2663 2663
2664 2664 /* unit information */
2665 2665 info.dki_unit = ddi_get_instance(vdp->xdf_dip);
2666 2666 (void) strncpy((char *)(&info.dki_dname), "xdf", 8);
2667 2667 info.dki_flags = DKI_FMTVOL;
2668 2668 info.dki_partition = part;
2669 2669 info.dki_maxtransfer = maxphys / DEV_BSIZE;
2670 2670 info.dki_addr = 0;
2671 2671 info.dki_space = 0;
2672 2672 info.dki_prio = 0;
2673 2673 info.dki_vec = 0;
2674 2674
2675 2675 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
2676 2676 return (EFAULT);
2677 2677 return (0);
2678 2678 }
2679 2679 case DKIOCSTATE: {
2680 2680 enum dkio_state mstate;
2681 2681
2682 2682 if (ddi_copyin((void *)arg, &mstate,
2683 2683 sizeof (mstate), mode) != 0)
2684 2684 return (EFAULT);
2685 2685 if ((rv = xdf_dkstate(vdp, mstate)) != 0)
2686 2686 return (rv);
2687 2687 mstate = vdp->xdf_mstate;
2688 2688 if (ddi_copyout(&mstate, (void *)arg,
2689 2689 sizeof (mstate), mode) != 0)
2690 2690 return (EFAULT);
2691 2691 return (0);
2692 2692 }
2693 2693 case DKIOCREMOVABLE: {
2694 2694 int i = BOOLEAN2VOID(XD_IS_RM(vdp));
2695 2695 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode))
2696 2696 return (EFAULT);
2697 2697 return (0);
2698 2698 }
2699 2699 case DKIOCGETWCE: {
2700 2700 int i = BOOLEAN2VOID(XD_IS_RM(vdp));
2701 2701 if (ddi_copyout(&i, (void *)arg, sizeof (i), mode))
2702 2702 return (EFAULT);
2703 2703 return (0);
2704 2704 }
2705 2705 case DKIOCSETWCE: {
2706 2706 int i;
2707 2707 if (ddi_copyin((void *)arg, &i, sizeof (i), mode))
2708 2708 return (EFAULT);
2709 2709 vdp->xdf_wce = VOID2BOOLEAN(i);
2710 2710 return (0);
2711 2711 }
2712 2712 case DKIOCFLUSHWRITECACHE: {
2713 2713 struct dk_callback *dkc = (struct dk_callback *)arg;
2714 2714
2715 2715 if (vdp->xdf_flush_supported) {
2716 2716 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
2717 2717 NULL, 0, 0, (void *)dev);
2718 2718 } else if (vdp->xdf_feature_barrier &&
2719 2719 !xdf_barrier_flush_disable) {
2720 2720 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
2721 2721 vdp->xdf_cache_flush_block, xdf_flush_block,
2722 2722 vdp->xdf_xdev_secsize, (void *)dev);
2723 2723 } else {
2724 2724 return (ENOTTY);
2725 2725 }
2726 2726 if ((mode & FKIOCTL) && (dkc != NULL) &&
2727 2727 (dkc->dkc_callback != NULL)) {
2728 2728 (*dkc->dkc_callback)(dkc->dkc_cookie, rv);
2729 2729 /* need to return 0 after calling callback */
2730 2730 rv = 0;
2731 2731 }
2732 2732 return (rv);
2733 2733 }
2734 2734 }
2735 2735 /*NOTREACHED*/
2736 2736 }
2737 2737
2738 2738 static int
2739 2739 xdf_strategy(struct buf *bp)
2740 2740 {
2741 2741 xdf_t *vdp;
2742 2742 minor_t minor;
2743 2743 diskaddr_t p_blkct, p_blkst;
2744 2744 daddr_t blkno;
2745 2745 ulong_t nblks;
2746 2746 int part;
2747 2747
2748 2748 minor = getminor(bp->b_edev);
2749 2749 part = XDF_PART(minor);
2750 2750 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor));
2751 2751
2752 2752 mutex_enter(&vdp->xdf_dev_lk);
2753 2753 if (!xdf_isopen(vdp, part)) {
2754 2754 mutex_exit(&vdp->xdf_dev_lk);
2755 2755 xdf_io_err(bp, ENXIO, 0);
2756 2756 return (0);
2757 2757 }
2758 2758
2759 2759 /* We don't allow IO from the oe_change callback thread */
2760 2760 ASSERT(curthread != vdp->xdf_oe_change_thread);
2761 2761
2762 2762 /* Check for writes to a read only device */
2763 2763 if (!IS_READ(bp) && XD_IS_RO(vdp)) {
2764 2764 mutex_exit(&vdp->xdf_dev_lk);
2765 2765 xdf_io_err(bp, EROFS, 0);
2766 2766 return (0);
2767 2767 }
2768 2768
2769 2769 /* Check if this I/O is accessing a partition or the entire disk */
2770 2770 if ((long)bp->b_private == XB_SLICE_NONE) {
2771 2771 /* This I/O is using an absolute offset */
2772 2772 p_blkct = vdp->xdf_xdev_nblocks;
2773 2773 p_blkst = 0;
2774 2774 } else {
2775 2775 /* This I/O is using a partition relative offset */
2776 2776 mutex_exit(&vdp->xdf_dev_lk);
2777 2777 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
2778 2778 &p_blkst, NULL, NULL, NULL)) {
2779 2779 xdf_io_err(bp, ENXIO, 0);
2780 2780 return (0);
2781 2781 }
2782 2782 mutex_enter(&vdp->xdf_dev_lk);
2783 2783 }
2784 2784
2785 2785 /*
2786 2786 * Adjust the real blkno and bcount according to the underline
2787 2787 * physical sector size.
2788 2788 */
2789 2789 blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE);
2790 2790
2791 2791 /* check for a starting block beyond the disk or partition limit */
2792 2792 if (blkno > p_blkct) {
2793 2793 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64,
2794 2794 vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct));
2795 2795 mutex_exit(&vdp->xdf_dev_lk);
2796 2796 xdf_io_err(bp, EINVAL, 0);
2797 2797 return (0);
2798 2798 }
2799 2799
2800 2800 /* Legacy: don't set error flag at this case */
2801 2801 if (blkno == p_blkct) {
2802 2802 mutex_exit(&vdp->xdf_dev_lk);
2803 2803 bp->b_resid = bp->b_bcount;
2804 2804 biodone(bp);
2805 2805 return (0);
2806 2806 }
2807 2807
2808 2808 /* sanitize the input buf */
2809 2809 bioerror(bp, 0);
2810 2810 bp->b_resid = 0;
2811 2811 bp->av_back = bp->av_forw = NULL;
2812 2812
2813 2813 /* Adjust for partial transfer, this will result in an error later */
2814 2814 if (vdp->xdf_xdev_secsize != 0 &&
2815 2815 vdp->xdf_xdev_secsize != XB_BSIZE) {
2816 2816 nblks = bp->b_bcount / vdp->xdf_xdev_secsize;
2817 2817 } else {
2818 2818 nblks = bp->b_bcount >> XB_BSHIFT;
2819 2819 }
2820 2820
2821 2821 if ((blkno + nblks) > p_blkct) {
2822 2822 if (vdp->xdf_xdev_secsize != 0 &&
2823 2823 vdp->xdf_xdev_secsize != XB_BSIZE) {
2824 2824 bp->b_resid =
2825 2825 ((blkno + nblks) - p_blkct) *
2826 2826 vdp->xdf_xdev_secsize;
2827 2827 } else {
2828 2828 bp->b_resid =
2829 2829 ((blkno + nblks) - p_blkct) <<
2830 2830 XB_BSHIFT;
2831 2831 }
2832 2832 bp->b_bcount -= bp->b_resid;
2833 2833 }
2834 2834
2835 2835 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n",
2836 2836 vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount));
2837 2837
2838 2838 /* Fix up the buf struct */
2839 2839 bp->b_flags |= B_BUSY;
2840 2840 bp->b_private = (void *)(uintptr_t)p_blkst;
2841 2841
2842 2842 xdf_bp_push(vdp, bp);
2843 2843 mutex_exit(&vdp->xdf_dev_lk);
2844 2844 xdf_io_start(vdp);
2845 2845 if (do_polled_io)
2846 2846 (void) xdf_ring_drain(vdp);
2847 2847 return (0);
2848 2848 }
2849 2849
2850 2850 /*ARGSUSED*/
2851 2851 static int
2852 2852 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
2853 2853 {
2854 2854 xdf_t *vdp;
2855 2855 minor_t minor;
2856 2856 diskaddr_t p_blkcnt;
2857 2857 int part;
2858 2858
2859 2859 minor = getminor(dev);
2860 2860 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2861 2861 return (ENXIO);
2862 2862
2863 2863 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n",
2864 2864 vdp->xdf_addr, (int64_t)uiop->uio_offset));
2865 2865
2866 2866 part = XDF_PART(minor);
2867 2867 if (!xdf_isopen(vdp, part))
2868 2868 return (ENXIO);
2869 2869
2870 2870 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2871 2871 NULL, NULL, NULL, NULL))
2872 2872 return (ENXIO);
2873 2873
2874 2874 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2875 2875 return (ENOSPC);
2876 2876
2877 2877 if (U_INVAL(uiop))
2878 2878 return (EINVAL);
2879 2879
2880 2880 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
2881 2881 }
2882 2882
2883 2883 /*ARGSUSED*/
2884 2884 static int
2885 2885 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
2886 2886 {
2887 2887 xdf_t *vdp;
2888 2888 minor_t minor;
2889 2889 diskaddr_t p_blkcnt;
2890 2890 int part;
2891 2891
2892 2892 minor = getminor(dev);
2893 2893 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2894 2894 return (ENXIO);
2895 2895
2896 2896 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n",
2897 2897 vdp->xdf_addr, (int64_t)uiop->uio_offset));
2898 2898
2899 2899 part = XDF_PART(minor);
2900 2900 if (!xdf_isopen(vdp, part))
2901 2901 return (ENXIO);
2902 2902
2903 2903 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2904 2904 NULL, NULL, NULL, NULL))
2905 2905 return (ENXIO);
2906 2906
2907 2907 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2908 2908 return (ENOSPC);
2909 2909
2910 2910 if (U_INVAL(uiop))
2911 2911 return (EINVAL);
2912 2912
2913 2913 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop));
2914 2914 }
2915 2915
2916 2916 /*ARGSUSED*/
2917 2917 static int
2918 2918 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
2919 2919 {
2920 2920 xdf_t *vdp;
2921 2921 minor_t minor;
2922 2922 struct uio *uiop = aiop->aio_uio;
2923 2923 diskaddr_t p_blkcnt;
2924 2924 int part;
2925 2925
2926 2926 minor = getminor(dev);
2927 2927 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2928 2928 return (ENXIO);
2929 2929
2930 2930 part = XDF_PART(minor);
2931 2931 if (!xdf_isopen(vdp, part))
2932 2932 return (ENXIO);
2933 2933
2934 2934 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2935 2935 NULL, NULL, NULL, NULL))
2936 2936 return (ENXIO);
2937 2937
2938 2938 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2939 2939 return (ENOSPC);
2940 2940
2941 2941 if (U_INVAL(uiop))
2942 2942 return (EINVAL);
2943 2943
2944 2944 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop));
2945 2945 }
2946 2946
2947 2947 /*ARGSUSED*/
2948 2948 static int
2949 2949 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
2950 2950 {
2951 2951 xdf_t *vdp;
2952 2952 minor_t minor;
2953 2953 struct uio *uiop = aiop->aio_uio;
2954 2954 diskaddr_t p_blkcnt;
2955 2955 int part;
2956 2956
2957 2957 minor = getminor(dev);
2958 2958 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2959 2959 return (ENXIO);
2960 2960
2961 2961 part = XDF_PART(minor);
2962 2962 if (!xdf_isopen(vdp, part))
2963 2963 return (ENXIO);
2964 2964
2965 2965 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2966 2966 NULL, NULL, NULL, NULL))
2967 2967 return (ENXIO);
2968 2968
2969 2969 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2970 2970 return (ENOSPC);
2971 2971
2972 2972 if (U_INVAL(uiop))
2973 2973 return (EINVAL);
2974 2974
2975 2975 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop));
2976 2976 }
2977 2977
2978 2978 static int
2979 2979 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
2980 2980 {
2981 2981 struct buf dumpbuf, *dbp = &dumpbuf;
2982 2982 xdf_t *vdp;
2983 2983 minor_t minor;
2984 2984 int err = 0;
2985 2985 int part;
2986 2986 diskaddr_t p_blkcnt, p_blkst;
2987 2987
2988 2988 minor = getminor(dev);
2989 2989 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2990 2990 return (ENXIO);
2991 2991
2992 2992 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n",
2993 2993 vdp->xdf_addr, (void *)addr, blkno, nblk));
2994 2994
2995 2995 /* We don't allow IO from the oe_change callback thread */
2996 2996 ASSERT(curthread != vdp->xdf_oe_change_thread);
2997 2997
2998 2998 part = XDF_PART(minor);
2999 2999 if (!xdf_isopen(vdp, part))
3000 3000 return (ENXIO);
3001 3001
3002 3002 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
3003 3003 NULL, NULL, NULL))
3004 3004 return (ENXIO);
3005 3005
3006 3006 if ((blkno + nblk) >
3007 3007 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) {
3008 3008 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64,
3009 3009 vdp->xdf_addr, (daddr_t)((blkno + nblk) /
3010 3010 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt);
3011 3011 return (EINVAL);
3012 3012 }
3013 3013
3014 3014 bioinit(dbp);
3015 3015 dbp->b_flags = B_BUSY;
3016 3016 dbp->b_un.b_addr = addr;
3017 3017 dbp->b_bcount = nblk << DEV_BSHIFT;
3018 3018 dbp->b_blkno = blkno;
3019 3019 dbp->b_edev = dev;
3020 3020 dbp->b_private = (void *)(uintptr_t)p_blkst;
3021 3021
3022 3022 mutex_enter(&vdp->xdf_dev_lk);
3023 3023 xdf_bp_push(vdp, dbp);
3024 3024 mutex_exit(&vdp->xdf_dev_lk);
3025 3025 xdf_io_start(vdp);
3026 3026 err = xdf_ring_drain(vdp);
3027 3027 biofini(dbp);
3028 3028 return (err);
3029 3029 }
3030 3030
3031 3031 /*ARGSUSED*/
3032 3032 static int
3033 3033 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
3034 3034 {
3035 3035 minor_t minor;
3036 3036 xdf_t *vdp;
3037 3037 int part;
3038 3038 ulong_t parbit;
3039 3039
3040 3040 minor = getminor(dev);
3041 3041 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
3042 3042 return (ENXIO);
3043 3043
3044 3044 mutex_enter(&vdp->xdf_dev_lk);
3045 3045 part = XDF_PART(minor);
3046 3046 if (!xdf_isopen(vdp, part)) {
3047 3047 mutex_exit(&vdp->xdf_dev_lk);
3048 3048 return (ENXIO);
3049 3049 }
3050 3050 parbit = 1 << part;
3051 3051
3052 3052 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0);
3053 3053 if (otyp == OTYP_LYR) {
3054 3054 ASSERT(vdp->xdf_vd_lyropen[part] > 0);
3055 3055 if (--vdp->xdf_vd_lyropen[part] == 0)
3056 3056 vdp->xdf_vd_open[otyp] &= ~parbit;
3057 3057 } else {
3058 3058 vdp->xdf_vd_open[otyp] &= ~parbit;
3059 3059 }
3060 3060 vdp->xdf_vd_exclopen &= ~parbit;
3061 3061
3062 3062 mutex_exit(&vdp->xdf_dev_lk);
3063 3063 return (0);
3064 3064 }
3065 3065
3066 3066 static int
3067 3067 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
3068 3068 {
3069 3069 minor_t minor;
3070 3070 xdf_t *vdp;
3071 3071 int part;
3072 3072 ulong_t parbit;
3073 3073 diskaddr_t p_blkct = 0;
3074 3074 boolean_t firstopen;
3075 3075 boolean_t nodelay;
3076 3076
3077 3077 minor = getminor(*devp);
3078 3078 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
3079 3079 return (ENXIO);
3080 3080
3081 3081 nodelay = (flag & (FNDELAY | FNONBLOCK));
3082 3082
3083 3083 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr));
3084 3084
3085 3085 /* do cv_wait until connected or failed */
3086 3086 mutex_enter(&vdp->xdf_cb_lk);
3087 3087 mutex_enter(&vdp->xdf_dev_lk);
3088 3088 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) {
3089 3089 mutex_exit(&vdp->xdf_dev_lk);
3090 3090 mutex_exit(&vdp->xdf_cb_lk);
3091 3091 return (ENXIO);
3092 3092 }
3093 3093 mutex_exit(&vdp->xdf_cb_lk);
3094 3094
3095 3095 if ((flag & FWRITE) && XD_IS_RO(vdp)) {
3096 3096 mutex_exit(&vdp->xdf_dev_lk);
3097 3097 return (EROFS);
3098 3098 }
3099 3099
3100 3100 part = XDF_PART(minor);
3101 3101 parbit = 1 << part;
3102 3102 if ((vdp->xdf_vd_exclopen & parbit) ||
3103 3103 ((flag & FEXCL) && xdf_isopen(vdp, part))) {
3104 3104 mutex_exit(&vdp->xdf_dev_lk);
3105 3105 return (EBUSY);
3106 3106 }
3107 3107
3108 3108 /* are we the first one to open this node? */
3109 3109 firstopen = !xdf_isopen(vdp, -1);
3110 3110
3111 3111 if (otyp == OTYP_LYR)
3112 3112 vdp->xdf_vd_lyropen[part]++;
3113 3113
3114 3114 vdp->xdf_vd_open[otyp] |= parbit;
3115 3115
3116 3116 if (flag & FEXCL)
3117 3117 vdp->xdf_vd_exclopen |= parbit;
3118 3118
3119 3119 mutex_exit(&vdp->xdf_dev_lk);
3120 3120
3121 3121 /* force a re-validation */
3122 3122 if (firstopen)
3123 3123 cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
3124 3124
3125 3125 /* If this is a non-blocking open then we're done */
3126 3126 if (nodelay)
3127 3127 return (0);
3128 3128
3129 3129 /*
3130 3130 * This is a blocking open, so we require:
3131 3131 * - that the disk have a valid label on it
3132 3132 * - that the size of the partition that we're opening is non-zero
3133 3133 */
3134 3134 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
3135 3135 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) {
3136 3136 (void) xdf_close(*devp, flag, otyp, credp);
3137 3137 return (ENXIO);
3138 3138 }
3139 3139
3140 3140 return (0);
3141 3141 }
3142 3142
3143 3143 /*ARGSUSED*/
3144 3144 static void
3145 3145 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg)
3146 3146 {
3147 3147 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
3148 3148 cv_broadcast(&vdp->xdf_hp_status_cv);
3149 3149 }
3150 3150
3151 3151 static int
3152 3152 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
3153 3153 char *name, caddr_t valuep, int *lengthp)
3154 3154 {
3155 3155 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
3156 3156
3157 3157 /*
3158 3158 * Sanity check that if a dev_t or dip were specified that they
3159 3159 * correspond to this device driver. On debug kernels we'll
3160 3160 * panic and on non-debug kernels we'll return failure.
3161 3161 */
3162 3162 ASSERT(ddi_driver_major(dip) == xdf_major);
3163 3163 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major));
3164 3164 if ((ddi_driver_major(dip) != xdf_major) ||
3165 3165 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major)))
3166 3166 return (DDI_PROP_NOT_FOUND);
3167 3167
3168 3168 if (vdp == NULL)
3169 3169 return (ddi_prop_op(dev, dip, prop_op, flags,
3170 3170 name, valuep, lengthp));
3171 3171
3172 3172 return (cmlb_prop_op(vdp->xdf_vd_lbl,
3173 3173 dev, dip, prop_op, flags, name, valuep, lengthp,
3174 3174 XDF_PART(getminor(dev)), NULL));
3175 3175 }
3176 3176
3177 3177 /*ARGSUSED*/
3178 3178 static int
3179 3179 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
3180 3180 {
3181 3181 int instance = XDF_INST(getminor((dev_t)arg));
3182 3182 xdf_t *vbdp;
3183 3183
3184 3184 switch (cmd) {
3185 3185 case DDI_INFO_DEVT2DEVINFO:
3186 3186 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) {
3187 3187 *rp = NULL;
3188 3188 return (DDI_FAILURE);
3189 3189 }
3190 3190 *rp = vbdp->xdf_dip;
3191 3191 return (DDI_SUCCESS);
3192 3192
3193 3193 case DDI_INFO_DEVT2INSTANCE:
3194 3194 *rp = (void *)(uintptr_t)instance;
3195 3195 return (DDI_SUCCESS);
3196 3196
3197 3197 default:
3198 3198 return (DDI_FAILURE);
3199 3199 }
3200 3200 }
3201 3201
3202 3202 /*ARGSUSED*/
3203 3203 static int
3204 3204 xdf_resume(dev_info_t *dip)
3205 3205 {
3206 3206 xdf_t *vdp;
3207 3207 char *oename;
3208 3208
3209 3209 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
3210 3210 goto err;
3211 3211
3212 3212 if (xdf_debug & SUSRES_DBG)
3213 3213 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr);
3214 3214
3215 3215 mutex_enter(&vdp->xdf_cb_lk);
3216 3216
3217 3217 if (xvdi_resume(dip) != DDI_SUCCESS) {
3218 3218 mutex_exit(&vdp->xdf_cb_lk);
3219 3219 goto err;
3220 3220 }
3221 3221
3222 3222 if (((oename = xvdi_get_oename(dip)) == NULL) ||
3223 3223 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
3224 3224 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) {
3225 3225 mutex_exit(&vdp->xdf_cb_lk);
3226 3226 goto err;
3227 3227 }
3228 3228
3229 3229 mutex_enter(&vdp->xdf_dev_lk);
3230 3230 ASSERT(vdp->xdf_state != XD_READY);
3231 3231 xdf_set_state(vdp, XD_UNKNOWN);
3232 3232 mutex_exit(&vdp->xdf_dev_lk);
3233 3233
3234 3234 if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
3235 3235 mutex_exit(&vdp->xdf_cb_lk);
3236 3236 goto err;
3237 3237 }
3238 3238
3239 3239 mutex_exit(&vdp->xdf_cb_lk);
3240 3240
3241 3241 if (xdf_debug & SUSRES_DBG)
3242 3242 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr);
3243 3243 return (DDI_SUCCESS);
3244 3244 err:
3245 3245 if (xdf_debug & SUSRES_DBG)
3246 3246 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr);
3247 3247 return (DDI_FAILURE);
3248 3248 }
3249 3249
3250 3250 /*
3251 3251 * Uses the in-memory devid if one exists.
3252 3252 *
3253 3253 * Create a devid and write it on the first block of the last track of
3254 3254 * the last cylinder.
3255 3255 * Return DDI_SUCCESS or DDI_FAILURE.
3256 3256 */
3257 3257 static int
3258 3258 xdf_devid_fabricate(xdf_t *vdp)
3259 3259 {
3260 3260 ddi_devid_t devid = vdp->xdf_tgt_devid; /* null if no devid */
3261 3261 struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */
3262 3262 diskaddr_t blk;
3263 3263 uint_t *ip, chksum;
3264 3264 int i, devid_size;
3265 3265
3266 3266 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0)
3267 3267 goto err;
3268 3268
3269 3269 if (devid == NULL && ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0,
3270 3270 NULL, &devid) != DDI_SUCCESS)
3271 3271 goto err;
3272 3272
3273 3273 /* allocate a buffer */
3274 3274 dkdevidp = (struct dk_devid *)kmem_zalloc(NBPSCTR, KM_SLEEP);
3275 3275
3276 3276 /* Fill in the revision */
3277 3277 dkdevidp->dkd_rev_hi = DK_DEVID_REV_MSB;
3278 3278 dkdevidp->dkd_rev_lo = DK_DEVID_REV_LSB;
3279 3279
3280 3280 /* Copy in the device id */
3281 3281 devid_size = ddi_devid_sizeof(devid);
3282 3282 if (devid_size > DK_DEVID_SIZE)
3283 3283 goto err;
3284 3284 bcopy(devid, dkdevidp->dkd_devid, devid_size);
3285 3285
3286 3286 /* Calculate the chksum */
3287 3287 chksum = 0;
3288 3288 ip = (uint_t *)dkdevidp;
3289 3289 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++)
3290 3290 chksum ^= ip[i];
3291 3291
3292 3292 /* Fill in the checksum */
3293 3293 DKD_FORMCHKSUM(chksum, dkdevidp);
3294 3294
3295 3295 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, dkdevidp, blk,
3296 3296 NBPSCTR, NULL) != 0)
3297 3297 goto err;
3298 3298
3299 3299 kmem_free(dkdevidp, NBPSCTR);
3300 3300
3301 3301 vdp->xdf_tgt_devid = devid;
3302 3302 return (DDI_SUCCESS);
3303 3303
3304 3304 err:
3305 3305 if (dkdevidp != NULL)
3306 3306 kmem_free(dkdevidp, NBPSCTR);
3307 3307 if (devid != NULL && vdp->xdf_tgt_devid == NULL)
3308 3308 ddi_devid_free(devid);
3309 3309 return (DDI_FAILURE);
3310 3310 }
3311 3311
3312 3312 /*
3313 3313 * xdf_devid_read() is a local copy of xdfs_devid_read(), modified to use xdf
3314 3314 * functions.
3315 3315 *
3316 3316 * Read a devid from on the first block of the last track of
3317 3317 * the last cylinder. Make sure what we read is a valid devid.
3318 3318 * Return DDI_SUCCESS or DDI_FAILURE.
3319 3319 */
3320 3320 static int
3321 3321 xdf_devid_read(xdf_t *vdp)
3322 3322 {
3323 3323 diskaddr_t blk;
3324 3324 struct dk_devid *dkdevidp;
3325 3325 uint_t *ip, chksum;
3326 3326 int i;
3327 3327
3328 3328 if (cmlb_get_devid_block(vdp->xdf_vd_lbl, &blk, NULL) != 0)
3329 3329 return (DDI_FAILURE);
3330 3330
3331 3331 dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP);
3332 3332 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, dkdevidp, blk,
3333 3333 NBPSCTR, NULL) != 0)
3334 3334 goto err;
3335 3335
3336 3336 /* Validate the revision */
3337 3337 if ((dkdevidp->dkd_rev_hi != DK_DEVID_REV_MSB) ||
3338 3338 (dkdevidp->dkd_rev_lo != DK_DEVID_REV_LSB))
3339 3339 goto err;
3340 3340
3341 3341 /* Calculate the checksum */
3342 3342 chksum = 0;
3343 3343 ip = (uint_t *)dkdevidp;
3344 3344 for (i = 0; i < (NBPSCTR / sizeof (int)) - 1; i++)
3345 3345 chksum ^= ip[i];
3346 3346 if (DKD_GETCHKSUM(dkdevidp) != chksum)
3347 3347 goto err;
3348 3348
3349 3349 /* Validate the device id */
3350 3350 if (ddi_devid_valid((ddi_devid_t)dkdevidp->dkd_devid) != DDI_SUCCESS)
3351 3351 goto err;
3352 3352
3353 3353 /* keep a copy of the device id */
3354 3354 i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid);
3355 3355 vdp->xdf_tgt_devid = kmem_alloc(i, KM_SLEEP);
3356 3356 bcopy(dkdevidp->dkd_devid, vdp->xdf_tgt_devid, i);
3357 3357 kmem_free(dkdevidp, NBPSCTR);
3358 3358 return (DDI_SUCCESS);
3359 3359
3360 3360 err:
3361 3361 kmem_free(dkdevidp, NBPSCTR);
3362 3362 return (DDI_FAILURE);
3363 3363 }
3364 3364
3365 3365 /*
3366 3366 * xdf_devid_setup() is a modified copy of cmdk_devid_setup().
3367 3367 *
3368 3368 * This function creates a devid if we don't already have one, and
3369 3369 * registers it. If we already have one, we make sure that it can be
3370 3370 * read from the disk, otherwise we write it to the disk ourselves. If
3371 3371 * we didn't already have a devid, and we create one, we also need to
3372 3372 * register it.
3373 3373 */
3374 3374 void
3375 3375 xdf_devid_setup(xdf_t *vdp)
3376 3376 {
3377 3377 int rc;
3378 3378 boolean_t existed = vdp->xdf_tgt_devid != NULL;
3379 3379
3380 3380 /* Read devid from the disk, if present */
3381 3381 rc = xdf_devid_read(vdp);
3382 3382
3383 3383 /* Otherwise write a devid (which we create if necessary) on the disk */
3384 3384 if (rc != DDI_SUCCESS)
3385 3385 rc = xdf_devid_fabricate(vdp);
3386 3386
3387 3387 /* If we created a devid or found it on the disk, register it */
3388 3388 if (rc == DDI_SUCCESS && !existed)
3389 3389 (void) ddi_devid_register(vdp->xdf_dip, vdp->xdf_tgt_devid);
3390 3390 }
3391 3391
3392 3392 static int
3393 3393 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
3394 3394 {
3395 3395 int n, instance = ddi_get_instance(dip);
3396 3396 ddi_iblock_cookie_t ibc, softibc;
3397 3397 boolean_t dev_iscd = B_FALSE;
3398 3398 xdf_t *vdp;
3399 3399 char *oename, *xsname, *str;
3400 3400 clock_t timeout;
3401 3401 int err = 0;
3402 3402
3403 3403 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM,
3404 3404 "xdf_debug", 0)) != 0)
3405 3405 xdf_debug = n;
3406 3406
3407 3407 switch (cmd) {
3408 3408 case DDI_RESUME:
3409 3409 return (xdf_resume(dip));
3410 3410 case DDI_ATTACH:
3411 3411 break;
3412 3412 default:
3413 3413 return (DDI_FAILURE);
3414 3414 }
3415 3415 /* DDI_ATTACH */
3416 3416
3417 3417 if ((xsname = xvdi_get_xsname(dip)) == NULL ||
3418 3418 (oename = xvdi_get_oename(dip)) == NULL)
3419 3419 return (DDI_FAILURE);
3420 3420
3421 3421 /*
3422 3422 * Disable auto-detach. This is necessary so that we don't get
3423 3423 * detached while we're disconnected from the back end.
3424 3424 */
3425 3425 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip,
3426 3426 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS))
3427 3427 return (DDI_FAILURE);
3428 3428
3429 3429 /* driver handles kernel-issued IOCTLs */
3430 3430 if (ddi_prop_create(DDI_DEV_T_NONE, dip,
3431 3431 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS)
3432 3432 return (DDI_FAILURE);
3433 3433
3434 3434 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
3435 3435 return (DDI_FAILURE);
3436 3436
3437 3437 if (ddi_get_soft_iblock_cookie(dip,
3438 3438 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS)
3439 3439 return (DDI_FAILURE);
3440 3440
3441 3441 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
3442 3442 cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
3443 3443 ddi_get_name_addr(dip));
3444 3444 return (DDI_FAILURE);
3445 3445 }
3446 3446 if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
3447 3447 dev_iscd = B_TRUE;
3448 3448 strfree(str);
3449 3449
3450 3450 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS)
3451 3451 return (DDI_FAILURE);
3452 3452
3453 3453 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip)));
3454 3454 vdp = ddi_get_soft_state(xdf_ssp, instance);
3455 3455 ddi_set_driver_private(dip, vdp);
3456 3456 vdp->xdf_dip = dip;
3457 3457 vdp->xdf_addr = ddi_get_name_addr(dip);
3458 3458 vdp->xdf_suspending = B_FALSE;
3459 3459 vdp->xdf_media_req_supported = B_FALSE;
3460 3460 vdp->xdf_peer = INVALID_DOMID;
3461 3461 vdp->xdf_evtchn = INVALID_EVTCHN;
3462 3462 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
3463 3463 offsetof(v_req_t, v_link));
3464 3464 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);
3465 3465 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL);
3466 3466 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL);
3467 3467 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc);
3468 3468 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc);
3469 3469 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc);
3470 3470 vdp->xdf_cmlb_reattach = B_TRUE;
3471 3471 if (dev_iscd) {
3472 3472 vdp->xdf_dinfo |= VDISK_CDROM;
3473 3473 vdp->xdf_mstate = DKIO_EJECTED;
3474 3474 } else {
3475 3475 vdp->xdf_mstate = DKIO_NONE;
3476 3476 }
3477 3477
3478 3478 if ((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq",
3479 3479 1, TASKQ_DEFAULTPRI, 0)) == NULL)
3480 3480 goto errout0;
3481 3481
3482 3482 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
3483 3483 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)
3484 3484 goto errout0;
3485 3485
3486 3486 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
3487 3487 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
3488 3488 cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
3489 3489 ddi_get_name_addr(dip));
3490 3490 goto errout0;
3491 3491 }
3492 3492
3493 3493 /*
3494 3494 * Initialize the physical geometry stucture. Note that currently
3495 3495 * we don't know the size of the backend device so the number
3496 3496 * of blocks on the device will be initialized to zero. Once
3497 3497 * we connect to the backend device we'll update the physical
3498 3498 * geometry to reflect the real size of the device.
3499 3499 */
3500 3500 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom);
3501 3501 vdp->xdf_pgeom_fixed = B_FALSE;
3502 3502
3503 3503 /*
3504 3504 * Allocate the cmlb handle, minor nodes will be created once
3505 3505 * the device is connected with backend.
3506 3506 */
3507 3507 cmlb_alloc_handle(&vdp->xdf_vd_lbl);
3508 3508
3509 3509 /* We ship with cache-enabled disks */
3510 3510 vdp->xdf_wce = B_TRUE;
3511 3511
3512 3512 mutex_enter(&vdp->xdf_cb_lk);
3513 3513 /* Watch backend XenbusState change */
3514 3514 if (xvdi_add_event_handler(dip,
3515 3515 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) {
3516 3516 mutex_exit(&vdp->xdf_cb_lk);
3517 3517 goto errout0;
3518 3518 }
3519 3519
3520 3520 if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
3521 3521 cmn_err(CE_WARN, "xdf@%s: start connection failed",
3522 3522 ddi_get_name_addr(dip));
3523 3523 mutex_exit(&vdp->xdf_cb_lk);
3524 3524 goto errout1;
3525 3525 }
3526 3526
3527 3527 /* Nothing else to do for CD devices */
3528 3528 if (dev_iscd) {
3529 3529 mutex_exit(&vdp->xdf_cb_lk);
3530 3530 goto done;
3531 3531 }
3532 3532
3533 3533 /*
3534 3534 * In order to do cmlb_validate, we have to wait for the disk to
3535 3535 * acknowledge the attach, so we can query the backend for the disk
3536 3536 * geometry (see xdf_setstate_connected).
3537 3537 *
3538 3538 * We only wait 30 seconds; if this is the root disk, the boot
3539 3539 * will fail, but it would fail anyway if the device never
3540 3540 * connected. If this is a non-boot disk, that disk will fail
3541 3541 * to connect, but again, it would fail anyway.
3542 3542 */
3543 3543 timeout = ddi_get_lbolt() + drv_usectohz(XDF_STATE_TIMEOUT);
3544 3544 while (vdp->xdf_state != XD_CONNECTED && vdp->xdf_state != XD_READY) {
3545 3545 if (cv_timedwait(&vdp->xdf_dev_cv, &vdp->xdf_cb_lk,
3546 3546 timeout) < 0) {
3547 3547 cmn_err(CE_WARN, "xdf@%s: disk failed to connect",
3548 3548 ddi_get_name_addr(dip));
3549 3549 mutex_exit(&vdp->xdf_cb_lk);
3550 3550 goto errout1;
3551 3551 }
3552 3552 }
3553 3553 mutex_exit(&vdp->xdf_cb_lk);
3554 3554
3555 3555 /*
3556 3556 * We call cmlb_validate so that the geometry information in
3557 3557 * vdp->xdf_vd_lbl is correct; this fills out the number of
3558 3558 * alternate cylinders so that we have a place to write the
3559 3559 * devid.
3560 3560 */
3561 3561 if ((err = cmlb_validate(vdp->xdf_vd_lbl, 0, NULL)) != 0) {
3562 3562 cmn_err(CE_NOTE,
3563 3563 "xdf@%s: cmlb_validate failed: %d",
3564 3564 ddi_get_name_addr(dip), err);
3565 3565 /*
3566 3566 * We can carry on even if cmlb_validate() returns EINVAL here,
3567 3567 * as we'll rewrite the disk label anyway.
3568 3568 */
3569 3569 if (err != EINVAL)
3570 3570 goto errout1;
3571 3571 }
3572 3572
3573 3573 /*
3574 3574 * xdf_devid_setup will only write a devid if one isn't
3575 3575 * already present. If it fails to find or create one, we
3576 3576 * create one in-memory so that when we label the disk later,
3577 3577 * it will have a devid to use. This is helpful to deal with
3578 3578 * cases where people use the devids of their disks before
3579 3579 * labelling them; note that this does cause problems if
3580 3580 * people rely on the devids of unlabelled disks to persist
3581 3581 * across reboot.
3582 3582 */
3583 3583 xdf_devid_setup(vdp);
3584 3584 if (vdp->xdf_tgt_devid == NULL) {
3585 3585 if (ddi_devid_init(vdp->xdf_dip, DEVID_FAB, 0, NULL,
3586 3586 &vdp->xdf_tgt_devid) != DDI_SUCCESS) {
3587 3587 cmn_err(CE_WARN,
3588 3588 "xdf@%s_ attach failed, devid_init failed",
3589 3589 ddi_get_name_addr(dip));
3590 3590 goto errout1;
|
↓ open down ↓ |
3590 lines elided |
↑ open up ↑ |
3591 3591 } else {
3592 3592 (void) ddi_devid_register(vdp->xdf_dip,
3593 3593 vdp->xdf_tgt_devid);
3594 3594 }
3595 3595 }
3596 3596
3597 3597 done:
3598 3598 #ifdef XPV_HVM_DRIVER
3599 3599 xdf_hvm_add(dip);
3600 3600
3601 - /* Report our version to dom0. */
3602 - if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d",
3603 - HVMPV_XDF_VERS))
3604 - cmn_err(CE_WARN, "xdf: couldn't write version\n");
3605 -
3601 + /* Report our version to dom0 */
3602 + (void) xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d",
3603 + HVMPV_XDF_VERS);
3606 3604 #endif /* XPV_HVM_DRIVER */
3607 3605
3608 3606 /* Create kstat for iostat(1M) */
3609 3607 if (xdf_kstat_create(dip) != 0) {
3610 3608 cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
3611 3609 ddi_get_name_addr(dip));
3612 3610 goto errout1;
3613 3611 }
3614 3612
3615 3613 /*
3616 3614 * Don't bother with getting real device identification
3617 3615 * strings (is it even possible?), they are unlikely to
3618 3616 * change often (if at all).
3619 3617 */
3620 3618 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_VENDOR_ID,
3621 3619 "Xen");
3622 3620 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_PRODUCT_ID,
3623 3621 dev_iscd ? "Virtual CD" : "Virtual disk");
3624 3622 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, INQUIRY_REVISION_ID,
3625 3623 "1.0");
3626 3624
3627 3625 ddi_report_dev(dip);
3628 3626 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr));
3629 3627 return (DDI_SUCCESS);
3630 3628
3631 3629 errout1:
3632 3630 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed);
3633 3631 xvdi_remove_event_handler(dip, XS_OE_STATE);
3634 3632 errout0:
3635 3633 if (vdp->xdf_vd_lbl != NULL) {
3636 3634 cmlb_free_handle(&vdp->xdf_vd_lbl);
3637 3635 vdp->xdf_vd_lbl = NULL;
3638 3636 }
3639 3637 if (vdp->xdf_softintr_id != NULL)
3640 3638 ddi_remove_softintr(vdp->xdf_softintr_id);
3641 3639 xvdi_remove_xb_watch_handlers(dip);
3642 3640 if (vdp->xdf_ready_tq != NULL)
3643 3641 ddi_taskq_destroy(vdp->xdf_ready_tq);
3644 3642 mutex_destroy(&vdp->xdf_cb_lk);
3645 3643 mutex_destroy(&vdp->xdf_dev_lk);
3646 3644 cv_destroy(&vdp->xdf_dev_cv);
3647 3645 cv_destroy(&vdp->xdf_hp_status_cv);
3648 3646 ddi_soft_state_free(xdf_ssp, instance);
3649 3647 ddi_set_driver_private(dip, NULL);
3650 3648 ddi_prop_remove_all(dip);
3651 3649 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip));
3652 3650 return (DDI_FAILURE);
3653 3651 }
3654 3652
3655 3653 static int
3656 3654 xdf_suspend(dev_info_t *dip)
3657 3655 {
3658 3656 int instance = ddi_get_instance(dip);
3659 3657 xdf_t *vdp;
3660 3658
3661 3659 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
3662 3660 return (DDI_FAILURE);
3663 3661
3664 3662 if (xdf_debug & SUSRES_DBG)
3665 3663 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr);
3666 3664
3667 3665 xvdi_suspend(dip);
3668 3666
3669 3667 mutex_enter(&vdp->xdf_cb_lk);
3670 3668 mutex_enter(&vdp->xdf_dev_lk);
3671 3669
3672 3670 vdp->xdf_suspending = B_TRUE;
3673 3671 xdf_ring_destroy(vdp);
3674 3672 xdf_set_state(vdp, XD_SUSPEND);
3675 3673 vdp->xdf_suspending = B_FALSE;
3676 3674
3677 3675 mutex_exit(&vdp->xdf_dev_lk);
3678 3676 mutex_exit(&vdp->xdf_cb_lk);
3679 3677
3680 3678 if (xdf_debug & SUSRES_DBG)
3681 3679 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr);
3682 3680
3683 3681 return (DDI_SUCCESS);
3684 3682 }
3685 3683
3686 3684 static int
3687 3685 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
3688 3686 {
3689 3687 xdf_t *vdp;
3690 3688 int instance;
3691 3689
3692 3690 switch (cmd) {
3693 3691
3694 3692 case DDI_PM_SUSPEND:
3695 3693 break;
3696 3694
3697 3695 case DDI_SUSPEND:
3698 3696 return (xdf_suspend(dip));
3699 3697
3700 3698 case DDI_DETACH:
3701 3699 break;
3702 3700
3703 3701 default:
3704 3702 return (DDI_FAILURE);
3705 3703 }
3706 3704
3707 3705 instance = ddi_get_instance(dip);
3708 3706 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip)));
3709 3707 vdp = ddi_get_soft_state(xdf_ssp, instance);
3710 3708
3711 3709 if (vdp == NULL)
3712 3710 return (DDI_FAILURE);
3713 3711
3714 3712 mutex_enter(&vdp->xdf_cb_lk);
3715 3713 xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
3716 3714 if (vdp->xdf_state != XD_CLOSED) {
3717 3715 mutex_exit(&vdp->xdf_cb_lk);
3718 3716 return (DDI_FAILURE);
3719 3717 }
3720 3718 mutex_exit(&vdp->xdf_cb_lk);
3721 3719
3722 3720 ASSERT(!ISDMACBON(vdp));
3723 3721
3724 3722 #ifdef XPV_HVM_DRIVER
3725 3723 xdf_hvm_rm(dip);
3726 3724 #endif /* XPV_HVM_DRIVER */
3727 3725
3728 3726 if (vdp->xdf_timeout_id != 0)
3729 3727 (void) untimeout(vdp->xdf_timeout_id);
3730 3728
3731 3729 xvdi_remove_event_handler(dip, XS_OE_STATE);
3732 3730 ddi_taskq_destroy(vdp->xdf_ready_tq);
3733 3731
3734 3732 cmlb_detach(vdp->xdf_vd_lbl, NULL);
3735 3733 cmlb_free_handle(&vdp->xdf_vd_lbl);
3736 3734
3737 3735 /* we'll support backend running in domU later */
3738 3736 #ifdef DOMU_BACKEND
3739 3737 (void) xvdi_post_event(dip, XEN_HP_REMOVE);
3740 3738 #endif
3741 3739
3742 3740 list_destroy(&vdp->xdf_vreq_act);
3743 3741 ddi_prop_remove_all(dip);
3744 3742 xdf_kstat_delete(dip);
3745 3743 ddi_remove_softintr(vdp->xdf_softintr_id);
3746 3744 xvdi_remove_xb_watch_handlers(dip);
3747 3745 ddi_set_driver_private(dip, NULL);
3748 3746 cv_destroy(&vdp->xdf_dev_cv);
3749 3747 mutex_destroy(&vdp->xdf_cb_lk);
3750 3748 mutex_destroy(&vdp->xdf_dev_lk);
3751 3749 if (vdp->xdf_cache_flush_block != NULL)
3752 3750 kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize);
3753 3751 ddi_soft_state_free(xdf_ssp, instance);
3754 3752 return (DDI_SUCCESS);
3755 3753 }
3756 3754
3757 3755 /*
3758 3756 * Driver linkage structures.
3759 3757 */
3760 3758 static struct cb_ops xdf_cbops = { /* character/block entry points, positional per cb_ops(9S) */
3761 3759 xdf_open, /* cb_open */
3762 3760 xdf_close, /* cb_close */
3763 3761 xdf_strategy, /* cb_strategy */
3764 3762 nodev, /* cb_print */
3765 3763 xdf_dump, /* cb_dump */
3766 3764 xdf_read, /* cb_read */
3767 3765 xdf_write, /* cb_write */
3768 3766 xdf_ioctl, /* cb_ioctl */
3769 3767 nodev, /* cb_devmap */
3770 3768 nodev, /* cb_mmap */
3771 3769 nodev, /* cb_segmap */
3772 3770 nochpoll, /* cb_chpoll */
3773 3771 xdf_prop_op, /* cb_prop_op */
3774 3772 NULL, /* cb_str */
3775 3773 D_MP | D_NEW | D_64BIT, /* cb_flag */
3776 3774 CB_REV, /* cb_rev */
3777 3775 xdf_aread, /* cb_aread */
3778 3776 xdf_awrite /* cb_awrite */
3779 3777 };
3780 3778
3781 3779 struct dev_ops xdf_devops = { /* device operations; referenced by modldrv below */
3782 3780 DEVO_REV, /* devo_rev */
3783 3781 0, /* devo_refcnt */
3784 3782 xdf_getinfo, /* devo_getinfo */
3785 3783 nulldev, /* devo_identify */
3786 3784 nulldev, /* devo_probe */
3787 3785 xdf_attach, /* devo_attach */
3788 3786 xdf_detach, /* devo_detach */
3789 3787 nodev, /* devo_reset */
3790 3788 &xdf_cbops, /* devo_cb_ops */
3791 3789 NULL, /* devo_bus_ops */
3792 3790 NULL, /* devo_power */
3793 3791 ddi_quiesce_not_supported, /* devo_quiesce */
3794 3792 };
3795 3793
3796 3794 /*
3797 3795 * Module linkage structures.
3798 3796 */
3799 3797 static struct modldrv modldrv = {
3800 3798 &mod_driverops, /* Type of module. This one is a driver */
3801 3799 "virtual block driver", /* short description */
3802 3800 &xdf_devops /* driver specific ops */
3803 3801 };
3804 3802
3805 3803 static struct modlinkage xdf_modlinkage = {
3806 3804 MODREV_1, (void *)&modldrv, NULL
3807 3805 };
3808 3806
3809 3807 /*
3810 3808 * standard module entry points
3811 3809 */
3812 3810 int
3813 3811 _init(void)
3814 3812 {
3815 3813 int rc;
3816 3814
3817 3815 xdf_major = ddi_name_to_major("xdf");
3818 3816 if (xdf_major == (major_t)-1)
3819 3817 return (EINVAL);
3820 3818
3821 3819 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0)
3822 3820 return (rc);
3823 3821
3824 3822 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
3825 3823 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
3826 3824 xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
3827 3825 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
3828 3826
3829 3827 #ifdef XPV_HVM_DRIVER
3830 3828 xdf_hvm_init();
3831 3829 #endif /* XPV_HVM_DRIVER */
3832 3830
3833 3831 if ((rc = mod_install(&xdf_modlinkage)) != 0) {
3834 3832 #ifdef XPV_HVM_DRIVER
3835 3833 xdf_hvm_fini();
3836 3834 #endif /* XPV_HVM_DRIVER */
3837 3835 kmem_cache_destroy(xdf_vreq_cache);
3838 3836 kmem_cache_destroy(xdf_gs_cache);
3839 3837 ddi_soft_state_fini(&xdf_ssp);
3840 3838 return (rc);
3841 3839 }
3842 3840
3843 3841 return (rc);
3844 3842 }
3845 3843
3846 3844 int
3847 3845 _fini(void)
3848 3846 {
3849 3847 int err;
3850 3848 if ((err = mod_remove(&xdf_modlinkage)) != 0)
3851 3849 return (err);
3852 3850
3853 3851 #ifdef XPV_HVM_DRIVER
3854 3852 xdf_hvm_fini();
3855 3853 #endif /* XPV_HVM_DRIVER */
3856 3854
3857 3855 kmem_cache_destroy(xdf_vreq_cache);
3858 3856 kmem_cache_destroy(xdf_gs_cache);
3859 3857 ddi_soft_state_fini(&xdf_ssp);
3860 3858
3861 3859 return (0);
3862 3860 }
3863 3861
3864 3862 int
3865 3863 _info(struct modinfo *modinfop)
3866 3864 {
3867 3865 return (mod_info(&xdf_modlinkage, modinfop));
3868 3866 }
|
↓ open down ↓ |
253 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX