935 sv_lyr_open() misses one NULL-pointer check
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Gordon Ross <gwr@nexenta.com>
--- old/usr/src/uts/common/avs/ns/sv/sv.c
+++ new/usr/src/uts/common/avs/ns/sv/sv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 + *
25 + * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 26 */
25 27
26 28 /*
27 29 * Storage Volume Character and Block Driver (SV)
28 30 *
29 31 * This driver implements a simplistic /dev/{r}dsk/ interface to a
30 32 * specified disk volume that is otherwise managed by the Prism
31 33 * software. The SV driver layers itself onto the underlying disk
32 34 * device driver by changing function pointers in the cb_ops
33 35 * structure.
34 36 *
35 37 * CONFIGURATION:
36 38 *
37 39 * 1. Configure the driver using the svadm utility.
38 40 * 2. Access the device as before through /dev/rdsk/c?t?d?s?
39 41 *
40 42 * LIMITATIONS:
41 43 *
42 44 * This driver should NOT be used to share a device between another
43 45 * DataServices user interface module (e.g., STE) and a user accessing
44 46 * the device through the block device in O_WRITE mode. This is because
45 47 * writes through the block device are asynchronous (due to the page
46 48 * cache) and so consistency between the block device user and the
47 49 * STE user cannot be guaranteed.
48 50 *
49 51 * Data is copied between system struct buf(9s) and nsc_vec_t. This is
50 52 * wasteful and slow.
51 53 */
52 54
53 55 #include <sys/debug.h>
54 56 #include <sys/types.h>
55 57
56 58 #include <sys/ksynch.h>
57 59 #include <sys/kmem.h>
58 60 #include <sys/errno.h>
59 61 #include <sys/varargs.h>
60 62 #include <sys/file.h>
61 63 #include <sys/open.h>
62 64 #include <sys/conf.h>
63 65 #include <sys/cred.h>
64 66 #include <sys/buf.h>
65 67 #include <sys/uio.h>
66 68 #ifndef DS_DDICT
67 69 #include <sys/pathname.h>
68 70 #endif
69 71 #include <sys/aio_req.h>
70 72 #include <sys/dkio.h>
71 73 #include <sys/vtoc.h>
72 74 #include <sys/cmn_err.h>
73 75 #include <sys/modctl.h>
74 76 #include <sys/ddi.h>
75 77 #include <sys/sunddi.h>
76 78 #include <sys/sunldi.h>
77 79 #include <sys/nsctl/nsvers.h>
78 80
79 81 #include <sys/nsc_thread.h>
80 82 #include <sys/unistat/spcs_s.h>
81 83 #include <sys/unistat/spcs_s_k.h>
82 84 #include <sys/unistat/spcs_errors.h>
83 85
84 86 #ifdef DS_DDICT
85 87 #include "../contract.h"
86 88 #endif
87 89
88 90 #include "../nsctl.h"
89 91
90 92
91 93 #include <sys/sdt.h> /* dtrace is S10 or later */
92 94
93 95 #include "sv.h"
94 96 #include "sv_impl.h"
95 97 #include "sv_efi.h"
96 98
97 99 #define MAX_EINTR_COUNT 1000
98 100
99 101 /*
100 102 * sv_mod_status
101 103 */
102 104 #define SV_PREVENT_UNLOAD 1
103 105 #define SV_ALLOW_UNLOAD 2
104 106
105 107 static const int sv_major_rev = ISS_VERSION_MAJ; /* Major number */
106 108 static const int sv_minor_rev = ISS_VERSION_MIN; /* Minor number */
107 109 static const int sv_micro_rev = ISS_VERSION_MIC; /* Micro number */
108 110 static const int sv_baseline_rev = ISS_VERSION_NUM; /* Baseline number */
109 111
110 112 #ifdef DKIOCPARTITION
111 113 /*
112 114 * CRC32 polynomial table needed for computing the checksums
113 115 * in an EFI vtoc.
114 116 */
115 117 static const uint32_t sv_crc32_table[256] = { CRC32_TABLE };
116 118 #endif
117 119
118 120 static clock_t sv_config_time; /* Time of successful {en,dis}able */
119 121 static int sv_debug; /* Set non-zero for debug to syslog */
120 122 static int sv_mod_status; /* Set to prevent modunload */
121 123
122 124 static dev_info_t *sv_dip; /* Single DIP for driver */
123 125 static kmutex_t sv_mutex; /* Protect global lists, etc. */
124 126
125 127 static nsc_mem_t *sv_mem; /* nsctl memory allocator token */
126 128
127 129
128 130 /*
129 131 * Per device and per major state.
130 132 */
131 133
132 134 #ifndef _SunOS_5_6
133 135 #define UNSAFE_ENTER()
134 136 #define UNSAFE_EXIT()
135 137 #else
136 138 #define UNSAFE_ENTER() mutex_enter(&unsafe_driver)
137 139 #define UNSAFE_EXIT() mutex_exit(&unsafe_driver)
138 140 #endif
139 141
140 142 /* hash table of major dev structures */
141 143 static sv_maj_t *sv_majors[SV_MAJOR_HASH_CNT] = {0};
142 144 static sv_dev_t *sv_devs; /* array of per device structures */
143 145 static int sv_max_devices; /* SV version of nsc_max_devices() */
144 146 static int sv_ndevices; /* number of SV enabled devices */
145 147
146 148 /*
147 149 * Threading.
148 150 */
149 151
150 152 int sv_threads_max = 1024; /* maximum # to dynamically alloc */
151 153 int sv_threads = 32; /* # to pre-allocate (see sv.conf) */
152 154 int sv_threads_extra = 0; /* addl # we would have alloc'ed */
153 155
154 156 static nstset_t *sv_tset; /* the threadset pointer */
155 157
156 158 static int sv_threads_hysteresis = 4; /* hysteresis for threadset resizing */
157 159 static int sv_threads_dev = 2; /* # of threads to alloc per device */
158 160 static int sv_threads_inc = 8; /* increment for changing the set */
159 161 static int sv_threads_needed; /* number of threads needed */
160 162 static int sv_no_threads; /* number of nsc_create errors */
161 163 static int sv_max_nlive; /* max number of threads running */
162 164
163 165
164 166
165 167 /*
166 168 * nsctl fd callbacks.
167 169 */
168 170
169 171 static int svattach_fd(blind_t);
170 172 static int svdetach_fd(blind_t);
171 173
172 174 static nsc_def_t sv_fd_def[] = {
173 175 { "Attach", (uintptr_t)svattach_fd, },
174 176 { "Detach", (uintptr_t)svdetach_fd, },
175 177 { 0, 0, }
176 178 };
177 179
178 180 /*
179 181 * cb_ops functions.
180 182 */
181 183
182 184 static int svopen(dev_t *, int, int, cred_t *);
183 185 static int svclose(dev_t, int, int, cred_t *);
184 186 static int svioctl(dev_t, int, intptr_t, int, cred_t *, int *);
185 187 static int svprint(dev_t, char *);
186 188
187 189 /*
188 190 * These next functions are layered into the underlying driver's devops.
189 191 */
190 192
191 193 static int sv_lyr_open(dev_t *, int, int, cred_t *);
192 194 static int sv_lyr_close(dev_t, int, int, cred_t *);
193 195 static int sv_lyr_strategy(struct buf *);
194 196 static int sv_lyr_read(dev_t, struct uio *, cred_t *);
195 197 static int sv_lyr_write(dev_t, struct uio *, cred_t *);
196 198 static int sv_lyr_aread(dev_t, struct aio_req *, cred_t *);
197 199 static int sv_lyr_awrite(dev_t, struct aio_req *, cred_t *);
198 200 static int sv_lyr_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
199 201
200 202 static struct cb_ops sv_cb_ops = {
201 203 svopen, /* open */
202 204 svclose, /* close */
203 205 nulldev, /* strategy */
204 206 svprint,
205 207 nodev, /* dump */
206 208 nodev, /* read */
207 209 nodev, /* write */
208 210 svioctl,
209 211 nodev, /* devmap */
210 212 nodev, /* mmap */
211 213 nodev, /* segmap */
212 214 nochpoll, /* poll */
213 215 ddi_prop_op,
214 216 NULL, /* NOT a stream */
215 217 D_NEW | D_MP | D_64BIT,
216 218 CB_REV,
217 219 nodev, /* aread */
218 220 nodev, /* awrite */
219 221 };
220 222
221 223
222 224 /*
223 225 * dev_ops functions.
224 226 */
225 227
226 228 static int sv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
227 229 static int sv_attach(dev_info_t *, ddi_attach_cmd_t);
228 230 static int sv_detach(dev_info_t *, ddi_detach_cmd_t);
229 231
230 232 static struct dev_ops sv_ops = {
231 233 DEVO_REV,
232 234 0,
233 235 sv_getinfo,
234 236 nulldev, /* identify */
235 237 nulldev, /* probe */
236 238 sv_attach,
237 239 sv_detach,
238 240 nodev, /* reset */
239 241 &sv_cb_ops,
240 242 (struct bus_ops *)0
241 243 };
242 244
243 245 /*
244 246 * Module linkage.
245 247 */
246 248
247 249 extern struct mod_ops mod_driverops;
248 250
249 251 static struct modldrv modldrv = {
250 252 &mod_driverops,
251 253 "nws:Storage Volume:" ISS_VERSION_STR,
252 254 &sv_ops
253 255 };
254 256
255 257 static struct modlinkage modlinkage = {
256 258 MODREV_1,
257 259 &modldrv,
258 260 0
259 261 };
260 262
261 263
262 264 int
263 265 _init(void)
264 266 {
265 267 int error;
266 268
267 269 mutex_init(&sv_mutex, NULL, MUTEX_DRIVER, NULL);
268 270
269 271 if ((error = mod_install(&modlinkage)) != 0) {
270 272 mutex_destroy(&sv_mutex);
271 273 return (error);
272 274 }
273 275
274 276 #ifdef DEBUG
275 277 cmn_err(CE_CONT, "!sv (revision %d.%d.%d.%d, %s, %s)\n",
276 278 sv_major_rev, sv_minor_rev, sv_micro_rev, sv_baseline_rev,
277 279 ISS_VERSION_STR, BUILD_DATE_STR);
278 280 #else
279 281 if (sv_micro_rev) {
280 282 cmn_err(CE_CONT, "!sv (revision %d.%d.%d, %s, %s)\n",
281 283 sv_major_rev, sv_minor_rev, sv_micro_rev,
282 284 ISS_VERSION_STR, BUILD_DATE_STR);
283 285 } else {
284 286 cmn_err(CE_CONT, "!sv (revision %d.%d, %s, %s)\n",
285 287 sv_major_rev, sv_minor_rev,
286 288 ISS_VERSION_STR, BUILD_DATE_STR);
287 289 }
288 290 #endif
289 291
290 292 return (error);
291 293 }
292 294
293 295
294 296 int
295 297 _fini(void)
296 298 {
297 299 int error;
298 300
299 301 if ((error = mod_remove(&modlinkage)) != 0)
300 302 return (error);
301 303
302 304 mutex_destroy(&sv_mutex);
303 305
304 306 return (error);
305 307 }
306 308
307 309
308 310 int
309 311 _info(struct modinfo *modinfop)
310 312 {
311 313 return (mod_info(&modlinkage, modinfop));
312 314 }
313 315
314 316
315 317 /*
316 318 * Locking & State.
317 319 *
318 320 * sv_mutex protects config information - sv_maj_t and sv_dev_t lists;
319 321 * threadset creation and sizing; sv_ndevices.
320 322 *
321 323 * If we need to hold both sv_mutex and sv_lock, then the sv_mutex
322 324 * must be acquired first.
323 325 *
324 326 * sv_lock protects the sv_dev_t structure for an individual device.
325 327 *
326 328 * sv_olock protects the otyp/open members of the sv_dev_t. If we need
327 329 * to hold both sv_lock and sv_olock, then the sv_lock must be acquired
328 330 * first.
329 331 *
330 332 * nsc_reserve/nsc_release are used in NSC_MULTI mode to allow multiple
331 333 * I/O operations to a device simultaneously, as above.
332 334 *
333 335 * All nsc_open/nsc_close/nsc_reserve/nsc_release operations that occur
334 336 * with sv_lock write-locked must be done with (sv_state == SV_PENDING)
335 337 * and (sv_pending == curthread) so that any recursion through
336 338 * sv_lyr_open/sv_lyr_close can be detected.
337 339 */
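
As a reading aid, the hierarchy above reduces to a fixed acquisition order:
sv_mutex first, then a device's sv_lock, then its sv_olock, released in
reverse. A minimal sketch, illustrative only (the helper name is
hypothetical; the locks are the driver's own):

	static void
	sv_lock_order_sketch(sv_dev_t *svp)
	{
		mutex_enter(&sv_mutex);			/* 1. global config lock */
		rw_enter(&svp->sv_lock, RW_WRITER);	/* 2. per-device lock */
		mutex_enter(&svp->sv_olock);		/* 3. otyp/open-count lock */

		/* ... work that needs all three locks ... */

		mutex_exit(&svp->sv_olock);
		rw_exit(&svp->sv_lock);
		mutex_exit(&sv_mutex);
	}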
338 340
339 341
340 342 static int
341 343 sv_init_devs(void)
342 344 {
343 345 int i;
344 346
345 347 ASSERT(MUTEX_HELD(&sv_mutex));
346 348
347 349 if (sv_max_devices > 0)
348 350 return (0);
349 351
350 352 sv_max_devices = nsc_max_devices();
351 353
352 354 if (sv_max_devices <= 0) {
353 355 /* nsctl is not attached (nskernd not running) */
354 356 if (sv_debug > 0)
355 357 cmn_err(CE_CONT, "!sv: nsc_max_devices = 0\n");
356 358 return (EAGAIN);
357 359 }
358 360
359 361 sv_devs = nsc_kmem_zalloc((sv_max_devices * sizeof (*sv_devs)),
360 362 KM_NOSLEEP, sv_mem);
361 363
362 364 if (sv_devs == NULL) {
363 365 cmn_err(CE_WARN, "!sv: could not allocate sv_devs array");
364 366 return (ENOMEM);
365 367 }
366 368
367 369 for (i = 0; i < sv_max_devices; i++) {
368 370 mutex_init(&sv_devs[i].sv_olock, NULL, MUTEX_DRIVER, NULL);
369 371 rw_init(&sv_devs[i].sv_lock, NULL, RW_DRIVER, NULL);
370 372 }
371 373
372 374 if (sv_debug > 0)
373 375 cmn_err(CE_CONT, "!sv: sv_init_devs successful\n");
374 376
375 377 return (0);
376 378 }
377 379
378 380
379 381 static int
380 382 sv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
381 383 {
382 384 int rc;
383 385
384 386 switch (cmd) {
385 387
386 388 case DDI_ATTACH:
387 389 sv_dip = dip;
388 390
389 391 if (ddi_create_minor_node(dip, "sv", S_IFCHR,
390 392 0, DDI_PSEUDO, 0) != DDI_SUCCESS)
391 393 goto failed;
392 394
393 395 mutex_enter(&sv_mutex);
394 396
395 397 sv_mem = nsc_register_mem("SV", NSC_MEM_LOCAL, 0);
396 398 if (sv_mem == NULL) {
397 399 mutex_exit(&sv_mutex);
398 400 goto failed;
399 401 }
400 402
401 403 rc = sv_init_devs();
402 404 if (rc != 0 && rc != EAGAIN) {
403 405 mutex_exit(&sv_mutex);
404 406 goto failed;
405 407 }
406 408
407 409 mutex_exit(&sv_mutex);
408 410
409 411
410 412 ddi_report_dev(dip);
411 413
412 414 sv_threads = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
413 415 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
414 416 "sv_threads", sv_threads);
415 417
416 418 if (sv_debug > 0)
417 419 cmn_err(CE_CONT, "!sv: sv_threads=%d\n", sv_threads);
418 420
419 421 if (sv_threads > sv_threads_max)
420 422 sv_threads_max = sv_threads;
421 423
422 424 return (DDI_SUCCESS);
423 425
424 426 default:
425 427 return (DDI_FAILURE);
426 428 }
427 429
428 430 failed:
429 431 DTRACE_PROBE(sv_attach_failed);
430 432 (void) sv_detach(dip, DDI_DETACH);
431 433 return (DDI_FAILURE);
432 434 }
433 435
434 436
435 437 static int
436 438 sv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
437 439 {
438 440 sv_dev_t *svp;
439 441 int i;
440 442
441 443 switch (cmd) {
442 444
443 445 case DDI_DETACH:
444 446
445 447 /*
446 448 * Check that everything is disabled.
447 449 */
448 450
449 451 mutex_enter(&sv_mutex);
450 452
451 453 if (sv_mod_status == SV_PREVENT_UNLOAD) {
452 454 mutex_exit(&sv_mutex);
453 455 DTRACE_PROBE(sv_detach_err_prevent);
454 456 return (DDI_FAILURE);
455 457 }
456 458
457 459 for (i = 0; sv_devs && i < sv_max_devices; i++) {
458 460 svp = &sv_devs[i];
459 461
460 462 if (svp->sv_state != SV_DISABLE) {
461 463 mutex_exit(&sv_mutex);
462 464 DTRACE_PROBE(sv_detach_err_busy);
463 465 return (DDI_FAILURE);
464 466 }
465 467 }
466 468
467 469
468 470 for (i = 0; sv_devs && i < sv_max_devices; i++) {
469 471 mutex_destroy(&sv_devs[i].sv_olock);
470 472 rw_destroy(&sv_devs[i].sv_lock);
471 473 }
472 474
473 475 if (sv_devs) {
474 476 nsc_kmem_free(sv_devs,
475 477 (sv_max_devices * sizeof (*sv_devs)));
476 478 sv_devs = NULL;
477 479 }
478 480 sv_max_devices = 0;
479 481
480 482 if (sv_mem) {
481 483 nsc_unregister_mem(sv_mem);
482 484 sv_mem = NULL;
483 485 }
484 486
485 487 mutex_exit(&sv_mutex);
486 488
487 489 /*
488 490 * Remove all minor nodes.
489 491 */
490 492
491 493 ddi_remove_minor_node(dip, NULL);
492 494 sv_dip = NULL;
493 495
494 496 return (DDI_SUCCESS);
495 497
496 498 default:
497 499 return (DDI_FAILURE);
498 500 }
499 501 }
500 502
501 503 static sv_maj_t *
502 504 sv_getmajor(const dev_t dev)
503 505 {
504 506 sv_maj_t **insert, *maj;
505 507 major_t umaj = getmajor(dev);
506 508
507 509 /*
508 510 * See if the hash table entry, or one of the hash chains
509 511 * is already allocated for this major number
510 512 */
511 513 if ((maj = sv_majors[SV_MAJOR_HASH(umaj)]) != 0) {
512 514 do {
513 515 if (maj->sm_major == umaj)
514 516 return (maj);
515 517 } while ((maj = maj->sm_next) != 0);
516 518 }
517 519
518 520 /*
 519 521 	 * If sv_mutex is already held here, there is a design flaw, as the
 520 522 	 * only expected callers, sv_enable() and sv_dev_to_sv(), do not
 521 523 	 * hold it. Return an error instead of panicking the system.
522 524 */
523 525 if (MUTEX_HELD(&sv_mutex)) {
524 526 cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t");
525 527 return (NULL);
526 528 }
527 529
528 530 /*
529 531 * Determine where to allocate a new element in the hash table
530 532 */
531 533 mutex_enter(&sv_mutex);
532 534 insert = &(sv_majors[SV_MAJOR_HASH(umaj)]);
533 535 for (maj = *insert; maj; maj = maj->sm_next) {
534 536
535 537 /* Did another thread beat us to it? */
536 538 if (maj->sm_major == umaj)
537 539 return (maj);
538 540
 539 541 		/* Remember the tail as the insert point */
540 542 if (maj->sm_next == NULL)
541 543 insert = &maj->sm_next;
542 544 }
543 545
544 546 /*
545 547 * Located the new insert point
546 548 */
547 549 *insert = nsc_kmem_zalloc(sizeof (*maj), KM_NOSLEEP, sv_mem);
548 550 if ((maj = *insert) != 0)
549 551 maj->sm_major = umaj;
550 552 else
551 553 cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t");
552 554
553 555 mutex_exit(&sv_mutex);
554 556
555 557 return (maj);
556 558 }
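
sv_getmajor() above is a double-checked lookup: an unlocked scan for the
fast path, then a re-scan under sv_mutex before inserting, because another
thread may have allocated the same major number between the two passes.
A condensed sketch of the pattern (hypothetical name, allocation elided):

	static sv_maj_t *
	sv_getmajor_sketch(sv_maj_t **bucket, major_t umaj)
	{
		sv_maj_t *maj;

		for (maj = *bucket; maj; maj = maj->sm_next)	/* unlocked pass */
			if (maj->sm_major == umaj)
				return (maj);

		mutex_enter(&sv_mutex);
		for (maj = *bucket; maj; maj = maj->sm_next) {
			if (maj->sm_major == umaj) {	/* another thread won */
				mutex_exit(&sv_mutex);
				return (maj);
			}
		}
		/* ... nsc_kmem_zalloc() a new sv_maj_t and link it in ... */
		mutex_exit(&sv_mutex);
		return (NULL);
	}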
557 559
558 560 /* ARGSUSED */
559 561
560 562 static int
561 563 sv_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
562 564 {
563 565 int rc = DDI_FAILURE;
564 566
565 567 switch (infocmd) {
566 568
567 569 case DDI_INFO_DEVT2DEVINFO:
568 570 *result = sv_dip;
569 571 rc = DDI_SUCCESS;
570 572 break;
571 573
572 574 case DDI_INFO_DEVT2INSTANCE:
573 575 /*
574 576 * We only have a single instance.
575 577 */
576 578 *result = 0;
577 579 rc = DDI_SUCCESS;
578 580 break;
579 581
580 582 default:
581 583 break;
582 584 }
583 585
584 586 return (rc);
585 587 }
586 588
587 589
588 590 /*
589 591 * Hashing of devices onto major device structures.
590 592 *
591 593 * Individual device structures are hashed onto one of the sm_hash[]
592 594 * buckets in the relevant major device structure.
593 595 *
594 596 * Hash insertion and deletion -must- be done with sv_mutex held. Hash
595 597 * searching does not require the mutex because of the sm_seq member.
596 598 * sm_seq is incremented on each insertion (-after- hash chain pointer
597 599 * manipulation) and each deletion (-before- hash chain pointer
598 600 * manipulation). When searching the hash chain, the seq number is
 599 601  * checked before accessing each device structure; if the seq number has
 600 602  * changed, we restart the search from the top of the hash chain.
601 603 * If we restart more than SV_HASH_RETRY times, we take sv_mutex and search
602 604 * the hash chain (we are guaranteed that this search cannot be
603 605 * interrupted).
604 606 */
605 607
606 608 #define SV_HASH_RETRY 16
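
The comment above describes a seqlock-style protocol. A condensed sketch of
the writer-side ordering that sv_get_state() and sv_rm_hash() implement
further down (hypothetical helper names; sv_mutex held as required):

	static void
	sv_hash_insert_sketch(sv_maj_t *maj, sv_dev_t **hb, sv_dev_t *svp)
	{
		ASSERT(MUTEX_HELD(&sv_mutex));
		svp->sv_hash = *hb;
		*hb = svp;		/* link the node in first ... */
		maj->sm_seq++;		/* ... then advertise the change */
	}

	static void
	sv_hash_delete_sketch(sv_maj_t *maj, sv_dev_t **svpp, sv_dev_t *svp)
	{
		ASSERT(MUTEX_HELD(&sv_mutex));
		maj->sm_seq++;		/* advertise the change first ... */
		*svpp = svp->sv_hash;	/* ... then unlink the node */
	}

A lock-free reader that samples sm_seq before dereferencing each node will
therefore either see a consistent chain or a changed sm_seq and retry,
falling back to sv_mutex after SV_HASH_RETRY attempts, as sv_dev_to_sv()
does below.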
607 609
608 610 static sv_dev_t *
609 611 sv_dev_to_sv(const dev_t dev, sv_maj_t **majpp)
610 612 {
611 613 minor_t umin = getminor(dev);
612 614 sv_dev_t **hb, *next, *svp;
613 615 sv_maj_t *maj;
614 616 int seq;
615 617 int try;
616 618
617 619 /* Get major hash table */
618 620 maj = sv_getmajor(dev);
619 621 if (majpp)
620 622 *majpp = maj;
621 623 if (maj == NULL)
622 624 return (NULL);
623 625
624 626 if (maj->sm_inuse == 0) {
625 627 DTRACE_PROBE1(
626 628 sv_dev_to_sv_end,
627 629 dev_t, dev);
628 630 return (NULL);
629 631 }
630 632
631 633 hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]);
632 634 try = 0;
633 635
634 636 retry:
635 637 if (try > SV_HASH_RETRY)
636 638 mutex_enter(&sv_mutex);
637 639
638 640 seq = maj->sm_seq;
639 641 for (svp = *hb; svp; svp = next) {
640 642 next = svp->sv_hash;
641 643
642 644 nsc_membar_stld(); /* preserve register load order */
643 645
644 646 if (maj->sm_seq != seq) {
645 647 DTRACE_PROBE1(sv_dev_to_sv_retry, dev_t, dev);
646 648 try++;
647 649 goto retry;
648 650 }
649 651
650 652 if (svp->sv_dev == dev)
651 653 break;
652 654 }
653 655
654 656 if (try > SV_HASH_RETRY)
655 657 mutex_exit(&sv_mutex);
656 658
657 659 return (svp);
658 660 }
659 661
660 662
661 663 /*
662 664 * Must be called with sv_mutex held.
663 665 */
664 666
665 667 static int
666 668 sv_get_state(const dev_t udev, sv_dev_t **svpp)
667 669 {
668 670 sv_dev_t **hb, **insert, *svp;
669 671 sv_maj_t *maj;
670 672 minor_t umin;
671 673 int i;
672 674
673 675 /* Get major hash table */
674 676 if ((maj = sv_getmajor(udev)) == NULL)
675 677 return (NULL);
676 678
677 679 /* Determine which minor hash table */
678 680 umin = getminor(udev);
679 681 hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]);
680 682
681 683 /* look for clash */
682 684
683 685 insert = hb;
684 686
685 687 for (svp = *hb; svp; svp = svp->sv_hash) {
686 688 if (svp->sv_dev == udev)
687 689 break;
688 690
689 691 if (svp->sv_hash == NULL)
690 692 insert = &svp->sv_hash;
691 693 }
692 694
693 695 if (svp) {
694 696 DTRACE_PROBE1(
695 697 sv_get_state_enabled,
696 698 dev_t, udev);
697 699 return (SV_EENABLED);
698 700 }
699 701
700 702 /* look for spare sv_devs slot */
701 703
702 704 for (i = 0; i < sv_max_devices; i++) {
703 705 svp = &sv_devs[i];
704 706
705 707 if (svp->sv_state == SV_DISABLE)
706 708 break;
707 709 }
708 710
709 711 if (i >= sv_max_devices) {
710 712 DTRACE_PROBE1(
711 713 sv_get_state_noslots,
712 714 dev_t, udev);
713 715 return (SV_ENOSLOTS);
714 716 }
715 717
716 718 svp->sv_state = SV_PENDING;
717 719 svp->sv_pending = curthread;
718 720
719 721 *insert = svp;
720 722 svp->sv_hash = NULL;
721 723 maj->sm_seq++; /* must be after the store to the hash chain */
722 724
723 725 *svpp = svp;
724 726
725 727 /*
726 728 * We do not know the size of the underlying device at
727 729 * this stage, so initialise "nblocks" property to
728 730 * zero, and update it whenever we succeed in
729 731 * nsc_reserve'ing the underlying nsc_fd_t.
730 732 */
731 733
732 734 svp->sv_nblocks = 0;
733 735
734 736 return (0);
735 737 }
736 738
737 739
738 740 /*
 739 741  * Remove a device structure from its hash chain.
740 742 * Must be called with sv_mutex held.
741 743 */
742 744
743 745 static void
744 746 sv_rm_hash(sv_dev_t *svp)
745 747 {
746 748 sv_dev_t **svpp;
747 749 sv_maj_t *maj;
748 750
749 751 /* Get major hash table */
750 752 if ((maj = sv_getmajor(svp->sv_dev)) == NULL)
751 753 return;
752 754
753 755 /* remove svp from hash chain */
754 756
755 757 svpp = &(maj->sm_hash[SV_MINOR_HASH(getminor(svp->sv_dev))]);
756 758 while (*svpp) {
757 759 if (*svpp == svp) {
758 760 /*
759 761 * increment of sm_seq must be before the
760 762 * removal from the hash chain
761 763 */
762 764 maj->sm_seq++;
763 765 *svpp = svp->sv_hash;
764 766 break;
765 767 }
766 768
767 769 svpp = &(*svpp)->sv_hash;
768 770 }
769 771
770 772 svp->sv_hash = NULL;
771 773 }
772 774
773 775 /*
774 776 * Free (disable) a device structure.
775 777 * Must be called with sv_lock(RW_WRITER) and sv_mutex held, and will
776 778 * perform the exits during its processing.
777 779 */
778 780
779 781 static int
780 782 sv_free(sv_dev_t *svp, const int error)
781 783 {
782 784 struct cb_ops *cb_ops;
783 785 sv_maj_t *maj;
784 786
785 787 /* Get major hash table */
786 788 if ((maj = sv_getmajor(svp->sv_dev)) == NULL)
787 789 return (NULL);
788 790
789 791 svp->sv_state = SV_PENDING;
790 792 svp->sv_pending = curthread;
791 793
792 794 /*
793 795 * Close the fd's before removing from the hash or swapping
794 796 * back the cb_ops pointers so that the cache flushes before new
795 797 * io can come in.
796 798 */
797 799
798 800 if (svp->sv_fd) {
799 801 (void) nsc_close(svp->sv_fd);
800 802 svp->sv_fd = 0;
801 803 }
802 804
803 805 sv_rm_hash(svp);
804 806
805 807 if (error != SV_ESDOPEN &&
806 808 error != SV_ELYROPEN && --maj->sm_inuse == 0) {
807 809
808 810 if (maj->sm_dev_ops)
809 811 cb_ops = maj->sm_dev_ops->devo_cb_ops;
810 812 else
811 813 cb_ops = NULL;
812 814
813 815 if (cb_ops && maj->sm_strategy != NULL) {
814 816 cb_ops->cb_strategy = maj->sm_strategy;
815 817 cb_ops->cb_close = maj->sm_close;
816 818 cb_ops->cb_ioctl = maj->sm_ioctl;
817 819 cb_ops->cb_write = maj->sm_write;
818 820 cb_ops->cb_open = maj->sm_open;
819 821 cb_ops->cb_read = maj->sm_read;
820 822 cb_ops->cb_flag = maj->sm_flag;
821 823
822 824 if (maj->sm_awrite)
823 825 cb_ops->cb_awrite = maj->sm_awrite;
824 826
825 827 if (maj->sm_aread)
826 828 cb_ops->cb_aread = maj->sm_aread;
827 829
828 830 /*
829 831 * corbin XXX
830 832 * Leave backing device ops in maj->sm_*
831 833 * to handle any requests that might come
832 834 * in during the disable. This could be
833 835 * a problem however if the backing device
834 836 * driver is changed while we process these
835 837 * requests.
836 838 *
837 839 * maj->sm_strategy = 0;
838 840 * maj->sm_awrite = 0;
839 841 * maj->sm_write = 0;
840 842 * maj->sm_ioctl = 0;
841 843 * maj->sm_close = 0;
842 844 * maj->sm_aread = 0;
843 845 * maj->sm_read = 0;
844 846 * maj->sm_open = 0;
845 847 * maj->sm_flag = 0;
846 848 *
847 849 */
848 850 }
849 851
850 852 if (maj->sm_dev_ops) {
851 853 maj->sm_dev_ops = 0;
852 854 }
853 855 }
854 856
855 857 if (svp->sv_lh) {
856 858 cred_t *crp = ddi_get_cred();
857 859
858 860 /*
859 861 * Close the protective layered driver open using the
860 862 * Sun Private layered driver i/f.
861 863 */
862 864
863 865 (void) ldi_close(svp->sv_lh, FREAD|FWRITE, crp);
864 866 svp->sv_lh = NULL;
865 867 }
866 868
867 869 svp->sv_timestamp = nsc_lbolt();
868 870 svp->sv_state = SV_DISABLE;
869 871 svp->sv_pending = NULL;
870 872 rw_exit(&svp->sv_lock);
871 873 mutex_exit(&sv_mutex);
872 874
873 875 return (error);
874 876 }
875 877
876 878 /*
877 879 * Reserve the device, taking into account the possibility that
878 880 * the reserve might have to be retried.
879 881 */
880 882 static int
881 883 sv_reserve(nsc_fd_t *fd, int flags)
882 884 {
883 885 int eintr_count;
884 886 int rc;
885 887
886 888 eintr_count = 0;
887 889 do {
888 890 rc = nsc_reserve(fd, flags);
889 891 if (rc == EINTR) {
890 892 ++eintr_count;
891 893 delay(2);
892 894 }
893 895 } while ((rc == EINTR) && (eintr_count < MAX_EINTR_COUNT));
894 896
895 897 return (rc);
896 898 }
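
A usage sketch: every successful sv_reserve() must be paired with an
nsc_release(), as sv_enable() does below when it probes the volume size.
The helper here is hypothetical; the flags mirror that call:

	static nsc_size_t
	sv_probe_nblocks_sketch(sv_dev_t *svp)
	{
		nsc_size_t nblocks = 0;

		if (sv_reserve(svp->sv_fd, NSC_READ|NSC_MULTI|NSC_PCATCH) == 0) {
			nblocks = svp->sv_nblocks;	/* stable while reserved */
			nsc_release(svp->sv_fd);	/* always pair the release */
		}

		return (nblocks);
	}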
897 899
898 900 static int
899 901 sv_enable(const caddr_t path, const int flag,
900 902 const dev_t udev, spcs_s_info_t kstatus)
901 903 {
902 904 struct dev_ops *dev_ops;
903 905 struct cb_ops *cb_ops;
904 906 sv_dev_t *svp;
905 907 sv_maj_t *maj;
906 908 nsc_size_t nblocks;
907 909 int rc;
908 910 cred_t *crp;
909 911 ldi_ident_t li;
910 912
911 913 if (udev == (dev_t)-1 || udev == 0) {
912 914 DTRACE_PROBE1(
913 915 sv_enable_err_baddev,
914 916 dev_t, udev);
915 917 return (SV_EBADDEV);
916 918 }
917 919
918 920 if ((flag & ~(NSC_CACHE|NSC_DEVICE)) != 0) {
919 921 DTRACE_PROBE1(sv_enable_err_amode, dev_t, udev);
920 922 return (SV_EAMODE);
921 923 }
922 924
923 925 /* Get major hash table */
924 926 if ((maj = sv_getmajor(udev)) == NULL)
925 927 return (SV_EBADDEV);
926 928
927 929 mutex_enter(&sv_mutex);
928 930
929 931 rc = sv_get_state(udev, &svp);
930 932 if (rc) {
931 933 mutex_exit(&sv_mutex);
932 934 DTRACE_PROBE1(sv_enable_err_state, dev_t, udev);
933 935 return (rc);
934 936 }
935 937
936 938 rw_enter(&svp->sv_lock, RW_WRITER);
937 939
938 940 /*
939 941 * Get real fd used for io
940 942 */
941 943
942 944 svp->sv_dev = udev;
943 945 svp->sv_flag = flag;
944 946
945 947 /*
946 948 * OR in NSC_DEVICE to ensure that nskern grabs the real strategy
947 949 * function pointer before sv swaps them out.
948 950 */
949 951
950 952 svp->sv_fd = nsc_open(path, (svp->sv_flag | NSC_DEVICE),
951 953 sv_fd_def, (blind_t)udev, &rc);
952 954
953 955 if (svp->sv_fd == NULL) {
954 956 if (kstatus)
955 957 spcs_s_add(kstatus, rc);
956 958 DTRACE_PROBE1(sv_enable_err_fd, dev_t, udev);
957 959 return (sv_free(svp, SV_ESDOPEN));
958 960 }
959 961
960 962 /*
961 963 * Perform a layered driver open using the Sun Private layered
962 964 * driver i/f to ensure that the cb_ops structure for the driver
963 965 * is not detached out from under us whilst sv is enabled.
964 966 *
965 967 */
966 968
967 969 crp = ddi_get_cred();
968 970 svp->sv_lh = NULL;
969 971
970 972 if ((rc = ldi_ident_from_dev(svp->sv_dev, &li)) == 0) {
971 973 rc = ldi_open_by_dev(&svp->sv_dev,
972 974 OTYP_BLK, FREAD|FWRITE, crp, &svp->sv_lh, li);
973 975 }
974 976
975 977 if (rc != 0) {
976 978 if (kstatus)
977 979 spcs_s_add(kstatus, rc);
978 980 DTRACE_PROBE1(sv_enable_err_lyr_open, dev_t, udev);
979 981 return (sv_free(svp, SV_ELYROPEN));
980 982 }
981 983
982 984 /*
983 985 * Do layering if required - must happen after nsc_open().
984 986 */
985 987
986 988 if (maj->sm_inuse++ == 0) {
987 989 maj->sm_dev_ops = nsc_get_devops(getmajor(udev));
988 990
989 991 if (maj->sm_dev_ops == NULL ||
990 992 maj->sm_dev_ops->devo_cb_ops == NULL) {
991 993 DTRACE_PROBE1(sv_enable_err_load, dev_t, udev);
992 994 return (sv_free(svp, SV_ELOAD));
993 995 }
994 996
995 997 dev_ops = maj->sm_dev_ops;
996 998 cb_ops = dev_ops->devo_cb_ops;
997 999
998 1000 if (cb_ops->cb_strategy == NULL ||
999 1001 cb_ops->cb_strategy == nodev ||
1000 1002 cb_ops->cb_strategy == nulldev) {
1001 1003 DTRACE_PROBE1(sv_enable_err_nostrategy, dev_t, udev);
1002 1004 return (sv_free(svp, SV_ELOAD));
1003 1005 }
1004 1006
1005 1007 if (cb_ops->cb_strategy == sv_lyr_strategy) {
1006 1008 DTRACE_PROBE1(sv_enable_err_svstrategy, dev_t, udev);
1007 1009 return (sv_free(svp, SV_ESTRATEGY));
1008 1010 }
1009 1011
1010 1012 maj->sm_strategy = cb_ops->cb_strategy;
1011 1013 maj->sm_close = cb_ops->cb_close;
1012 1014 maj->sm_ioctl = cb_ops->cb_ioctl;
1013 1015 maj->sm_write = cb_ops->cb_write;
1014 1016 maj->sm_open = cb_ops->cb_open;
1015 1017 maj->sm_read = cb_ops->cb_read;
1016 1018 maj->sm_flag = cb_ops->cb_flag;
1017 1019
1018 1020 cb_ops->cb_flag = cb_ops->cb_flag | D_MP;
1019 1021 cb_ops->cb_strategy = sv_lyr_strategy;
1020 1022 cb_ops->cb_close = sv_lyr_close;
1021 1023 cb_ops->cb_ioctl = sv_lyr_ioctl;
1022 1024 cb_ops->cb_write = sv_lyr_write;
1023 1025 cb_ops->cb_open = sv_lyr_open;
1024 1026 cb_ops->cb_read = sv_lyr_read;
1025 1027
1026 1028 /*
1027 1029 * Check that the driver has async I/O entry points
1028 1030 * before changing them.
1029 1031 */
1030 1032
1031 1033 if (dev_ops->devo_rev < 3 || cb_ops->cb_rev < 1) {
1032 1034 maj->sm_awrite = 0;
1033 1035 maj->sm_aread = 0;
1034 1036 } else {
1035 1037 maj->sm_awrite = cb_ops->cb_awrite;
1036 1038 maj->sm_aread = cb_ops->cb_aread;
1037 1039
1038 1040 cb_ops->cb_awrite = sv_lyr_awrite;
1039 1041 cb_ops->cb_aread = sv_lyr_aread;
1040 1042 }
1041 1043
1042 1044 /*
1043 1045 * Bug 4645743
1044 1046 *
1045 1047 * Prevent sv from ever unloading after it has interposed
1046 1048 * on a major device because there is a race between
1047 1049 * sv removing its layered entry points from the target
1048 1050 * dev_ops, a client coming in and accessing the driver,
1049 1051 * and the kernel modunloading the sv text.
1050 1052 *
1051 1053 * To allow unload, do svboot -u, which only happens in
1052 1054 * pkgrm time.
1053 1055 */
1054 1056 ASSERT(MUTEX_HELD(&sv_mutex));
1055 1057 sv_mod_status = SV_PREVENT_UNLOAD;
1056 1058 }
1057 1059
1058 1060
1059 1061 svp->sv_timestamp = nsc_lbolt();
1060 1062 svp->sv_state = SV_ENABLE;
1061 1063 svp->sv_pending = NULL;
1062 1064 rw_exit(&svp->sv_lock);
1063 1065
1064 1066 sv_ndevices++;
1065 1067 mutex_exit(&sv_mutex);
1066 1068
1067 1069 nblocks = 0;
1068 1070 if (sv_reserve(svp->sv_fd, NSC_READ|NSC_MULTI|NSC_PCATCH) == 0) {
1069 1071 nblocks = svp->sv_nblocks;
1070 1072 nsc_release(svp->sv_fd);
1071 1073 }
1072 1074
1073 1075 cmn_err(CE_CONT, "!sv: rdev 0x%lx, nblocks %" NSC_SZFMT "\n",
1074 1076 svp->sv_dev, nblocks);
1075 1077
1076 1078 return (0);
1077 1079 }
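
The cb_ops interposition performed above, stripped to its essentials: save
the target driver's entry points (into sv_maj_t in the real code), then
swap in the sv_lyr_* routines, so the layered function can pass calls
through and sv_free() can later restore the originals. A sketch for a
single entry point, with hypothetical names:

	static int (*sketch_saved_open)(dev_t *, int, int, cred_t *);

	static int
	sketch_lyr_open(dev_t *devp, int flag, int otyp, cred_t *crp)
	{
		/* ... layered policy would go here ... */
		return ((*sketch_saved_open)(devp, flag, otyp, crp));
	}

	static void
	sketch_interpose(struct cb_ops *cb)
	{
		sketch_saved_open = cb->cb_open;  /* save for pass-through/restore */
		cb->cb_open = sketch_lyr_open;    /* install the layered routine */
	}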
1078 1080
1079 1081
1080 1082 static int
1081 1083 sv_prepare_unload()
1082 1084 {
1083 1085 int rc = 0;
1084 1086
1085 1087 mutex_enter(&sv_mutex);
1086 1088
1087 1089 if (sv_mod_status == SV_PREVENT_UNLOAD) {
1088 1090 if ((sv_ndevices != 0) || (sv_tset != NULL)) {
1089 1091 rc = EBUSY;
1090 1092 } else {
1091 1093 sv_mod_status = SV_ALLOW_UNLOAD;
1092 1094 delay(SV_WAIT_UNLOAD * drv_usectohz(1000000));
1093 1095 }
1094 1096 }
1095 1097
1096 1098 mutex_exit(&sv_mutex);
1097 1099 return (rc);
1098 1100 }
1099 1101
1100 1102 static int
1101 1103 svattach_fd(blind_t arg)
1102 1104 {
1103 1105 dev_t dev = (dev_t)arg;
1104 1106 sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1105 1107 int rc;
1106 1108
1107 1109 if (sv_debug > 0)
1108 1110 cmn_err(CE_CONT, "!svattach_fd(%p, %p)\n", arg, (void *)svp);
1109 1111
1110 1112 if (svp == NULL) {
1111 1113 cmn_err(CE_WARN, "!svattach_fd: no state (arg %p)", arg);
1112 1114 return (0);
1113 1115 }
1114 1116
1115 1117 if ((rc = nsc_partsize(svp->sv_fd, &svp->sv_nblocks)) != 0) {
1116 1118 cmn_err(CE_WARN,
1117 1119 "!svattach_fd: nsc_partsize() failed, rc %d", rc);
1118 1120 svp->sv_nblocks = 0;
1119 1121 }
1120 1122
1121 1123 if ((rc = nsc_maxfbas(svp->sv_fd, 0, &svp->sv_maxfbas)) != 0) {
1122 1124 cmn_err(CE_WARN,
1123 1125 "!svattach_fd: nsc_maxfbas() failed, rc %d", rc);
1124 1126 svp->sv_maxfbas = 0;
1125 1127 }
1126 1128
1127 1129 if (sv_debug > 0) {
1128 1130 cmn_err(CE_CONT,
1129 1131 "!svattach_fd(%p): size %" NSC_SZFMT ", "
1130 1132 "maxfbas %" NSC_SZFMT "\n",
1131 1133 arg, svp->sv_nblocks, svp->sv_maxfbas);
1132 1134 }
1133 1135
1134 1136 return (0);
1135 1137 }
1136 1138
1137 1139
1138 1140 static int
1139 1141 svdetach_fd(blind_t arg)
1140 1142 {
1141 1143 dev_t dev = (dev_t)arg;
1142 1144 sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1143 1145
1144 1146 if (sv_debug > 0)
1145 1147 cmn_err(CE_CONT, "!svdetach_fd(%p, %p)\n", arg, (void *)svp);
1146 1148
1147 1149 /* svp can be NULL during disable of an sv */
1148 1150 if (svp == NULL)
1149 1151 return (0);
1150 1152
1151 1153 svp->sv_maxfbas = 0;
1152 1154 svp->sv_nblocks = 0;
1153 1155 return (0);
1154 1156 }
1155 1157
1156 1158
1157 1159 /*
1158 1160 * Side effect: if called with (guard != 0), then expects both sv_mutex
1159 1161 * and sv_lock(RW_WRITER) to be held, and will release them before returning.
1160 1162 */
1161 1163
1162 1164 /* ARGSUSED */
1163 1165 static int
1164 1166 sv_disable(dev_t dev, spcs_s_info_t kstatus)
1165 1167 {
1166 1168 sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1167 1169
1168 1170 if (svp == NULL) {
1169 1171
1170 1172 DTRACE_PROBE1(sv_disable_err_nodev, sv_dev_t *, svp);
1171 1173 return (SV_ENODEV);
1172 1174 }
1173 1175
1174 1176 mutex_enter(&sv_mutex);
1175 1177 rw_enter(&svp->sv_lock, RW_WRITER);
1176 1178
1177 1179 if (svp->sv_fd == NULL || svp->sv_state != SV_ENABLE) {
1178 1180 rw_exit(&svp->sv_lock);
1179 1181 mutex_exit(&sv_mutex);
1180 1182
1181 1183 DTRACE_PROBE1(sv_disable_err_disabled, sv_dev_t *, svp);
1182 1184 return (SV_EDISABLED);
1183 1185 }
1184 1186
1185 1187
1186 1188 sv_ndevices--;
1187 1189 return (sv_free(svp, 0));
1188 1190 }
1189 1191
1190 1192
1191 1193
1192 1194 static int
1193 1195 sv_lyr_open(dev_t *devp, int flag, int otyp, cred_t *crp)
1194 1196 {
1195 1197 nsc_buf_t *tmph;
1196 1198 sv_dev_t *svp;
1197 1199 sv_maj_t *maj;
1198 1200 int (*fn)();
1199 1201 dev_t odev;
1200 1202 int ret;
1201 1203 int rc;
1202 1204
1203 1205 svp = sv_dev_to_sv(*devp, &maj);
1204 1206
1205 1207 if (svp) {
1206 1208 if (svp->sv_state == SV_PENDING &&
1207 1209 svp->sv_pending == curthread) {
1208 1210 /*
1209 1211 * This is a recursive open from a call to
1210 1212 * ddi_lyr_open_by_devt and so we just want
1211 1213 * to pass it straight through to the
1212 1214 * underlying driver.
1213 1215 */
1214 1216 DTRACE_PROBE2(sv_lyr_open_recursive,
1215 1217 sv_dev_t *, svp,
1216 1218 dev_t, *devp);
1217 1219 svp = NULL;
1218 1220 } else
1219 1221 rw_enter(&svp->sv_lock, RW_READER);
1220 1222 }
1221 1223
1222 1224 odev = *devp;
1223 1225
1224 1226 if (maj && (fn = maj->sm_open) != 0) {
1225 1227 if (!(maj->sm_flag & D_MP)) {
1226 1228 UNSAFE_ENTER();
1227 1229 ret = (*fn)(devp, flag, otyp, crp);
1228 1230 UNSAFE_EXIT();
1229 1231 } else {
1230 1232 ret = (*fn)(devp, flag, otyp, crp);
1231 1233 }
1232 1234
1233 1235 if (ret == 0) {
1234 1236 /*
1235 1237 * Re-acquire svp if the driver changed *devp.
1236 1238 */
1237 1239
1238 1240 if (*devp != odev) {
1239 - rw_exit(&svp->sv_lock);
1241 + if (svp != NULL)
1242 + rw_exit(&svp->sv_lock);
1240 1243
1241 1244 svp = sv_dev_to_sv(*devp, NULL);
1242 1245
1243 1246 if (svp) {
1244 1247 rw_enter(&svp->sv_lock, RW_READER);
1245 1248 }
1246 1249 }
1247 1250 }
1248 1251 } else {
1249 1252 ret = ENODEV;
1250 1253 }
1251 1254
1252 1255 if (svp && ret != 0 && svp->sv_state == SV_ENABLE) {
1253 1256 /*
1254 1257 * Underlying DDI open failed, but we have this
1255 1258 * device SV enabled. If we can read some data
1256 1259 * from the device, fake a successful open (this
1257 1260 * probably means that this device is RDC'd and we
1258 1261 * are getting the data from the secondary node).
1259 1262 *
1260 1263 * The reserve must be done with NSC_TRY|NSC_NOWAIT to
1261 1264 * ensure that it does not deadlock if this open is
1262 1265 * coming from nskernd:get_bsize().
1263 1266 */
1264 1267 rc = sv_reserve(svp->sv_fd,
1265 1268 NSC_TRY | NSC_NOWAIT | NSC_MULTI | NSC_PCATCH);
1266 1269 if (rc == 0) {
1267 1270 tmph = NULL;
1268 1271
1269 1272 rc = nsc_alloc_buf(svp->sv_fd, 0, 1, NSC_READ, &tmph);
1270 1273 if (rc <= 0) {
1271 1274 /* success */
1272 1275 ret = 0;
1273 1276 }
1274 1277
1275 1278 if (tmph) {
1276 1279 (void) nsc_free_buf(tmph);
1277 1280 tmph = NULL;
1278 1281 }
1279 1282
1280 1283 nsc_release(svp->sv_fd);
1281 1284
1282 1285 /*
1283 1286 * Count the number of layered opens that we
1284 1287 * fake since we have to fake a matching number
1285 1288 * of closes (OTYP_LYR open/close calls must be
1286 1289 * paired).
1287 1290 */
1288 1291
1289 1292 if (ret == 0 && otyp == OTYP_LYR) {
1290 1293 mutex_enter(&svp->sv_olock);
1291 1294 svp->sv_openlcnt++;
1292 1295 mutex_exit(&svp->sv_olock);
1293 1296 }
1294 1297 }
1295 1298 }
1296 1299
1297 1300 if (svp) {
1298 1301 rw_exit(&svp->sv_lock);
1299 1302 }
1300 1303
1301 1304 return (ret);
1302 1305 }
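
The hunk above is the whole of fix 935: on the recursive-open path svp is
deliberately set to NULL before the underlying open is called, so when that
open succeeded and changed *devp, the old code executed
rw_exit(&svp->sv_lock) on a lock it never held - a NULL-pointer dereference
in the kernel. The corrected pattern, distilled (illustration only):

	if (ret == 0 && *devp != odev) {
		if (svp != NULL)	/* 935: svp may be NULL here */
			rw_exit(&svp->sv_lock);

		svp = sv_dev_to_sv(*devp, NULL);
		if (svp != NULL)
			rw_enter(&svp->sv_lock, RW_READER);
	}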
1303 1306
1304 1307
1305 1308 static int
1306 1309 sv_lyr_close(dev_t dev, int flag, int otyp, cred_t *crp)
1307 1310 {
1308 1311 sv_dev_t *svp;
1309 1312 sv_maj_t *maj;
1310 1313 int (*fn)();
1311 1314 int ret;
1312 1315
1313 1316 svp = sv_dev_to_sv(dev, &maj);
1314 1317
1315 1318 if (svp &&
1316 1319 svp->sv_state == SV_PENDING &&
1317 1320 svp->sv_pending == curthread) {
1318 1321 /*
1319 1322 * This is a recursive open from a call to
1320 1323 * ddi_lyr_close and so we just want
1321 1324 * to pass it straight through to the
1322 1325 * underlying driver.
1323 1326 */
1324 1327 DTRACE_PROBE2(sv_lyr_close_recursive, sv_dev_t *, svp,
1325 1328 dev_t, dev);
1326 1329 svp = NULL;
1327 1330 }
1328 1331
1329 1332 if (svp) {
1330 1333 rw_enter(&svp->sv_lock, RW_READER);
1331 1334
1332 1335 if (otyp == OTYP_LYR) {
1333 1336 mutex_enter(&svp->sv_olock);
1334 1337
1335 1338 if (svp->sv_openlcnt) {
1336 1339 /*
1337 1340 * Consume sufficient layered closes to
1338 1341 * account for the opens that we faked
1339 1342 * whilst the device was failed.
1340 1343 */
1341 1344 svp->sv_openlcnt--;
1342 1345 mutex_exit(&svp->sv_olock);
1343 1346 rw_exit(&svp->sv_lock);
1344 1347
1345 1348 DTRACE_PROBE1(sv_lyr_close_end, dev_t, dev);
1346 1349
1347 1350 return (0);
1348 1351 }
1349 1352
1350 1353 mutex_exit(&svp->sv_olock);
1351 1354 }
1352 1355 }
1353 1356
1354 1357 if (maj && (fn = maj->sm_close) != 0) {
1355 1358 if (!(maj->sm_flag & D_MP)) {
1356 1359 UNSAFE_ENTER();
1357 1360 ret = (*fn)(dev, flag, otyp, crp);
1358 1361 UNSAFE_EXIT();
1359 1362 } else {
1360 1363 ret = (*fn)(dev, flag, otyp, crp);
1361 1364 }
1362 1365 } else {
1363 1366 ret = ENODEV;
1364 1367 }
1365 1368
1366 1369 if (svp) {
1367 1370 rw_exit(&svp->sv_lock);
1368 1371 }
1369 1372
1370 1373 return (ret);
1371 1374 }
1372 1375
1373 1376
1374 1377 /*
1375 1378 * Convert the specified dev_t into a locked and enabled sv_dev_t, or
1376 1379 * return NULL.
1377 1380 */
1378 1381 static sv_dev_t *
1379 1382 sv_find_enabled(const dev_t dev, sv_maj_t **majpp)
1380 1383 {
1381 1384 sv_dev_t *svp;
1382 1385
1383 1386 while ((svp = sv_dev_to_sv(dev, majpp)) != NULL) {
1384 1387 rw_enter(&svp->sv_lock, RW_READER);
1385 1388
1386 1389 if (svp->sv_state == SV_ENABLE) {
1387 1390 /* locked and enabled */
1388 1391 break;
1389 1392 }
1390 1393
1391 1394 /*
1392 1395 * State was changed while waiting on the lock.
1393 1396 * Wait for a stable state.
1394 1397 */
1395 1398 rw_exit(&svp->sv_lock);
1396 1399
1397 1400 DTRACE_PROBE1(sv_find_enabled_retry, dev_t, dev);
1398 1401
1399 1402 delay(2);
1400 1403 }
1401 1404
1402 1405 return (svp);
1403 1406 }
1404 1407
1405 1408
1406 1409 static int
1407 1410 sv_lyr_uio(dev_t dev, uio_t *uiop, cred_t *crp, int rw)
1408 1411 {
1409 1412 sv_dev_t *svp;
1410 1413 sv_maj_t *maj;
1411 1414 int (*fn)();
1412 1415 int rc;
1413 1416
1414 1417 svp = sv_find_enabled(dev, &maj);
1415 1418 if (svp == NULL) {
1416 1419 if (maj) {
1417 1420 if (rw == NSC_READ)
1418 1421 fn = maj->sm_read;
1419 1422 else
1420 1423 fn = maj->sm_write;
1421 1424
1422 1425 if (fn != 0) {
1423 1426 if (!(maj->sm_flag & D_MP)) {
1424 1427 UNSAFE_ENTER();
1425 1428 rc = (*fn)(dev, uiop, crp);
1426 1429 UNSAFE_EXIT();
1427 1430 } else {
1428 1431 rc = (*fn)(dev, uiop, crp);
1429 1432 }
1430 1433 }
1431 1434
1432 1435 return (rc);
1433 1436 } else {
1434 1437 return (ENODEV);
1435 1438 }
1436 1439 }
1437 1440
1438 1441 ASSERT(RW_READ_HELD(&svp->sv_lock));
1439 1442
1440 1443 if (svp->sv_flag == 0) {
1441 1444 /*
1442 1445 * guard access mode
1443 1446 * - prevent user level access to the device
1444 1447 */
1445 1448 DTRACE_PROBE1(sv_lyr_uio_err_guard, uio_t *, uiop);
1446 1449 rc = EPERM;
1447 1450 goto out;
1448 1451 }
1449 1452
1450 1453 if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) {
1451 1454 DTRACE_PROBE1(sv_lyr_uio_err_rsrv, uio_t *, uiop);
1452 1455 goto out;
1453 1456 }
1454 1457
1455 1458 if (rw == NSC_READ)
1456 1459 rc = nsc_uread(svp->sv_fd, uiop, crp);
1457 1460 else
1458 1461 rc = nsc_uwrite(svp->sv_fd, uiop, crp);
1459 1462
1460 1463 nsc_release(svp->sv_fd);
1461 1464
1462 1465 out:
1463 1466 rw_exit(&svp->sv_lock);
1464 1467
1465 1468 return (rc);
1466 1469 }
1467 1470
1468 1471
1469 1472 static int
1470 1473 sv_lyr_read(dev_t dev, uio_t *uiop, cred_t *crp)
1471 1474 {
1472 1475 return (sv_lyr_uio(dev, uiop, crp, NSC_READ));
1473 1476 }
1474 1477
1475 1478
1476 1479 static int
1477 1480 sv_lyr_write(dev_t dev, uio_t *uiop, cred_t *crp)
1478 1481 {
1479 1482 return (sv_lyr_uio(dev, uiop, crp, NSC_WRITE));
1480 1483 }
1481 1484
1482 1485
1483 1486 /* ARGSUSED */
1484 1487
1485 1488 static int
1486 1489 sv_lyr_aread(dev_t dev, struct aio_req *aio, cred_t *crp)
1487 1490 {
1488 1491 return (aphysio(sv_lyr_strategy,
1489 1492 anocancel, dev, B_READ, minphys, aio));
1490 1493 }
1491 1494
1492 1495
1493 1496 /* ARGSUSED */
1494 1497
1495 1498 static int
1496 1499 sv_lyr_awrite(dev_t dev, struct aio_req *aio, cred_t *crp)
1497 1500 {
1498 1501 return (aphysio(sv_lyr_strategy,
1499 1502 anocancel, dev, B_WRITE, minphys, aio));
1500 1503 }
1501 1504
1502 1505
1503 1506 /*
 1504 1507  * Set up an array containing the list of raw path names.
 1505 1508  * The array for the paths is svn and the size of the array is
 1506 1509  * given in size.
 1507 1510  *
 1508 1511  * If there are more layered devices than will fit in the array,
 1509 1512  * the number of extra layered devices is returned. Otherwise
 1510 1513  * zero is returned.
1511 1514 *
1512 1515 * Input:
1513 1516 * svn : array for paths
1514 1517 * size : size of the array
1515 1518 *
1516 1519 * Output (extra):
1517 1520 * zero : All paths fit in array
 1518 1521  * >0   : Number of defined layered devices that did not fit in the array
1519 1522 */
1520 1523
1521 1524 static int
1522 1525 sv_list(void *ptr, const int size, int *extra, const int ilp32)
1523 1526 {
1524 1527 sv_name32_t *svn32;
1525 1528 sv_name_t *svn;
1526 1529 sv_dev_t *svp;
1527 1530 int *mode, *nblocks;
1528 1531 int i, index;
1529 1532 char *path;
1530 1533
1531 1534 *extra = 0;
1532 1535 index = 0;
1533 1536
1534 1537 if (ilp32)
1535 1538 svn32 = ptr;
1536 1539 else
1537 1540 svn = ptr;
1538 1541
1539 1542 mutex_enter(&sv_mutex);
1540 1543 for (i = 0; i < sv_max_devices; i++) {
1541 1544 svp = &sv_devs[i];
1542 1545
1543 1546 rw_enter(&svp->sv_lock, RW_READER);
1544 1547
1545 1548 if (svp->sv_state != SV_ENABLE) {
1546 1549 rw_exit(&svp->sv_lock);
1547 1550 continue;
1548 1551 }
1549 1552
1550 1553 if ((*extra) != 0 || ptr == NULL) {
1551 1554 /* Another overflow entry */
1552 1555 rw_exit(&svp->sv_lock);
1553 1556 (*extra)++;
1554 1557 continue;
1555 1558 }
1556 1559
1557 1560 if (ilp32) {
1558 1561 nblocks = &svn32->svn_nblocks;
1559 1562 mode = &svn32->svn_mode;
1560 1563 path = svn32->svn_path;
1561 1564
1562 1565 svn32->svn_timestamp = (uint32_t)svp->sv_timestamp;
1563 1566 svn32++;
1564 1567 } else {
1565 1568 nblocks = &svn->svn_nblocks;
1566 1569 mode = &svn->svn_mode;
1567 1570 path = svn->svn_path;
1568 1571
1569 1572 svn->svn_timestamp = svp->sv_timestamp;
1570 1573 svn++;
1571 1574 }
1572 1575
1573 1576 (void) strcpy(path, nsc_pathname(svp->sv_fd));
1574 1577 *nblocks = svp->sv_nblocks;
1575 1578 *mode = svp->sv_flag;
1576 1579
1577 1580 if (*nblocks == 0) {
1578 1581 if (sv_debug > 3)
1579 1582 cmn_err(CE_CONT, "!sv_list: need to reserve\n");
1580 1583
1581 1584 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
1582 1585 *nblocks = svp->sv_nblocks;
1583 1586 nsc_release(svp->sv_fd);
1584 1587 }
1585 1588 }
1586 1589
1587 1590 if (++index >= size) {
1588 1591 /* Out of space */
1589 1592 (*extra)++;
1590 1593 }
1591 1594
1592 1595 rw_exit(&svp->sv_lock);
1593 1596 }
1594 1597 mutex_exit(&sv_mutex);
1595 1598
1596 1599 if (index < size) {
1597 1600 /* NULL terminated list */
1598 1601 if (ilp32)
1599 1602 svn32->svn_path[0] = '\0';
1600 1603 else
1601 1604 svn->svn_path[0] = '\0';
1602 1605 }
1603 1606
1604 1607 return (0);
1605 1608 }
1606 1609
1607 1610
1608 1611 static void
1609 1612 sv_thread_tune(int threads)
1610 1613 {
1611 1614 int incr = (threads > 0) ? 1 : -1;
1612 1615 int change = 0;
1613 1616 int nthreads;
1614 1617
1615 1618 ASSERT(MUTEX_HELD(&sv_mutex));
1616 1619
1617 1620 if (sv_threads_extra) {
1618 1621 /* keep track of any additional threads requested */
1619 1622 if (threads > 0) {
1620 1623 sv_threads_extra += threads;
1621 1624 return;
1622 1625 }
1623 1626 threads = -threads;
1624 1627 if (threads >= sv_threads_extra) {
1625 1628 threads -= sv_threads_extra;
1626 1629 sv_threads_extra = 0;
1627 1630 /* fall through to while loop */
1628 1631 } else {
1629 1632 sv_threads_extra -= threads;
1630 1633 return;
1631 1634 }
1632 1635 } else if (threads > 0) {
1633 1636 /*
1634 1637 * do not increase the number of threads beyond
1635 1638 * sv_threads_max when doing dynamic thread tuning
1636 1639 */
1637 1640 nthreads = nst_nthread(sv_tset);
1638 1641 if ((nthreads + threads) > sv_threads_max) {
1639 1642 sv_threads_extra = nthreads + threads - sv_threads_max;
1640 1643 threads = sv_threads_max - nthreads;
1641 1644 if (threads <= 0)
1642 1645 return;
1643 1646 }
1644 1647 }
1645 1648
1646 1649 if (threads < 0)
1647 1650 threads = -threads;
1648 1651
1649 1652 while (threads--) {
1650 1653 nthreads = nst_nthread(sv_tset);
1651 1654 sv_threads_needed += incr;
1652 1655
1653 1656 if (sv_threads_needed >= nthreads)
1654 1657 change += nst_add_thread(sv_tset, sv_threads_inc);
1655 1658 else if ((sv_threads_needed <
1656 1659 (nthreads - (sv_threads_inc + sv_threads_hysteresis))) &&
1657 1660 ((nthreads - sv_threads_inc) >= sv_threads))
1658 1661 change -= nst_del_thread(sv_tset, sv_threads_inc);
1659 1662 }
1660 1663
1661 1664 #ifdef DEBUG
1662 1665 if (change) {
1663 1666 cmn_err(CE_NOTE,
1664 1667 "!sv_thread_tune: threads needed %d, nthreads %d, "
1665 1668 "nthreads change %d",
1666 1669 sv_threads_needed, nst_nthread(sv_tset), change);
1667 1670 }
1668 1671 #endif
1669 1672 }
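
With the defaults declared at the top of the file (sv_threads = 32,
sv_threads_inc = 8, sv_threads_hysteresis = 4), the loop above grows the
set as soon as demand reaches the current thread count, and shrinks it only
once demand falls more than sv_threads_inc + sv_threads_hysteresis = 12
below it, never dropping under sv_threads. A standalone model of one step
of that decision (hypothetical helper, not driver code):

	static int
	sv_tune_decision_sketch(int needed, int nthreads)
	{
		const int inc = 8, hysteresis = 4, min_threads = 32;

		if (needed >= nthreads)
			return (+inc);	/* grow by sv_threads_inc */
		if (needed < nthreads - (inc + hysteresis) &&
		    nthreads - inc >= min_threads)
			return (-inc);	/* shrink by sv_threads_inc */
		return (0);		/* hold inside the hysteresis band */
	}

For example, with 48 threads running, demand must fall below 36 before 8
threads are reclaimed; at the 32-thread floor no threads are ever removed.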
1670 1673
1671 1674
1672 1675 /* ARGSUSED */
1673 1676 static int
1674 1677 svopen(dev_t *devp, int flag, int otyp, cred_t *crp)
1675 1678 {
1676 1679 int rc;
1677 1680
1678 1681 mutex_enter(&sv_mutex);
1679 1682 rc = sv_init_devs();
1680 1683 mutex_exit(&sv_mutex);
1681 1684
1682 1685 return (rc);
1683 1686 }
1684 1687
1685 1688
1686 1689 /* ARGSUSED */
1687 1690 static int
1688 1691 svclose(dev_t dev, int flag, int otyp, cred_t *crp)
1689 1692 {
1690 1693 const int secs = HZ * 5;
1691 1694 const int ticks = HZ / 10;
1692 1695 int loops = secs / ticks;
1693 1696
1694 1697 mutex_enter(&sv_mutex);
1695 1698 while (sv_ndevices <= 0 && sv_tset != NULL && loops > 0) {
1696 1699 if (nst_nlive(sv_tset) <= 0) {
1697 1700 nst_destroy(sv_tset);
1698 1701 sv_tset = NULL;
1699 1702 break;
1700 1703 }
1701 1704
1702 1705 /* threads still active - wait for them to exit */
1703 1706 mutex_exit(&sv_mutex);
1704 1707 delay(ticks);
1705 1708 loops--;
1706 1709 mutex_enter(&sv_mutex);
1707 1710 }
1708 1711 mutex_exit(&sv_mutex);
1709 1712
1710 1713 if (loops <= 0) {
1711 1714 cmn_err(CE_WARN,
1712 1715 #ifndef DEBUG
1713 1716 /* do not write to console when non-DEBUG */
1714 1717 "!"
1715 1718 #endif
1716 1719 "sv:svclose: threads still active "
1717 1720 "after %d sec - leaking thread set", secs);
1718 1721 }
1719 1722
1720 1723 return (0);
1721 1724 }
1722 1725
1723 1726
1724 1727 static int
1725 1728 svioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, int *rvalp)
1726 1729 {
1727 1730 char itmp1[12], itmp2[12]; /* temp char array for editing ints */
1728 1731 spcs_s_info_t kstatus; /* Kernel version of spcs status */
1729 1732 spcs_s_info_t ustatus; /* Address of user version of spcs status */
1730 1733 sv_list32_t svl32; /* 32 bit Initial structure for SVIOC_LIST */
1731 1734 sv_version_t svv; /* Version structure */
1732 1735 sv_conf_t svc; /* User config structure */
1733 1736 sv_list_t svl; /* Initial structure for SVIOC_LIST */
1734 1737 void *usvn; /* Address of user sv_name_t */
1735 1738 void *svn = NULL; /* Array for SVIOC_LIST */
1736 1739 uint64_t phash; /* pathname hash */
1737 1740 int rc = 0; /* Return code -- errno */
1738 1741 int size; /* Number of items in array */
1739 1742 int bytes; /* Byte size of array */
1740 1743 int ilp32; /* Convert data structures for ilp32 userland */
1741 1744
1742 1745 *rvalp = 0;
1743 1746
1744 1747 /*
 1745 1748 	 * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, continue as normal.
 1746 1749 	 * Otherwise it was previously SV_PREVENT_UNLOAD and is now
 1747 1750 	 * SV_ALLOW_UNLOAD, and the driver is expected to unload eventually.
 1748 1751 	 *
 1749 1752 	 * SV_ALLOW_UNLOAD is a final state, so no need to grab sv_mutex.
1750 1753 */
1751 1754 if (sv_mod_status == SV_ALLOW_UNLOAD) {
1752 1755 return (EBUSY);
1753 1756 }
1754 1757
1755 1758 if ((cmd != SVIOC_LIST) && ((rc = drv_priv(crp)) != 0))
1756 1759 return (rc);
1757 1760
1758 1761 kstatus = spcs_s_kcreate();
1759 1762 if (!kstatus) {
1760 1763 DTRACE_PROBE1(sv_ioctl_err_kcreate, dev_t, dev);
1761 1764 return (ENOMEM);
1762 1765 }
1763 1766
1764 1767 ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32);
1765 1768
1766 1769 switch (cmd) {
1767 1770
1768 1771 case SVIOC_ENABLE:
1769 1772
1770 1773 if (ilp32) {
1771 1774 sv_conf32_t svc32;
1772 1775
1773 1776 if (ddi_copyin((void *)arg, &svc32,
1774 1777 sizeof (svc32), mode) < 0) {
1775 1778 spcs_s_kfree(kstatus);
1776 1779 return (EFAULT);
1777 1780 }
1778 1781
1779 1782 svc.svc_error = (spcs_s_info_t)svc32.svc_error;
1780 1783 (void) strcpy(svc.svc_path, svc32.svc_path);
1781 1784 svc.svc_flag = svc32.svc_flag;
1782 1785 svc.svc_major = svc32.svc_major;
1783 1786 svc.svc_minor = svc32.svc_minor;
1784 1787 } else {
1785 1788 if (ddi_copyin((void *)arg, &svc,
1786 1789 sizeof (svc), mode) < 0) {
1787 1790 spcs_s_kfree(kstatus);
1788 1791 return (EFAULT);
1789 1792 }
1790 1793 }
1791 1794
1792 1795 /* force to raw access */
1793 1796 svc.svc_flag = NSC_DEVICE;
1794 1797
1795 1798 if (sv_tset == NULL) {
1796 1799 mutex_enter(&sv_mutex);
1797 1800
1798 1801 if (sv_tset == NULL) {
1799 1802 sv_tset = nst_init("sv_thr", sv_threads);
1800 1803 }
1801 1804
1802 1805 mutex_exit(&sv_mutex);
1803 1806
1804 1807 if (sv_tset == NULL) {
1805 1808 cmn_err(CE_WARN,
1806 1809 "!sv: could not allocate %d threads",
1807 1810 sv_threads);
1808 1811 }
1809 1812 }
1810 1813
1811 1814 rc = sv_enable(svc.svc_path, svc.svc_flag,
1812 1815 makedevice(svc.svc_major, svc.svc_minor), kstatus);
1813 1816
1814 1817 if (rc == 0) {
1815 1818 sv_config_time = nsc_lbolt();
1816 1819
1817 1820 mutex_enter(&sv_mutex);
1818 1821 sv_thread_tune(sv_threads_dev);
1819 1822 mutex_exit(&sv_mutex);
1820 1823 }
1821 1824
1822 1825 DTRACE_PROBE3(sv_ioctl_end, dev_t, dev, int, *rvalp, int, rc);
1823 1826
1824 1827 return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc));
1825 1828 /* NOTREACHED */
1826 1829
1827 1830 case SVIOC_DISABLE:
1828 1831
1829 1832 if (ilp32) {
1830 1833 sv_conf32_t svc32;
1831 1834
1832 1835 if (ddi_copyin((void *)arg, &svc32,
1833 1836 sizeof (svc32), mode) < 0) {
1834 1837 spcs_s_kfree(kstatus);
1835 1838 return (EFAULT);
1836 1839 }
1837 1840
1838 1841 svc.svc_error = (spcs_s_info_t)svc32.svc_error;
1839 1842 svc.svc_major = svc32.svc_major;
1840 1843 svc.svc_minor = svc32.svc_minor;
1841 1844 (void) strcpy(svc.svc_path, svc32.svc_path);
1842 1845 svc.svc_flag = svc32.svc_flag;
1843 1846 } else {
1844 1847 if (ddi_copyin((void *)arg, &svc,
1845 1848 sizeof (svc), mode) < 0) {
1846 1849 spcs_s_kfree(kstatus);
1847 1850 return (EFAULT);
1848 1851 }
1849 1852 }
1850 1853
1851 1854 if (svc.svc_major == (major_t)-1 &&
1852 1855 svc.svc_minor == (minor_t)-1) {
1853 1856 sv_dev_t *svp;
1854 1857 int i;
1855 1858
1856 1859 /*
1857 1860 * User level could not find the minor device
1858 1861 * node, so do this the slow way by searching
1859 1862 * the entire sv config for a matching pathname.
1860 1863 */
1861 1864
1862 1865 phash = nsc_strhash(svc.svc_path);
1863 1866
1864 1867 mutex_enter(&sv_mutex);
1865 1868
1866 1869 for (i = 0; i < sv_max_devices; i++) {
1867 1870 svp = &sv_devs[i];
1868 1871
1869 1872 if (svp->sv_state == SV_DISABLE ||
1870 1873 svp->sv_fd == NULL)
1871 1874 continue;
1872 1875
1873 1876 if (nsc_fdpathcmp(svp->sv_fd, phash,
1874 1877 svc.svc_path) == 0) {
1875 1878 svc.svc_major = getmajor(svp->sv_dev);
1876 1879 svc.svc_minor = getminor(svp->sv_dev);
1877 1880 break;
1878 1881 }
1879 1882 }
1880 1883
1881 1884 mutex_exit(&sv_mutex);
1882 1885
1883 1886 if (svc.svc_major == (major_t)-1 &&
1884 1887 svc.svc_minor == (minor_t)-1)
1885 1888 return (spcs_s_ocopyoutf(&kstatus,
1886 1889 svc.svc_error, SV_ENODEV));
1887 1890 }
1888 1891
1889 1892 rc = sv_disable(makedevice(svc.svc_major, svc.svc_minor),
1890 1893 kstatus);
1891 1894
1892 1895 if (rc == 0) {
1893 1896 sv_config_time = nsc_lbolt();
1894 1897
1895 1898 mutex_enter(&sv_mutex);
1896 1899 sv_thread_tune(-sv_threads_dev);
1897 1900 mutex_exit(&sv_mutex);
1898 1901 }
1899 1902
1900 1903 DTRACE_PROBE3(sv_ioctl_2, dev_t, dev, int, *rvalp, int, rc);
1901 1904
1902 1905 return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc));
1903 1906 /* NOTREACHED */
1904 1907
1905 1908 case SVIOC_LIST:
1906 1909
1907 1910 if (ilp32) {
1908 1911 if (ddi_copyin((void *)arg, &svl32,
1909 1912 sizeof (svl32), mode) < 0) {
1910 1913 spcs_s_kfree(kstatus);
1911 1914 return (EFAULT);
1912 1915 }
1913 1916
1914 1917 ustatus = (spcs_s_info_t)svl32.svl_error;
1915 1918 size = svl32.svl_count;
1916 1919 usvn = (void *)(unsigned long)svl32.svl_names;
1917 1920 } else {
1918 1921 if (ddi_copyin((void *)arg, &svl,
1919 1922 sizeof (svl), mode) < 0) {
1920 1923 spcs_s_kfree(kstatus);
1921 1924 return (EFAULT);
1922 1925 }
1923 1926
1924 1927 ustatus = svl.svl_error;
1925 1928 size = svl.svl_count;
1926 1929 usvn = svl.svl_names;
1927 1930 }
1928 1931
1929 1932 /* Do some boundary checking */
1930 1933 if ((size < 0) || (size > sv_max_devices)) {
1931 1934 /* Array size is out of range */
1932 1935 return (spcs_s_ocopyoutf(&kstatus, ustatus,
1933 1936 SV_EARRBOUNDS, "0",
1934 1937 spcs_s_inttostring(sv_max_devices, itmp1,
1935 1938 sizeof (itmp1), 0),
1936 1939 spcs_s_inttostring(size, itmp2,
1937 1940 sizeof (itmp2), 0)));
1938 1941 }
1939 1942
1940 1943 if (ilp32)
1941 1944 bytes = size * sizeof (sv_name32_t);
1942 1945 else
1943 1946 bytes = size * sizeof (sv_name_t);
1944 1947
1945 1948 /* Allocate memory for the array of structures */
1946 1949 if (bytes != 0) {
1947 1950 svn = kmem_zalloc(bytes, KM_SLEEP);
1948 1951 if (!svn) {
1949 1952 return (spcs_s_ocopyoutf(&kstatus,
1950 1953 ustatus, ENOMEM));
1951 1954 }
1952 1955 }
1953 1956
1954 1957 rc = sv_list(svn, size, rvalp, ilp32);
1955 1958 if (rc) {
1956 1959 if (svn != NULL)
1957 1960 kmem_free(svn, bytes);
1958 1961 return (spcs_s_ocopyoutf(&kstatus, ustatus, rc));
1959 1962 }
1960 1963
1961 1964 if (ilp32) {
1962 1965 svl32.svl_timestamp = (uint32_t)sv_config_time;
1963 1966 svl32.svl_maxdevs = (int32_t)sv_max_devices;
1964 1967
1965 1968 /* Return the list structure */
1966 1969 if (ddi_copyout(&svl32, (void *)arg,
1967 1970 sizeof (svl32), mode) < 0) {
1968 1971 spcs_s_kfree(kstatus);
1969 1972 if (svn != NULL)
1970 1973 kmem_free(svn, bytes);
1971 1974 return (EFAULT);
1972 1975 }
1973 1976 } else {
1974 1977 svl.svl_timestamp = sv_config_time;
1975 1978 svl.svl_maxdevs = sv_max_devices;
1976 1979
1977 1980 /* Return the list structure */
1978 1981 if (ddi_copyout(&svl, (void *)arg,
1979 1982 sizeof (svl), mode) < 0) {
1980 1983 spcs_s_kfree(kstatus);
1981 1984 if (svn != NULL)
1982 1985 kmem_free(svn, bytes);
1983 1986 return (EFAULT);
1984 1987 }
1985 1988 }
1986 1989
1987 1990 /* Return the array */
1988 1991 if (svn != NULL) {
1989 1992 if (ddi_copyout(svn, usvn, bytes, mode) < 0) {
1990 1993 kmem_free(svn, bytes);
1991 1994 spcs_s_kfree(kstatus);
1992 1995 return (EFAULT);
1993 1996 }
1994 1997 kmem_free(svn, bytes);
1995 1998 }
1996 1999
1997 2000 DTRACE_PROBE3(sv_ioctl_3, dev_t, dev, int, *rvalp, int, 0);
1998 2001
1999 2002 return (spcs_s_ocopyoutf(&kstatus, ustatus, 0));
2000 2003 /* NOTREACHED */
2001 2004
2002 2005 case SVIOC_VERSION:
2003 2006
2004 2007 if (ilp32) {
2005 2008 sv_version32_t svv32;
2006 2009
2007 2010 if (ddi_copyin((void *)arg, &svv32,
2008 2011 sizeof (svv32), mode) < 0) {
2009 2012 spcs_s_kfree(kstatus);
2010 2013 return (EFAULT);
2011 2014 }
2012 2015
2013 2016 svv32.svv_major_rev = sv_major_rev;
2014 2017 svv32.svv_minor_rev = sv_minor_rev;
2015 2018 svv32.svv_micro_rev = sv_micro_rev;
2016 2019 svv32.svv_baseline_rev = sv_baseline_rev;
2017 2020
2018 2021 if (ddi_copyout(&svv32, (void *)arg,
2019 2022 sizeof (svv32), mode) < 0) {
2020 2023 spcs_s_kfree(kstatus);
2021 2024 return (EFAULT);
2022 2025 }
2023 2026
2024 2027 ustatus = (spcs_s_info_t)svv32.svv_error;
2025 2028 } else {
2026 2029 if (ddi_copyin((void *)arg, &svv,
2027 2030 sizeof (svv), mode) < 0) {
2028 2031 spcs_s_kfree(kstatus);
2029 2032 return (EFAULT);
2030 2033 }
2031 2034
2032 2035 svv.svv_major_rev = sv_major_rev;
2033 2036 svv.svv_minor_rev = sv_minor_rev;
2034 2037 svv.svv_micro_rev = sv_micro_rev;
2035 2038 svv.svv_baseline_rev = sv_baseline_rev;
2036 2039
2037 2040 if (ddi_copyout(&svv, (void *)arg,
2038 2041 sizeof (svv), mode) < 0) {
2039 2042 spcs_s_kfree(kstatus);
2040 2043 return (EFAULT);
2041 2044 }
2042 2045
2043 2046 ustatus = svv.svv_error;
2044 2047 }
2045 2048
2046 2049 DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, 0);
2047 2050
2048 2051 return (spcs_s_ocopyoutf(&kstatus, ustatus, 0));
2049 2052 /* NOTREACHED */
2050 2053
2051 2054 case SVIOC_UNLOAD:
2052 2055 rc = sv_prepare_unload();
2053 2056
2054 2057 if (ddi_copyout(&rc, (void *)arg, sizeof (rc), mode) < 0) {
2055 2058 rc = EFAULT;
2056 2059 }
2057 2060
2058 2061 spcs_s_kfree(kstatus);
2059 2062 return (rc);
2060 2063
2061 2064 default:
2062 2065 spcs_s_kfree(kstatus);
2063 2066
2064 2067 DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, EINVAL);
2065 2068
2066 2069 return (EINVAL);
2067 2070 /* NOTREACHED */
2068 2071 }
2069 2072
2070 2073 /* NOTREACHED */
2071 2074 }
2072 2075
2073 2076
2074 2077 /* ARGSUSED */
2075 2078 static int
2076 2079 svprint(dev_t dev, char *str)
2077 2080 {
2078 2081 int instance = ddi_get_instance(sv_dip);
2079 2082 cmn_err(CE_WARN, "!%s%d: %s", ddi_get_name(sv_dip), instance, str);
2080 2083 return (0);
2081 2084 }
2082 2085
2083 2086
2084 2087 static void
2085 2088 _sv_lyr_strategy(struct buf *bp)
2086 2089 {
2087 2090 caddr_t buf_addr; /* pointer to linear buffer in bp */
2088 2091 nsc_buf_t *bufh = NULL;
2089 2092 nsc_buf_t *hndl = NULL;
2090 2093 sv_dev_t *svp;
2091 2094 nsc_vec_t *v;
2092 2095 sv_maj_t *maj;
2093 2096 nsc_size_t fba_req, fba_len; /* FBA lengths */
2094 2097 nsc_off_t fba_off; /* FBA offset */
2095 2098 size_t tocopy, nbytes; /* byte lengths */
2096 2099 int rw, rc; /* flags and return codes */
2097 2100 int (*fn)();
2098 2101
2099 2102 rc = 0;
2100 2103
2101 2104 if (sv_debug > 5)
2102 2105 cmn_err(CE_CONT, "!_sv_lyr_strategy(%p)\n", (void *)bp);
2103 2106
2104 2107 svp = sv_find_enabled(bp->b_edev, &maj);
2105 2108 if (svp == NULL) {
2106 2109 if (maj && (fn = maj->sm_strategy) != 0) {
2107 2110 if (!(maj->sm_flag & D_MP)) {
2108 2111 UNSAFE_ENTER();
2109 2112 rc = (*fn)(bp);
2110 2113 UNSAFE_EXIT();
2111 2114 } else {
2112 2115 rc = (*fn)(bp);
2113 2116 }
2114 2117 return;
2115 2118 } else {
2116 2119 bioerror(bp, ENODEV);
2117 2120 biodone(bp);
2118 2121 return;
2119 2122 }
2120 2123 }
2121 2124
2122 2125 ASSERT(RW_READ_HELD(&svp->sv_lock));
2123 2126
2124 2127 if (svp->sv_flag == 0) {
2125 2128 /*
2126 2129 * guard access mode
2127 2130 * - prevent user level access to the device
2128 2131 */
2129 2132 DTRACE_PROBE1(sv_lyr_strategy_err_guard, struct buf *, bp);
2130 2133 bioerror(bp, EPERM);
2131 2134 goto out;
2132 2135 }
2133 2136
2134 2137 if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) {
2135 2138 DTRACE_PROBE1(sv_lyr_strategy_err_rsrv, struct buf *, bp);
2136 2139
2137 2140 if (rc == EINTR)
2138 2141 cmn_err(CE_WARN, "!nsc_reserve() returned EINTR");
2139 2142 bioerror(bp, rc);
2140 2143 goto out;
2141 2144 }
2142 2145
2143 2146 if (bp->b_lblkno >= (diskaddr_t)svp->sv_nblocks) {
2144 2147 DTRACE_PROBE1(sv_lyr_strategy_eof, struct buf *, bp);
2145 2148
2146 2149 if (bp->b_flags & B_READ) {
2147 2150 /* return EOF, not an error */
2148 2151 bp->b_resid = bp->b_bcount;
2149 2152 bioerror(bp, 0);
2150 2153 } else
2151 2154 bioerror(bp, EINVAL);
2152 2155
2153 2156 goto done;
2154 2157 }
2155 2158
2156 2159 /*
2157 2160 * Preallocate a handle once per call to strategy.
2158 2161 * If this fails, then nsc_alloc_buf() will allocate
2159 2162 * a temporary handle per allocation/free pair.
2160 2163 */
2161 2164
2162 2165 DTRACE_PROBE1(sv_dbg_alloch_start, sv_dev_t *, svp);
2163 2166
2164 2167 bufh = nsc_alloc_handle(svp->sv_fd, NULL, NULL, NULL);
2165 2168
2166 2169 DTRACE_PROBE1(sv_dbg_alloch_end, sv_dev_t *, svp);
2167 2170
2168 2171 if (bufh && (bufh->sb_flag & NSC_HACTIVE) != 0) {
2169 2172 DTRACE_PROBE1(sv_lyr_strategy_err_hactive, struct buf *, bp);
2170 2173
2171 2174 cmn_err(CE_WARN,
2172 2175 "!sv: allocated active handle (bufh %p, flags %x)",
2173 2176 (void *)bufh, bufh->sb_flag);
2174 2177
2175 2178 bioerror(bp, ENXIO);
2176 2179 goto done;
2177 2180 }
2178 2181
2179 2182 fba_req = FBA_LEN(bp->b_bcount);
2180 2183 if (fba_req + bp->b_lblkno > (diskaddr_t)svp->sv_nblocks)
2181 2184 fba_req = (nsc_size_t)(svp->sv_nblocks - bp->b_lblkno);
2182 2185
2183 2186 rw = (bp->b_flags & B_READ) ? NSC_READ : NSC_WRITE;
2184 2187
2185 2188 bp_mapin(bp);
2186 2189
2187 2190 bp->b_resid = bp->b_bcount;
2188 2191 buf_addr = bp->b_un.b_addr;
2189 2192 fba_off = 0;
2190 2193
2191 2194 /*
2192 2195 * fba_req - requested size of transfer in FBAs after
2193 2196 * truncation to device extent, and allowing for
2194 2197 * a possible non-FBA-bounded final chunk.
2195 2198 * fba_off - offset of start of chunk from start of bp in FBAs.
2196 2199 * fba_len - size of this chunk in FBAs.
2197 2200 */
2198 2201
2199 2202 loop:
2200 2203 fba_len = min(fba_req, svp->sv_maxfbas);
2201 2204 hndl = bufh;
2202 2205
2203 2206 DTRACE_PROBE4(sv_dbg_allocb_start,
2204 2207 sv_dev_t *, svp,
2205 2208 uint64_t, (uint64_t)(bp->b_lblkno + fba_off),
2206 2209 uint64_t, (uint64_t)fba_len,
2207 2210 int, rw);
2208 2211
2209 2212 rc = nsc_alloc_buf(svp->sv_fd, (nsc_off_t)(bp->b_lblkno + fba_off),
2210 2213 fba_len, rw, &hndl);
2211 2214
2212 2215 DTRACE_PROBE1(sv_dbg_allocb_end, sv_dev_t *, svp);
2213 2216
2214 2217 if (rc > 0) {
2215 2218 DTRACE_PROBE1(sv_lyr_strategy_err_alloc, struct buf *, bp);
2216 2219 bioerror(bp, rc);
2217 2220 if (hndl != bufh)
2218 2221 (void) nsc_free_buf(hndl);
2219 2222 hndl = NULL;
2220 2223 goto done;
2221 2224 }
2222 2225
2223 2226 tocopy = min(FBA_SIZE(fba_len), bp->b_resid);
2224 2227 v = hndl->sb_vec;
2225 2228
2226 2229 if (rw == NSC_WRITE && FBA_OFF(tocopy) != 0) {
2227 2230 /*
2228 2231 * Not overwriting all of the last FBA, so read in the
2229 2232 * old contents now before we overwrite it with the new
2230 2233 * data.
2231 2234 */
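		/*
		 * (FBA_OFF(tocopy) is the byte remainder of tocopy modulo
		 * the FBA size, so a non-zero value means the final FBA of
		 * this chunk is only partially overwritten.)
		 */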
2232 2235
2233 2236 DTRACE_PROBE2(sv_dbg_read_start, sv_dev_t *, svp,
2234 2237 uint64_t, (uint64_t)(hndl->sb_pos + hndl->sb_len - 1));
2235 2238
2236 2239 rc = nsc_read(hndl, (hndl->sb_pos + hndl->sb_len - 1), 1, 0);
2237 2240 if (rc > 0) {
2238 2241 bioerror(bp, rc);
2239 2242 goto done;
2240 2243 }
2241 2244
2242 2245 DTRACE_PROBE1(sv_dbg_read_end, sv_dev_t *, svp);
2243 2246 }
2244 2247
2245 2248 DTRACE_PROBE1(sv_dbg_bcopy_start, sv_dev_t *, svp);
2246 2249
2247 2250 while (tocopy > 0) {
2248 2251 nbytes = min(tocopy, (nsc_size_t)v->sv_len);
2249 2252
2250 2253 if (bp->b_flags & B_READ)
2251 2254 (void) bcopy(v->sv_addr, buf_addr, nbytes);
2252 2255 else
2253 2256 (void) bcopy(buf_addr, v->sv_addr, nbytes);
2254 2257
2255 2258 bp->b_resid -= nbytes;
2256 2259 buf_addr += nbytes;
2257 2260 tocopy -= nbytes;
2258 2261 v++;
2259 2262 }
2260 2263
2261 2264 DTRACE_PROBE1(sv_dbg_bcopy_end, sv_dev_t *, svp);
2262 2265
2263 2266 if ((bp->b_flags & B_READ) == 0) {
2264 2267 DTRACE_PROBE3(sv_dbg_write_start, sv_dev_t *, svp,
2265 2268 uint64_t, (uint64_t)hndl->sb_pos,
2266 2269 uint64_t, (uint64_t)hndl->sb_len);
2267 2270
2268 2271 rc = nsc_write(hndl, hndl->sb_pos, hndl->sb_len, 0);
2269 2272
2270 2273 DTRACE_PROBE1(sv_dbg_write_end, sv_dev_t *, svp);
2271 2274
2272 2275 if (rc > 0) {
2273 2276 bioerror(bp, rc);
2274 2277 goto done;
2275 2278 }
2276 2279 }
2277 2280
2278 2281 /*
2279 2282 * Adjust FBA offset and requested (i.e. remaining) length,
2280 2283 * loop if more data to transfer.
2281 2284 */
2282 2285
2283 2286 fba_off += fba_len;
2284 2287 fba_req -= fba_len;
2285 2288
2286 2289 if (fba_req > 0) {
2287 2290 DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp);
2288 2291
2289 2292 rc = nsc_free_buf(hndl);
2290 2293
2291 2294 DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp);
2292 2295
2293 2296 if (rc > 0) {
2294 2297 DTRACE_PROBE1(sv_lyr_strategy_err_free,
2295 2298 struct buf *, bp);
2296 2299 bioerror(bp, rc);
2297 2300 }
2298 2301
2299 2302 hndl = NULL;
2300 2303
2301 2304 if (rc <= 0)
2302 2305 goto loop;
2303 2306 }
2304 2307
2305 2308 done:
2306 2309 if (hndl != NULL) {
2307 2310 DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp);
2308 2311
2309 2312 rc = nsc_free_buf(hndl);
2310 2313
2311 2314 DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp);
2312 2315
2313 2316 if (rc > 0) {
2314 2317 DTRACE_PROBE1(sv_lyr_strategy_err_free,
2315 2318 struct buf *, bp);
2316 2319 bioerror(bp, rc);
2317 2320 }
2318 2321
2319 2322 hndl = NULL;
2320 2323 }
2321 2324
2322 2325 if (bufh)
2323 2326 (void) nsc_free_handle(bufh);
2324 2327
2325 2328 DTRACE_PROBE1(sv_dbg_rlse_start, sv_dev_t *, svp);
2326 2329
2327 2330 nsc_release(svp->sv_fd);
2328 2331
2329 2332 DTRACE_PROBE1(sv_dbg_rlse_end, sv_dev_t *, svp);
2330 2333
2331 2334 out:
2332 2335 if (sv_debug > 5) {
2333 2336 cmn_err(CE_CONT,
2334 2337 "!_sv_lyr_strategy: bp %p, bufh %p, bp->b_error %d\n",
2335 2338 (void *)bp, (void *)bufh, bp->b_error);
2336 2339 }
2337 2340
2338 2341 DTRACE_PROBE2(sv_lyr_strategy_end, struct buf *, bp, int, bp->b_error);
2339 2342
2340 2343 rw_exit(&svp->sv_lock);
2341 2344 biodone(bp);
2342 2345 }
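
/*
 * Reading aid, not part of the driver: a minimal user-level sketch of the
 * chunking arithmetic used by _sv_lyr_strategy() above.  The request is
 * walked in sv_maxfbas-sized pieces, advancing fba_off and shrinking
 * fba_req until nothing remains.  The request size and maxfbas values
 * below are invented for the example.
 */
#include <stdio.h>

int
main(void)
{
	unsigned long long fba_req = 1000;	/* cf. FBA_LEN(bp->b_bcount) */
	unsigned long long fba_off = 0;		/* chunk offset, in FBAs */
	unsigned long long maxfbas = 256;	/* assumed svp->sv_maxfbas */
	unsigned long long fba_len;		/* this chunk's size, in FBAs */

	while (fba_req > 0) {
		fba_len = (fba_req < maxfbas) ? fba_req : maxfbas;
		/* nsc_alloc_buf() / copy / nsc_free_buf() happen here */
		printf("chunk at +%llu for %llu FBAs\n", fba_off, fba_len);
		fba_off += fba_len;
		fba_req -= fba_len;
	}
	/* prints four chunks: 256, 256, 256 and 232 FBAs */
	return (0);
}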
2343 2346
2344 2347
2345 2348 static void
2346 2349 sv_async_strategy(blind_t arg)
2347 2350 {
2348 2351 struct buf *bp = (struct buf *)arg;
2349 2352 _sv_lyr_strategy(bp);
2350 2353 }
2351 2354
2352 2355
2353 2356 static int
2354 2357 sv_lyr_strategy(struct buf *bp)
2355 2358 {
2356 2359 nsthread_t *tp;
2357 2360 int nlive;
2358 2361
2359 2362 /*
2360 2363 * If B_ASYNC were part of the DDI, we could use it as a hint
2361 2364 * not to create a thread for synchronous i/o.
2362 2365 */
2363 2366 if (sv_dev_to_sv(bp->b_edev, NULL) == NULL) {
2364 2367 /* not sv enabled - just pass through */
2365 2368 DTRACE_PROBE1(sv_lyr_strategy_notsv, struct buf *, bp);
2366 2369 _sv_lyr_strategy(bp);
2367 2370 return (0);
2368 2371 }
2369 2372
2370 2373 if (sv_debug > 4) {
2371 2374 cmn_err(CE_CONT, "!sv_lyr_strategy: nthread %d nlive %d\n",
2372 2375 nst_nthread(sv_tset), nst_nlive(sv_tset));
2373 2376 }
2374 2377
2375 2378 /*
2376 2379 * If only guard devices are enabled, there
2377 2380 * won't be a threadset, so don't try to use it.
2378 2381 */
2379 2382 tp = NULL;
2380 2383 if (sv_tset != NULL) {
2381 2384 tp = nst_create(sv_tset, sv_async_strategy, (blind_t)bp, 0);
2382 2385 }
2383 2386
2384 2387 if (tp == NULL) {
2385 2388 /*
2386 2389 * Out of threads, so fall back to synchronous i/o.
2387 2390 */
2388 2391 if (sv_debug > 0) {
2389 2392 cmn_err(CE_CONT,
2390 2393 "!sv_lyr_strategy: thread alloc failed\n");
2391 2394 }
2392 2395
2393 2396 DTRACE_PROBE1(sv_lyr_strategy_no_thread,
2394 2397 struct buf *, bp);
2395 2398
2396 2399 _sv_lyr_strategy(bp);
2397 2400 sv_no_threads++;
2398 2401 } else {
2399 2402 nlive = nst_nlive(sv_tset);
2400 2403 if (nlive > sv_max_nlive) {
2401 2404 if (sv_debug > 0) {
2402 2405 cmn_err(CE_CONT,
2403 2406 "!sv_lyr_strategy: "
2404 2407 "new max nlive %d (nthread %d)\n",
2405 2408 nlive, nst_nthread(sv_tset));
2406 2409 }
2407 2410
2408 2411 sv_max_nlive = nlive;
2409 2412 }
2410 2413 }
2411 2414
2412 2415 return (0);
2413 2416 }
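
/*
 * Reading aid, not part of the driver: the dispatch-or-fall-back pattern
 * of sv_lyr_strategy() above, sketched with POSIX threads standing in for
 * nst_create() and the sv threadset.  All names here are invented for the
 * illustration.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
static int no_threads;			/* cf. sv_no_threads */

static void
do_io(void *bp)
{
	printf("i/o for %p\n", bp);	/* cf. _sv_lyr_strategy(bp) */
}

static void *
worker(void *bp)
{
	do_io(bp);
	return (NULL);
}

static void
dispatch(void *bp)
{
	pthread_t tid;

	if (pthread_create(&tid, NULL, worker, bp) != 0) {
		/* no thread available: fall back to synchronous i/o */
		pthread_mutex_lock(&count_lock);
		no_threads++;
		pthread_mutex_unlock(&count_lock);
		do_io(bp);
		return;
	}
	(void) pthread_detach(tid);
}

int
main(void)
{
	int buf;

	dispatch(&buf);
	pthread_exit(NULL);	/* let any detached worker finish */
	/* NOTREACHED */
}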
2414 2417
2415 2418
2416 2419 #ifndef offsetof
2417 2420 #define offsetof(s, m) ((size_t)(&((s *)0)->m))
2418 2421 #endif
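
/*
 * The fallback definition above computes a member's byte offset by taking
 * the member's address relative to a null base pointer; nothing is ever
 * dereferenced.  It is used below to locate a single p_size field within
 * a user-space vtoc so that only that field need be rewritten.
 */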
2419 2422
2420 2423 /*
2421 2424 * Re-write the size of the current partition
2422 2425 */
2423 2426 static int
2424 2427 sv_fix_dkiocgvtoc(const intptr_t arg, const int mode, sv_dev_t *svp)
2425 2428 {
2426 2429 size_t offset;
2427 2430 int ilp32;
2428 2431 int pnum;
2429 2432 int rc;
2430 2433
2431 2434 ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32);
2432 2435
2433 2436 rc = nskern_partition(svp->sv_dev, &pnum);
2434 2437 if (rc != 0) {
2435 2438 return (rc);
2436 2439 }
2437 2440
2438 2441 if (pnum < 0 || pnum >= V_NUMPAR) {
2439 2442 cmn_err(CE_WARN,
2440 2443 "!sv_gvtoc: unable to determine partition number "
2441 2444 "for dev %lx", svp->sv_dev);
2442 2445 return (EINVAL);
2443 2446 }
2444 2447
2445 2448 if (ilp32) {
2446 2449 int32_t p_size;
2447 2450
2448 2451 #ifdef _SunOS_5_6
2449 2452 offset = offsetof(struct vtoc, v_part);
2450 2453 offset += sizeof (struct partition) * pnum;
2451 2454 offset += offsetof(struct partition, p_size);
2452 2455 #else
2453 2456 offset = offsetof(struct vtoc32, v_part);
2454 2457 offset += sizeof (struct partition32) * pnum;
2455 2458 offset += offsetof(struct partition32, p_size);
2456 2459 #endif
2457 2460
2458 2461 p_size = (int32_t)svp->sv_nblocks;
2459 2462 if (p_size == 0) {
2460 2463 if (sv_reserve(svp->sv_fd,
2461 2464 NSC_MULTI|NSC_PCATCH) == 0) {
2462 2465 p_size = (int32_t)svp->sv_nblocks;
2463 2466 nsc_release(svp->sv_fd);
2464 2467 } else {
2465 2468 rc = EINTR;
2466 2469 }
2467 2470 }
2468 2471
2469 2472 if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset),
2470 2473 sizeof (p_size), mode) != 0) {
2471 2474 rc = EFAULT;
2472 2475 }
2473 2476 } else {
2474 2477 long p_size;
2475 2478
2476 2479 offset = offsetof(struct vtoc, v_part);
2477 2480 offset += sizeof (struct partition) * pnum;
2478 2481 offset += offsetof(struct partition, p_size);
2479 2482
2480 2483 p_size = (long)svp->sv_nblocks;
2481 2484 if (p_size == 0) {
2482 2485 if (sv_reserve(svp->sv_fd,
2483 2486 NSC_MULTI|NSC_PCATCH) == 0) {
2484 2487 p_size = (long)svp->sv_nblocks;
2485 2488 nsc_release(svp->sv_fd);
2486 2489 } else {
2487 2490 rc = EINTR;
2488 2491 }
2489 2492 }
2490 2493
2491 2494 if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset),
2492 2495 sizeof (p_size), mode) != 0) {
2493 2496 rc = EFAULT;
2494 2497 }
2495 2498 }
2496 2499
2497 2500 return (rc);
2498 2501 }
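
/*
 * Reading aid, not part of the driver: a standalone sketch of the offset
 * arithmetic used by sv_fix_dkiocgvtoc() above, with a cut-down,
 * hypothetical vtoc layout (the real struct vtoc has more fields).  It
 * shows how a single p_size field can be patched in place inside the
 * user's buffer without copying the whole vtoc.
 */
#include <stddef.h>
#include <stdio.h>

/* cut-down stand-ins for struct partition / struct vtoc */
struct xpartition {
	unsigned short	p_tag;
	unsigned short	p_flag;
	long		p_start;
	long		p_size;
};

struct xvtoc {
	unsigned long	v_sanity;
	struct xpartition v_part[8];
};

int
main(void)
{
	int pnum = 3;			/* example partition number */
	size_t offset;

	offset = offsetof(struct xvtoc, v_part);
	offset += sizeof (struct xpartition) * pnum;
	offset += offsetof(struct xpartition, p_size);

	/* the driver copies sizeof (p_size) bytes out to (arg + offset) */
	printf("slice %d p_size at byte offset %zu\n", pnum, offset);
	return (0);
}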
2499 2502
2500 2503
2501 2504 #ifdef DKIOCPARTITION
2502 2505 /*
2503 2506 * Re-write the size of the current partition
2504 2507 *
2505 2508 * arg is dk_efi_t.
2506 2509 *
2507 2510 * dk_efi_t->dki_data = (void *)(uintptr_t)efi.dki_data_64;
2508 2511 *
2509 2512 * dk_efi_t->dki_data --> efi_gpt_t (label header)
2510 2513 * dk_efi_t->dki_data + 1 --> efi_gpe_t[] (array of partitions)
2511 2514 *
2512 2515 * efi_gpt_t->efi_gpt_PartitionEntryArrayCRC32 --> CRC32 of array of parts
2513 2516 * efi_gpt_t->efi_gpt_HeaderCRC32 --> CRC32 of header itself
2514 2517 *
2515 2518 * This assumes that sizeof (efi_gpt_t) is the same as the size of a
2516 2519 * logical block on the disk.
2517 2520 *
2518 2521 * Everything is little endian (i.e. disk format).
2519 2522 */
2520 2523 static int
2521 2524 sv_fix_dkiocgetefi(const intptr_t arg, const int mode, sv_dev_t *svp)
2522 2525 {
2523 2526 dk_efi_t efi;
2524 2527 efi_gpt_t gpt;
2525 2528 efi_gpe_t *gpe = NULL;
2526 2529 size_t sgpe;
2527 2530 uint64_t p_size; /* virtual partition size from nsctl */
2528 2531 uint32_t crc;
2529 2532 int unparts; /* number of parts in user's array */
2530 2533 int pnum;
2531 2534 int rc;
2532 2535
2533 2536 rc = nskern_partition(svp->sv_dev, &pnum);
2534 2537 if (rc != 0) {
2535 2538 return (rc);
2536 2539 }
2537 2540
2538 2541 if (pnum < 0) {
2539 2542 cmn_err(CE_WARN,
2540 2543 "!sv_efi: unable to determine partition number for dev %lx",
2541 2544 svp->sv_dev);
2542 2545 return (EINVAL);
2543 2546 }
2544 2547
2545 2548 if (ddi_copyin((void *)arg, &efi, sizeof (efi), mode)) {
2546 2549 return (EFAULT);
2547 2550 }
2548 2551
2549 2552 efi.dki_data = (void *)(uintptr_t)efi.dki_data_64;
2550 2553
2551 2554 if (efi.dki_length < sizeof (gpt) + sizeof (*gpe)) {
2552 2555 return (EINVAL);
2553 2556 }
2554 2557
2555 2558 if (ddi_copyin((void *)efi.dki_data, &gpt, sizeof (gpt), mode)) {
2556 2559 rc = EFAULT;
2557 2560 goto out;
2558 2561 }
2559 2562
2560 2563 if ((unparts = LE_32(gpt.efi_gpt_NumberOfPartitionEntries)) == 0)
2561 2564 unparts = 1;
2562 2565 else if (pnum >= unparts) {
2563 2566 cmn_err(CE_WARN,
2564 2567 "!sv_efi: partition# beyond end of user array (%d >= %d)",
2565 2568 pnum, unparts);
2566 2569 return (EINVAL);
2567 2570 }
2568 2571
2569 2572 sgpe = sizeof (*gpe) * unparts;
2570 2573 gpe = kmem_alloc(sgpe, KM_SLEEP);
2571 2574
2572 2575 if (ddi_copyin((void *)(efi.dki_data + 1), gpe, sgpe, mode)) {
2573 2576 rc = EFAULT;
2574 2577 goto out;
2575 2578 }
2576 2579
2577 2580 p_size = svp->sv_nblocks;
2578 2581 if (p_size == 0) {
2579 2582 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
2580 2583 p_size = (diskaddr_t)svp->sv_nblocks;
2581 2584 nsc_release(svp->sv_fd);
2582 2585 } else {
2583 2586 rc = EINTR;
2584 2587 }
2585 2588 }
2586 2589
2587 2590 gpe[pnum].efi_gpe_EndingLBA = LE_64(
2588 2591 LE_64(gpe[pnum].efi_gpe_StartingLBA) + p_size - 1);
2589 2592
2590 2593 gpt.efi_gpt_PartitionEntryArrayCRC32 = 0;
2591 2594 CRC32(crc, gpe, sgpe, -1U, sv_crc32_table);
2592 2595 gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
2593 2596
2594 2597 gpt.efi_gpt_HeaderCRC32 = 0;
2595 2598 CRC32(crc, &gpt, sizeof (gpt), -1U, sv_crc32_table);
2596 2599 gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
2597 2600
2598 2601 if ((rc == 0) && ddi_copyout(&gpt, efi.dki_data, sizeof (gpt), mode)) {
2599 2602 rc = EFAULT;
2600 2603 goto out;
2601 2604 }
2602 2605
2603 2606 if ((rc == 0) && ddi_copyout(gpe, efi.dki_data + 1, sgpe, mode)) {
2604 2607 rc = EFAULT;
2605 2608 goto out;
2606 2609 }
2607 2610
2608 2611 out:
2609 2612 if (gpe) {
2610 2613 kmem_free(gpe, sgpe);
2611 2614 }
2612 2615
2613 2616 return (rc);
2614 2617 }
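
/*
 * Reading aid, not part of the driver: the CRC re-sealing order above
 * matters -- the partition-entry-array CRC is computed first and stored
 * in the header, and only then is the header CRC computed with its own
 * CRC field zeroed.  A rough user-level sketch, using a plain bitwise
 * CRC-32 in place of the driver's CRC32()/sv_crc32_table machinery and
 * trimmed-down structures.  (The driver also byte-swaps with
 * LE_32/LE_64; that is omitted here.)
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* bitwise, reflected CRC-32 (poly 0xedb88320); stand-in for CRC32() */
static uint32_t
crc32(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0xedb88320U & -(crc & 1U));
	}
	return (crc);
}

/* trimmed-down stand-ins for efi_gpt_t / efi_gpe_t */
struct xgpt {
	uint32_t hdr_crc32;
	uint32_t gpe_array_crc32;
};
struct xgpe {
	uint64_t start_lba;
	uint64_t end_lba;
};

int
main(void)
{
	struct xgpt gpt;
	struct xgpe gpe[4];
	uint64_t p_size = 1000;		/* example virtual size, in blocks */

	memset(&gpt, 0, sizeof (gpt));
	memset(gpe, 0, sizeof (gpe));

	/* resize one entry, as the driver does for the sv'd partition */
	gpe[1].end_lba = gpe[1].start_lba + p_size - 1;

	/* 1. re-CRC the partition entry array, store it in the header */
	gpt.gpe_array_crc32 = ~crc32(~0U, gpe, sizeof (gpe));

	/* 2. re-CRC the header with its own CRC field zeroed */
	gpt.hdr_crc32 = 0;
	gpt.hdr_crc32 = ~crc32(~0U, &gpt, sizeof (gpt));

	printf("array crc %08x, header crc %08x\n",
	    gpt.gpe_array_crc32, gpt.hdr_crc32);
	return (0);
}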
2615 2618
2616 2619
2617 2620 /*
2618 2621 * Re-write the size of the partition specified by p_partno
2619 2622 *
2620 2623 * Note that if a DKIOCPARTITION is issued to an fd opened against a
2621 2624 * non-sv'd device, but p_partno requests the size for a different
2622 2625 * device that is sv'd, this function will *not* be called as sv is
2623 2626 * not interposed on the original device (the fd).
2624 2627 *
2625 2628 * It would not be easy to change this as we cannot get the partition
2626 2629 * number for the non-sv'd device, so cannot compute the dev_t of the
2627 2630 * (sv'd) p_partno device, and so cannot find out if it is sv'd or get
2628 2631 * its size from nsctl.
2629 2632 *
2630 2633 * See also the "Bug 4755783" comment in sv_lyr_ioctl().
2631 2634 */
2632 2635 static int
2633 2636 sv_fix_dkiocpartition(const intptr_t arg, const int mode, sv_dev_t *svp)
2634 2637 {
2635 2638 struct partition64 p64;
2636 2639 sv_dev_t *nsvp = NULL;
2637 2640 diskaddr_t p_size;
2638 2641 minor_t nminor;
2639 2642 int pnum, rc;
2640 2643 dev_t ndev;
2641 2644
2642 2645 rc = nskern_partition(svp->sv_dev, &pnum);
2643 2646 if (rc != 0) {
2644 2647 return (rc);
2645 2648 }
2646 2649
2647 2650 if (ddi_copyin((void *)arg, &p64, sizeof (p64), mode)) {
2648 2651 return (EFAULT);
2649 2652 }
2650 2653
2651 2654 if (p64.p_partno != pnum) {
2652 2655 /* switch to requested partition, not the current one */
2653 2656 nminor = getminor(svp->sv_dev) + (p64.p_partno - pnum);
2654 2657 ndev = makedevice(getmajor(svp->sv_dev), nminor);
2655 2658 nsvp = sv_find_enabled(ndev, NULL);
2656 2659 if (nsvp == NULL) {
2657 2660 /* not sv device - just return */
2658 2661 return (0);
2659 2662 }
2660 2663
2661 2664 svp = nsvp;
2662 2665 }
2663 2666
2664 2667 p_size = svp->sv_nblocks;
2665 2668 if (p_size == 0) {
2666 2669 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
2667 2670 p_size = (diskaddr_t)svp->sv_nblocks;
2668 2671 nsc_release(svp->sv_fd);
2669 2672 } else {
2670 2673 rc = EINTR;
2671 2674 }
2672 2675 }
2673 2676
2674 2677 if (nsvp != NULL) {
2675 2678 rw_exit(&nsvp->sv_lock);
2676 2679 }
2677 2680
2678 2681 if ((rc == 0) && ddi_copyout(&p_size,
2679 2682 (void *)(arg + offsetof(struct partition64, p_size)),
2680 2683 sizeof (p_size), mode) != 0) {
2681 2684 return (EFAULT);
2682 2685 }
2683 2686
2684 2687 return (rc);
2685 2688 }
2686 2689 #endif /* DKIOCPARTITION */
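
/*
 * Reading aid, not part of the driver: the slice switch in
 * sv_fix_dkiocpartition() relies on the slices of one target occupying
 * consecutive minor numbers, so the requested slice's dev_t is reachable
 * by offsetting the current minor.  A toy illustration; the minor layout
 * is invented for the example.
 */
#include <stdio.h>

int
main(void)
{
	int cur_minor = 19;	/* assumed: slice 3 of a target based at 16 */
	int cur_pnum = 3;	/* slice of the fd the ioctl arrived on */
	int req_pnum = 5;	/* p64.p_partno requested by the caller */
	int new_minor;

	new_minor = cur_minor + (req_pnum - cur_pnum);
	printf("slice %d is minor %d\n", req_pnum, new_minor);	/* 21 */
	return (0);
}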
2687 2690
2688 2691
2689 2692 static int
2690 2693 sv_lyr_ioctl(const dev_t dev, const int cmd, const intptr_t arg,
2691 2694 const int mode, cred_t *crp, int *rvalp)
2692 2695 {
2693 2696 sv_dev_t *svp;
2694 2697 sv_maj_t *maj;
2695 2698 int (*fn)();
2696 2699 int rc = 0;
2697 2700
2698 2701 maj = 0;
2699 2702 fn = 0;
2700 2703
2701 2704 /*
2702 2705 * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, carry on as normal.
2703 2706 * Otherwise it was previously SV_PREVENT_UNLOAD, and is now
2704 2707 * SV_ALLOW_UNLOAD, so the driver is expected to unload shortly.
2705 2708 *
2706 2709 * SV_ALLOW_UNLOAD is the final state, so no need to grab sv_mutex.
2707 2710 */
2708 2711 if (sv_mod_status == SV_ALLOW_UNLOAD) {
2709 2712 return (EBUSY);
2710 2713 }
2711 2714
2712 2715 svp = sv_find_enabled(dev, &maj);
2713 2716 if (svp != NULL) {
2714 2717 if (nskernd_isdaemon()) {
2715 2718 /*
2716 2719 * This is nskernd, which always needs to see
2717 2720 * the underlying disk device accurately.
2718 2721 *
2719 2722 * So just pass the ioctl straight through
2720 2723 * to the underlying driver as though the device
2721 2724 * was not sv enabled.
2722 2725 */
2723 2726 DTRACE_PROBE2(sv_lyr_ioctl_nskernd, sv_dev_t *, svp,
2724 2727 dev_t, dev);
2725 2728
2726 2729 rw_exit(&svp->sv_lock);
2727 2730 svp = NULL;
2728 2731 } else {
2729 2732 ASSERT(RW_READ_HELD(&svp->sv_lock));
2730 2733 }
2731 2734 }
2732 2735
2733 2736 /*
2734 2737 * We now have a locked and enabled SV device, or a non-SV device.
2735 2738 */
2736 2739
2737 2740 switch (cmd) {
2738 2741 /*
2739 2742 * DKIOCGVTOC, DKIOCSVTOC, DKIOCPARTITION, DKIOCGETEFI
2740 2743 * and DKIOCSETEFI are intercepted and faked up, as some
2741 2744 * i/o providers emulate volumes of a different size from
2742 2745 * the underlying volume.
2743 2746 *
2744 2747 * Setting the size by rewriting the vtoc is not permitted.
2745 2748 */
2746 2749
2747 2750 case DKIOCSVTOC:
2748 2751 #ifdef DKIOCPARTITION
2749 2752 case DKIOCSETEFI:
2750 2753 #endif
2751 2754 if (svp == NULL) {
2752 2755 /* not intercepted -- allow ioctl through */
2753 2756 break;
2754 2757 }
2755 2758
2756 2759 rw_exit(&svp->sv_lock);
2757 2760
2758 2761 DTRACE_PROBE2(sv_lyr_ioctl_svtoc, dev_t, dev, int, EPERM);
2759 2762
2760 2763 return (EPERM);
2761 2764
2762 2765 default:
2763 2766 break;
2764 2767 }
2765 2768
2766 2769 /*
2767 2770 * Pass through the real ioctl command.
2768 2771 */
2769 2772
2770 2773 if (maj && (fn = maj->sm_ioctl) != 0) {
2771 2774 if (!(maj->sm_flag & D_MP)) {
2772 2775 UNSAFE_ENTER();
2773 2776 rc = (*fn)(dev, cmd, arg, mode, crp, rvalp);
2774 2777 UNSAFE_EXIT();
2775 2778 } else {
2776 2779 rc = (*fn)(dev, cmd, arg, mode, crp, rvalp);
2777 2780 }
2778 2781 } else {
2779 2782 rc = ENODEV;
2780 2783 }
2781 2784
2782 2785 /*
2783 2786 * Bug 4755783
2784 2787 * Fix up the size of the current partition to allow
2785 2788 * for the virtual volume to be a different size from the
2786 2789 * physical volume (e.g. for II compact dependent shadows).
2787 2790 *
2788 2791 * Note that this only attempts to fix up the current partition
2789 2792 * - the one that the ioctl was issued against. There could be
2790 2793 * other sv'd partitions in the same vtoc, but we cannot tell
2791 2794 * so we don't attempt to fix them up.
2792 2795 */
2793 2796
2794 2797 if (svp != NULL && rc == 0) {
2795 2798 switch (cmd) {
2796 2799 case DKIOCGVTOC:
2797 2800 rc = sv_fix_dkiocgvtoc(arg, mode, svp);
2798 2801 break;
2799 2802
2800 2803 #ifdef DKIOCPARTITION
2801 2804 case DKIOCGETEFI:
2802 2805 rc = sv_fix_dkiocgetefi(arg, mode, svp);
2803 2806 break;
2804 2807
2805 2808 case DKIOCPARTITION:
2806 2809 rc = sv_fix_dkiocpartition(arg, mode, svp);
2807 2810 break;
2808 2811 #endif /* DKIOCPARTITION */
2809 2812 }
2810 2813 }
2811 2814
2812 2815 if (svp != NULL) {
2813 2816 rw_exit(&svp->sv_lock);
2814 2817 }
2815 2818
2816 2819 return (rc);
2817 2820 }
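
/*
 * Reading aid, not part of the driver: the overall shape of the layered
 * ioctl path above -- call the saved entry point of the underlying
 * driver (serialized when the target is not D_MP, as the
 * UNSAFE_ENTER()/UNSAFE_EXIT() bracket does), then post-process the
 * result for the few intercepted commands.  All names are invented for
 * the sketch.
 */
#include <pthread.h>
#include <stdio.h>

typedef int (*ioctl_fn_t)(int, int, long);

static pthread_mutex_t unsafe_lock = PTHREAD_MUTEX_INITIALIZER;

static int
underlying_ioctl(int dev, int cmd, long arg)
{
	(void) dev; (void) cmd; (void) arg;
	return (0);			/* pretend the real driver worked */
}

static int
fixup_size(int dev, int cmd, long arg)
{
	(void) dev; (void) cmd; (void) arg;
	return (0);			/* cf. sv_fix_dkiocgvtoc() etc. */
}

static int
layered_ioctl(ioctl_fn_t fn, int mp_safe, int intercepted,
    int dev, int cmd, long arg)
{
	int rc;

	if (fn == NULL)
		return (-1);		/* cf. ENODEV */

	if (!mp_safe) {
		/* cf. UNSAFE_ENTER()/UNSAFE_EXIT() */
		pthread_mutex_lock(&unsafe_lock);
		rc = fn(dev, cmd, arg);
		pthread_mutex_unlock(&unsafe_lock);
	} else {
		rc = fn(dev, cmd, arg);
	}

	/* fix up the result only for intercepted, successful commands */
	if (rc == 0 && intercepted)
		rc = fixup_size(dev, cmd, arg);

	return (rc);
}

int
main(void)
{
	printf("rc %d\n", layered_ioctl(underlying_ioctl, 0, 1, 0, 0, 0L));
	return (0);
}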
↓ open down ↓ | 1568 lines elided | ↑ open up ↑