1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Storage Volume Character and Block Driver (SV)
  28  *
  29  * This driver implements a simplistic /dev/{r}dsk/ interface to a
  30  * specified disk volume that is otherwise managed by the Prism
  31  * software.  The SV driver layers itself onto the underlying disk
  32  * device driver by changing function pointers in the cb_ops
  33  * structure.
  34  *
  35  * CONFIGURATION:
  36  *
  37  * 1. Configure the driver using the svadm utility.
  38  * 2. Access the device as before through /dev/rdsk/c?t?d?s?
  39  *
  40  * LIMITATIONS:
  41  *
  42  * This driver should NOT be used to share a device between another
  43  * DataServices user interface module (e.g., STE) and a user accessing
  44  * the device through the block device in O_WRITE mode.  This is because
  45  * writes through the block device are asynchronous (due to the page
  46  * cache) and so consistency between the block device user and the
  47  * STE user cannot be guaranteed.
  48  *
  49  * Data is copied between system struct buf(9s) and nsc_vec_t.  This is
  50  * wasteful and slow.
  51  */
  52 
  53 #include <sys/debug.h>
  54 #include <sys/types.h>
  55 
  56 #include <sys/ksynch.h>
  57 #include <sys/kmem.h>
  58 #include <sys/errno.h>
  59 #include <sys/varargs.h>
  60 #include <sys/file.h>
  61 #include <sys/open.h>
  62 #include <sys/conf.h>
  63 #include <sys/cred.h>
  64 #include <sys/buf.h>
  65 #include <sys/uio.h>
  66 #ifndef DS_DDICT
  67 #include <sys/pathname.h>
  68 #endif
  69 #include <sys/aio_req.h>
  70 #include <sys/dkio.h>
  71 #include <sys/vtoc.h>
  72 #include <sys/cmn_err.h>
  73 #include <sys/modctl.h>
  74 #include <sys/ddi.h>
  75 #include <sys/sunddi.h>
  76 #include <sys/sunldi.h>
  77 #include <sys/nsctl/nsvers.h>
  78 
  79 #include <sys/nsc_thread.h>
  80 #include <sys/unistat/spcs_s.h>
  81 #include <sys/unistat/spcs_s_k.h>
  82 #include <sys/unistat/spcs_errors.h>
  83 
  84 #ifdef DS_DDICT
  85 #include "../contract.h"
  86 #endif
  87 
  88 #include "../nsctl.h"
  89 
  90 
  91 #include <sys/sdt.h>              /* dtrace is S10 or later */
  92 
  93 #include "sv.h"
  94 #include "sv_impl.h"
  95 #include "sv_efi.h"
  96 
#define MAX_EINTR_COUNT 1000	/* max EINTR retries in sv_reserve() */

/*
 * sv_mod_status values - controls whether modunload is permitted.
 */
#define SV_PREVENT_UNLOAD 1
#define SV_ALLOW_UNLOAD 2

/* Driver revision numbers, reported via cmn_err() from _init(). */
static const int sv_major_rev = ISS_VERSION_MAJ;	/* Major number */
static const int sv_minor_rev = ISS_VERSION_MIN;	/* Minor number */
static const int sv_micro_rev = ISS_VERSION_MIC;	/* Micro number */
static const int sv_baseline_rev = ISS_VERSION_NUM;	/* Baseline number */

#ifdef DKIOCPARTITION
/*
 * CRC32 polynomial table needed for computing the checksums
 * in an EFI vtoc.
 */
static const uint32_t sv_crc32_table[256] = { CRC32_TABLE };
#endif

static clock_t sv_config_time;		/* Time of successful {en,dis}able */
static int sv_debug;			/* Set non-zero for debug to syslog */
static int sv_mod_status;		/* Set to prevent modunload */

static dev_info_t *sv_dip;		/* Single DIP for driver */
static kmutex_t sv_mutex;		/* Protect global lists, etc. */

static nsc_mem_t	*sv_mem;	/* nsctl memory allocator token */
 127 
/*
 * Per device and per major state.
 */

/*
 * On SunOS 5.6 (_SunOS_5_6) kernels the layered entry points are
 * serialised through the global unsafe_driver mutex; on later
 * releases these macros compile away to nothing.
 */
#ifndef _SunOS_5_6
#define UNSAFE_ENTER()
#define UNSAFE_EXIT()
#else
#define UNSAFE_ENTER()	mutex_enter(&unsafe_driver)
#define UNSAFE_EXIT()	mutex_exit(&unsafe_driver)
#endif

					/* hash table of major dev structures */
static sv_maj_t *sv_majors[SV_MAJOR_HASH_CNT] = {0};
static sv_dev_t *sv_devs;		/* array of per device structures */
static int sv_max_devices;		/* SV version of nsc_max_devices() */
static int sv_ndevices;			/* number of SV enabled devices */

/*
 * Threading.
 *
 * sv_attach() reads the "sv_threads" property (see sv.conf) and grows
 * sv_threads_max to match if the configured value exceeds it.
 */

int sv_threads_max = 1024;		/* maximum # to dynamically alloc */
int sv_threads = 32;			/* # to pre-allocate (see sv.conf) */
int sv_threads_extra = 0;		/* addl # we would have alloc'ed */

static nstset_t *sv_tset;		/* the threadset pointer */

static int sv_threads_hysteresis = 4;	/* hysteresis for threadset resizing */
static int sv_threads_dev = 2;		/* # of threads to alloc per device */
static int sv_threads_inc = 8;		/* increment for changing the set */
static int sv_threads_needed;		/* number of threads needed */
static int sv_no_threads;		/* number of nsc_create errors */
static int sv_max_nlive;		/* max number of threads running */
 164 
/*
 * nsctl fd callbacks.
 *
 * Registered with nsc_open() via sv_fd_def (see sv_enable()) so that
 * nsctl can call back into sv when the underlying device is attached
 * or detached.
 */

static int svattach_fd(blind_t);
static int svdetach_fd(blind_t);

static nsc_def_t sv_fd_def[] = {
	{ "Attach",	(uintptr_t)svattach_fd, },
	{ "Detach",	(uintptr_t)svdetach_fd, },
	{ 0, 0, }	/* terminator */
};
 177 
/*
 * cb_ops functions.
 */

static int svopen(dev_t *, int, int, cred_t *);
static int svclose(dev_t, int, int, cred_t *);
static int svioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int svprint(dev_t, char *);

/*
 * These next functions are layered into the underlying driver's devops.
 * sv_enable() swaps them into the target driver's cb_ops structure
 * (saving the originals in the sv_maj_t) and sv_free() restores them.
 */

static int sv_lyr_open(dev_t *, int, int, cred_t *);
static int sv_lyr_close(dev_t, int, int, cred_t *);
static int sv_lyr_strategy(struct buf *);
static int sv_lyr_read(dev_t, struct uio *, cred_t *);
static int sv_lyr_write(dev_t, struct uio *, cred_t *);
static int sv_lyr_aread(dev_t, struct aio_req *, cred_t *);
static int sv_lyr_awrite(dev_t, struct aio_req *, cred_t *);
static int sv_lyr_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/* cb_ops for the sv pseudo (control) device itself. */
static struct cb_ops sv_cb_ops = {
	svopen,		/* open */
	svclose,	/* close */
	nulldev,	/* strategy */
	svprint,
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	svioctl,
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,
	NULL,		/* NOT a stream */
	D_NEW | D_MP | D_64BIT,
	CB_REV,
	nodev,		/* aread */
	nodev,		/* awrite */
};
 220 
 221 
/*
 * dev_ops functions.
 */

static int sv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int sv_attach(dev_info_t *, ddi_attach_cmd_t);
static int sv_detach(dev_info_t *, ddi_detach_cmd_t);

static struct dev_ops sv_ops = {
	DEVO_REV,
	0,		/* refcnt */
	sv_getinfo,
	nulldev,	/* identify */
	nulldev,	/* probe */
	sv_attach,
	sv_detach,
	nodev,		/* reset */
	&sv_cb_ops,
	(struct bus_ops *)0	/* not a nexus driver */
};
 242 
/*
 * Module linkage.
 */

extern struct mod_ops mod_driverops;

static struct modldrv modldrv = {
	&mod_driverops,
	"nws:Storage Volume:" ISS_VERSION_STR,
	&sv_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	0		/* terminator */
};
 260 
 261 
 262 int
 263 _init(void)
 264 {
 265         int error;
 266 
 267         mutex_init(&sv_mutex, NULL, MUTEX_DRIVER, NULL);
 268 
 269         if ((error = mod_install(&modlinkage)) != 0) {
 270                 mutex_destroy(&sv_mutex);
 271                 return (error);
 272         }
 273 
 274 #ifdef DEBUG
 275         cmn_err(CE_CONT, "!sv (revision %d.%d.%d.%d, %s, %s)\n",
 276             sv_major_rev, sv_minor_rev, sv_micro_rev, sv_baseline_rev,
 277             ISS_VERSION_STR, BUILD_DATE_STR);
 278 #else
 279         if (sv_micro_rev) {
 280                 cmn_err(CE_CONT, "!sv (revision %d.%d.%d, %s, %s)\n",
 281                     sv_major_rev, sv_minor_rev, sv_micro_rev,
 282                     ISS_VERSION_STR, BUILD_DATE_STR);
 283         } else {
 284                 cmn_err(CE_CONT, "!sv (revision %d.%d, %s, %s)\n",
 285                     sv_major_rev, sv_minor_rev,
 286                     ISS_VERSION_STR, BUILD_DATE_STR);
 287         }
 288 #endif
 289 
 290         return (error);
 291 }
 292 
 293 
 294 int
 295 _fini(void)
 296 {
 297         int error;
 298 
 299         if ((error = mod_remove(&modlinkage)) != 0)
 300                 return (error);
 301 
 302         mutex_destroy(&sv_mutex);
 303 
 304         return (error);
 305 }
 306 
 307 
/*
 * Report module information via mod_info(9F).
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
 313 
 314 
 315 /*
 316  * Locking & State.
 317  *
 318  * sv_mutex protects config information - sv_maj_t and sv_dev_t lists;
 319  * threadset creation and sizing; sv_ndevices.
 320  *
 321  * If we need to hold both sv_mutex and sv_lock, then the sv_mutex
 322  * must be acquired first.
 323  *
 324  * sv_lock protects the sv_dev_t structure for an individual device.
 325  *
 326  * sv_olock protects the otyp/open members of the sv_dev_t.  If we need
 327  * to hold both sv_lock and sv_olock, then the sv_lock must be acquired
 328  * first.
 329  *
 330  * nsc_reserve/nsc_release are used in NSC_MULTI mode to allow multiple
 331  * I/O operations to a device simultaneously, as above.
 332  *
 333  * All nsc_open/nsc_close/nsc_reserve/nsc_release operations that occur
 334  * with sv_lock write-locked must be done with (sv_state == SV_PENDING)
 335  * and (sv_pending == curthread) so that any recursion through
 336  * sv_lyr_open/sv_lyr_close can be detected.
 337  */
 338 
 339 
 340 static int
 341 sv_init_devs(void)
 342 {
 343         int i;
 344 
 345         ASSERT(MUTEX_HELD(&sv_mutex));
 346 
 347         if (sv_max_devices > 0)
 348                 return (0);
 349 
 350         sv_max_devices = nsc_max_devices();
 351 
 352         if (sv_max_devices <= 0) {
 353                 /* nsctl is not attached (nskernd not running) */
 354                 if (sv_debug > 0)
 355                         cmn_err(CE_CONT, "!sv: nsc_max_devices = 0\n");
 356                 return (EAGAIN);
 357         }
 358 
 359         sv_devs = nsc_kmem_zalloc((sv_max_devices * sizeof (*sv_devs)),
 360             KM_NOSLEEP, sv_mem);
 361 
 362         if (sv_devs == NULL) {
 363                 cmn_err(CE_WARN, "!sv: could not allocate sv_devs array");
 364                 return (ENOMEM);
 365         }
 366 
 367         for (i = 0; i < sv_max_devices; i++) {
 368                 mutex_init(&sv_devs[i].sv_olock, NULL, MUTEX_DRIVER, NULL);
 369                 rw_init(&sv_devs[i].sv_lock, NULL, RW_DRIVER, NULL);
 370         }
 371 
 372         if (sv_debug > 0)
 373                 cmn_err(CE_CONT, "!sv: sv_init_devs successful\n");
 374 
 375         return (0);
 376 }
 377 
 378 
 379 static int
 380 sv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 381 {
 382         int rc;
 383 
 384         switch (cmd) {
 385 
 386         case DDI_ATTACH:
 387                 sv_dip = dip;
 388 
 389                 if (ddi_create_minor_node(dip, "sv", S_IFCHR,
 390                     0, DDI_PSEUDO, 0) != DDI_SUCCESS)
 391                         goto failed;
 392 
 393                 mutex_enter(&sv_mutex);
 394 
 395                 sv_mem = nsc_register_mem("SV", NSC_MEM_LOCAL, 0);
 396                 if (sv_mem == NULL) {
 397                         mutex_exit(&sv_mutex);
 398                         goto failed;
 399                 }
 400 
 401                 rc = sv_init_devs();
 402                 if (rc != 0 && rc != EAGAIN) {
 403                         mutex_exit(&sv_mutex);
 404                         goto failed;
 405                 }
 406 
 407                 mutex_exit(&sv_mutex);
 408 
 409 
 410                 ddi_report_dev(dip);
 411 
 412                 sv_threads = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 413                     DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 414                     "sv_threads", sv_threads);
 415 
 416                 if (sv_debug > 0)
 417                         cmn_err(CE_CONT, "!sv: sv_threads=%d\n", sv_threads);
 418 
 419                 if (sv_threads > sv_threads_max)
 420                         sv_threads_max = sv_threads;
 421 
 422                 return (DDI_SUCCESS);
 423 
 424         default:
 425                 return (DDI_FAILURE);
 426         }
 427 
 428 failed:
 429         DTRACE_PROBE(sv_attach_failed);
 430         (void) sv_detach(dip, DDI_DETACH);
 431         return (DDI_FAILURE);
 432 }
 433 
 434 
 435 static int
 436 sv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 437 {
 438         sv_dev_t *svp;
 439         int i;
 440 
 441         switch (cmd) {
 442 
 443         case DDI_DETACH:
 444 
 445                 /*
 446                  * Check that everything is disabled.
 447                  */
 448 
 449                 mutex_enter(&sv_mutex);
 450 
 451                 if (sv_mod_status == SV_PREVENT_UNLOAD) {
 452                         mutex_exit(&sv_mutex);
 453                         DTRACE_PROBE(sv_detach_err_prevent);
 454                         return (DDI_FAILURE);
 455                 }
 456 
 457                 for (i = 0; sv_devs && i < sv_max_devices; i++) {
 458                         svp = &sv_devs[i];
 459 
 460                         if (svp->sv_state != SV_DISABLE) {
 461                                 mutex_exit(&sv_mutex);
 462                                 DTRACE_PROBE(sv_detach_err_busy);
 463                                 return (DDI_FAILURE);
 464                         }
 465                 }
 466 
 467 
 468                 for (i = 0; sv_devs && i < sv_max_devices; i++) {
 469                         mutex_destroy(&sv_devs[i].sv_olock);
 470                         rw_destroy(&sv_devs[i].sv_lock);
 471                 }
 472 
 473                 if (sv_devs) {
 474                         nsc_kmem_free(sv_devs,
 475                             (sv_max_devices * sizeof (*sv_devs)));
 476                         sv_devs = NULL;
 477                 }
 478                 sv_max_devices = 0;
 479 
 480                 if (sv_mem) {
 481                         nsc_unregister_mem(sv_mem);
 482                         sv_mem = NULL;
 483                 }
 484 
 485                 mutex_exit(&sv_mutex);
 486 
 487                 /*
 488                  * Remove all minor nodes.
 489                  */
 490 
 491                 ddi_remove_minor_node(dip, NULL);
 492                 sv_dip = NULL;
 493 
 494                 return (DDI_SUCCESS);
 495 
 496         default:
 497                 return (DDI_FAILURE);
 498         }
 499 }
 500 
 501 static sv_maj_t *
 502 sv_getmajor(const dev_t dev)
 503 {
 504         sv_maj_t **insert, *maj;
 505         major_t umaj = getmajor(dev);
 506 
 507         /*
 508          * See if the hash table entry, or one of the hash chains
 509          * is already allocated for this major number
 510          */
 511         if ((maj = sv_majors[SV_MAJOR_HASH(umaj)]) != 0) {
 512                 do {
 513                         if (maj->sm_major == umaj)
 514                                 return (maj);
 515                 } while ((maj = maj->sm_next) != 0);
 516         }
 517 
 518         /*
 519          * If the sv_mutex is held, there is design flaw, as the only non-mutex
 520          * held callers can be sv_enable() or sv_dev_to_sv()
 521          * Return an error, instead of panicing the system
 522          */
 523         if (MUTEX_HELD(&sv_mutex)) {
 524                 cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t");
 525                 return (NULL);
 526         }
 527 
 528         /*
 529          * Determine where to allocate a new element in the hash table
 530          */
 531         mutex_enter(&sv_mutex);
 532         insert = &(sv_majors[SV_MAJOR_HASH(umaj)]);
 533         for (maj = *insert; maj; maj = maj->sm_next) {
 534 
 535                 /* Did another thread beat us to it? */
 536                 if (maj->sm_major == umaj)
 537                         return (maj);
 538 
 539                 /* Find a NULL insert point? */
 540                 if (maj->sm_next == NULL)
 541                         insert = &maj->sm_next;
 542         }
 543 
 544         /*
 545          * Located the new insert point
 546          */
 547         *insert = nsc_kmem_zalloc(sizeof (*maj), KM_NOSLEEP, sv_mem);
 548         if ((maj = *insert) != 0)
 549                 maj->sm_major = umaj;
 550         else
 551                 cmn_err(CE_WARN, "!sv: could not allocate sv_maj_t");
 552 
 553         mutex_exit(&sv_mutex);
 554 
 555         return (maj);
 556 }
 557 
 558 /* ARGSUSED */
 559 
 560 static int
 561 sv_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 562 {
 563         int rc = DDI_FAILURE;
 564 
 565         switch (infocmd) {
 566 
 567         case DDI_INFO_DEVT2DEVINFO:
 568                 *result = sv_dip;
 569                 rc = DDI_SUCCESS;
 570                 break;
 571 
 572         case DDI_INFO_DEVT2INSTANCE:
 573                 /*
 574                  * We only have a single instance.
 575                  */
 576                 *result = 0;
 577                 rc = DDI_SUCCESS;
 578                 break;
 579 
 580         default:
 581                 break;
 582         }
 583 
 584         return (rc);
 585 }
 586 
 587 
/*
 * Hashing of devices onto major device structures.
 *
 * Individual device structures are hashed onto one of the sm_hash[]
 * buckets in the relevant major device structure.
 *
 * Hash insertion and deletion -must- be done with sv_mutex held.  Hash
 * searching does not require the mutex because of the sm_seq member.
 * sm_seq is incremented on each insertion (-after- hash chain pointer
 * manipulation) and each deletion (-before- hash chain pointer
 * manipulation).  When searching the hash chain, the seq number is
 * checked before accessing each device structure, if the seq number has
 * changed, then we restart the search from the top of the hash chain.
 * If we restart more than SV_HASH_RETRY times, we take sv_mutex and search
 * the hash chain (we are guaranteed that this search cannot be
 * interrupted).
 */

#define SV_HASH_RETRY	16	/* lock-free retries before taking sv_mutex */
 607 
/*
 * Translate a dev_t into its sv_dev_t, optionally returning the major
 * structure through *majpp.  Returns NULL if the device has no major
 * structure or no devices are in use under that major.  Uses the
 * lock-free, sm_seq-validated search protocol described above.
 */
static sv_dev_t *
sv_dev_to_sv(const dev_t dev, sv_maj_t **majpp)
{
	minor_t umin = getminor(dev);
	sv_dev_t **hb, *next, *svp;
	sv_maj_t *maj;
	int seq;
	int try;

	/* Get major hash table */
	maj = sv_getmajor(dev);
	if (majpp)
		*majpp = maj;
	if (maj == NULL)
		return (NULL);

	if (maj->sm_inuse == 0) {
		/* nothing enabled under this major - no point searching */
		DTRACE_PROBE1(
		    sv_dev_to_sv_end,
		    dev_t, dev);
		return (NULL);
	}

	hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]);
	try = 0;

retry:
	/* after SV_HASH_RETRY lock-free restarts, search under sv_mutex */
	if (try > SV_HASH_RETRY)
		mutex_enter(&sv_mutex);

	seq = maj->sm_seq;
	for (svp = *hb; svp; svp = next) {
		next = svp->sv_hash;

		nsc_membar_stld();	/* preserve register load order */

		/*
		 * If sm_seq changed since we sampled it, a concurrent
		 * insert/delete may have invalidated "next" - restart.
		 */
		if (maj->sm_seq != seq) {
			DTRACE_PROBE1(sv_dev_to_sv_retry, dev_t, dev);
			try++;
			goto retry;
		}

		if (svp->sv_dev == dev)
			break;
	}

	if (try > SV_HASH_RETRY)
		mutex_exit(&sv_mutex);

	return (svp);
}
 659 
 660 
 661 /*
 662  * Must be called with sv_mutex held.
 663  */
 664 
 665 static int
 666 sv_get_state(const dev_t udev, sv_dev_t **svpp)
 667 {
 668         sv_dev_t **hb, **insert, *svp;
 669         sv_maj_t *maj;
 670         minor_t umin;
 671         int i;
 672 
 673         /* Get major hash table */
 674         if ((maj = sv_getmajor(udev)) == NULL)
 675                 return (NULL);
 676 
 677         /* Determine which minor hash table */
 678         umin = getminor(udev);
 679         hb = &(maj->sm_hash[SV_MINOR_HASH(umin)]);
 680 
 681         /* look for clash */
 682 
 683         insert = hb;
 684 
 685         for (svp = *hb; svp; svp = svp->sv_hash) {
 686                 if (svp->sv_dev == udev)
 687                         break;
 688 
 689                 if (svp->sv_hash == NULL)
 690                         insert = &svp->sv_hash;
 691         }
 692 
 693         if (svp) {
 694                 DTRACE_PROBE1(
 695                     sv_get_state_enabled,
 696                     dev_t, udev);
 697                 return (SV_EENABLED);
 698         }
 699 
 700         /* look for spare sv_devs slot */
 701 
 702         for (i = 0; i < sv_max_devices; i++) {
 703                 svp = &sv_devs[i];
 704 
 705                 if (svp->sv_state == SV_DISABLE)
 706                         break;
 707         }
 708 
 709         if (i >= sv_max_devices) {
 710                 DTRACE_PROBE1(
 711                     sv_get_state_noslots,
 712                     dev_t, udev);
 713                 return (SV_ENOSLOTS);
 714         }
 715 
 716         svp->sv_state = SV_PENDING;
 717         svp->sv_pending = curthread;
 718 
 719         *insert = svp;
 720         svp->sv_hash = NULL;
 721         maj->sm_seq++;               /* must be after the store to the hash chain */
 722 
 723         *svpp = svp;
 724 
 725         /*
 726          * We do not know the size of the underlying device at
 727          * this stage, so initialise "nblocks" property to
 728          * zero, and update it whenever we succeed in
 729          * nsc_reserve'ing the underlying nsc_fd_t.
 730          */
 731 
 732         svp->sv_nblocks = 0;
 733 
 734         return (0);
 735 }
 736 
 737 
 738 /*
 739  * Remove a device structure from it's hash chain.
 740  * Must be called with sv_mutex held.
 741  */
 742 
 743 static void
 744 sv_rm_hash(sv_dev_t *svp)
 745 {
 746         sv_dev_t **svpp;
 747         sv_maj_t *maj;
 748 
 749         /* Get major hash table */
 750         if ((maj = sv_getmajor(svp->sv_dev)) == NULL)
 751                 return;
 752 
 753         /* remove svp from hash chain */
 754 
 755         svpp = &(maj->sm_hash[SV_MINOR_HASH(getminor(svp->sv_dev))]);
 756         while (*svpp) {
 757                 if (*svpp == svp) {
 758                         /*
 759                          * increment of sm_seq must be before the
 760                          * removal from the hash chain
 761                          */
 762                         maj->sm_seq++;
 763                         *svpp = svp->sv_hash;
 764                         break;
 765                 }
 766 
 767                 svpp = &(*svpp)->sv_hash;
 768         }
 769 
 770         svp->sv_hash = NULL;
 771 }
 772 
 773 /*
 774  * Free (disable) a device structure.
 775  * Must be called with sv_lock(RW_WRITER) and sv_mutex held, and will
 776  * perform the exits during its processing.
 777  */
 778 
 779 static int
 780 sv_free(sv_dev_t *svp, const int error)
 781 {
 782         struct cb_ops *cb_ops;
 783         sv_maj_t *maj;
 784 
 785         /* Get major hash table */
 786         if ((maj = sv_getmajor(svp->sv_dev)) == NULL)
 787                 return (NULL);
 788 
 789         svp->sv_state = SV_PENDING;
 790         svp->sv_pending = curthread;
 791 
 792         /*
 793          * Close the fd's before removing from the hash or swapping
 794          * back the cb_ops pointers so that the cache flushes before new
 795          * io can come in.
 796          */
 797 
 798         if (svp->sv_fd) {
 799                 (void) nsc_close(svp->sv_fd);
 800                 svp->sv_fd = 0;
 801         }
 802 
 803         sv_rm_hash(svp);
 804 
 805         if (error != SV_ESDOPEN &&
 806             error != SV_ELYROPEN && --maj->sm_inuse == 0) {
 807 
 808                 if (maj->sm_dev_ops)
 809                         cb_ops = maj->sm_dev_ops->devo_cb_ops;
 810                 else
 811                         cb_ops = NULL;
 812 
 813                 if (cb_ops && maj->sm_strategy != NULL) {
 814                         cb_ops->cb_strategy = maj->sm_strategy;
 815                         cb_ops->cb_close = maj->sm_close;
 816                         cb_ops->cb_ioctl = maj->sm_ioctl;
 817                         cb_ops->cb_write = maj->sm_write;
 818                         cb_ops->cb_open = maj->sm_open;
 819                         cb_ops->cb_read = maj->sm_read;
 820                         cb_ops->cb_flag = maj->sm_flag;
 821 
 822                         if (maj->sm_awrite)
 823                                 cb_ops->cb_awrite = maj->sm_awrite;
 824 
 825                         if (maj->sm_aread)
 826                                 cb_ops->cb_aread = maj->sm_aread;
 827 
 828                         /*
 829                          * corbin XXX
 830                          * Leave backing device ops in maj->sm_*
 831                          * to handle any requests that might come
 832                          * in during the disable.  This could be
 833                          * a problem however if the backing device
 834                          * driver is changed while we process these
 835                          * requests.
 836                          *
 837                          * maj->sm_strategy = 0;
 838                          * maj->sm_awrite = 0;
 839                          * maj->sm_write = 0;
 840                          * maj->sm_ioctl = 0;
 841                          * maj->sm_close = 0;
 842                          * maj->sm_aread = 0;
 843                          * maj->sm_read = 0;
 844                          * maj->sm_open = 0;
 845                          * maj->sm_flag = 0;
 846                          *
 847                          */
 848                 }
 849 
 850                 if (maj->sm_dev_ops) {
 851                         maj->sm_dev_ops = 0;
 852                 }
 853         }
 854 
 855         if (svp->sv_lh) {
 856                 cred_t *crp = ddi_get_cred();
 857 
 858                 /*
 859                  * Close the protective layered driver open using the
 860                  * Sun Private layered driver i/f.
 861                  */
 862 
 863                 (void) ldi_close(svp->sv_lh, FREAD|FWRITE, crp);
 864                 svp->sv_lh = NULL;
 865         }
 866 
 867         svp->sv_timestamp = nsc_lbolt();
 868         svp->sv_state = SV_DISABLE;
 869         svp->sv_pending = NULL;
 870         rw_exit(&svp->sv_lock);
 871         mutex_exit(&sv_mutex);
 872 
 873         return (error);
 874 }
 875 
 876 /*
 877  * Reserve the device, taking into account the possibility that
 878  * the reserve might have to be retried.
 879  */
 880 static int
 881 sv_reserve(nsc_fd_t *fd, int flags)
 882 {
 883         int eintr_count;
 884         int rc;
 885 
 886         eintr_count = 0;
 887         do {
 888                 rc = nsc_reserve(fd, flags);
 889                 if (rc == EINTR) {
 890                         ++eintr_count;
 891                         delay(2);
 892                 }
 893         } while ((rc == EINTR) && (eintr_count < MAX_EINTR_COUNT));
 894 
 895         return (rc);
 896 }
 897 
 898 static int
 899 sv_enable(const caddr_t path, const int flag,
 900     const dev_t udev, spcs_s_info_t kstatus)
 901 {
 902         struct dev_ops *dev_ops;
 903         struct cb_ops *cb_ops;
 904         sv_dev_t *svp;
 905         sv_maj_t *maj;
 906         nsc_size_t nblocks;
 907         int rc;
 908         cred_t *crp;
 909         ldi_ident_t     li;
 910 
 911         if (udev == (dev_t)-1 || udev == 0) {
 912                 DTRACE_PROBE1(
 913                     sv_enable_err_baddev,
 914                     dev_t, udev);
 915                 return (SV_EBADDEV);
 916         }
 917 
 918         if ((flag & ~(NSC_CACHE|NSC_DEVICE)) != 0) {
 919                 DTRACE_PROBE1(sv_enable_err_amode, dev_t, udev);
 920                 return (SV_EAMODE);
 921         }
 922 
 923         /* Get major hash table */
 924         if ((maj = sv_getmajor(udev)) == NULL)
 925                 return (SV_EBADDEV);
 926 
 927         mutex_enter(&sv_mutex);
 928 
 929         rc = sv_get_state(udev, &svp);
 930         if (rc) {
 931                 mutex_exit(&sv_mutex);
 932                 DTRACE_PROBE1(sv_enable_err_state, dev_t, udev);
 933                 return (rc);
 934         }
 935 
 936         rw_enter(&svp->sv_lock, RW_WRITER);
 937 
 938         /*
 939          * Get real fd used for io
 940          */
 941 
 942         svp->sv_dev = udev;
 943         svp->sv_flag = flag;
 944 
 945         /*
 946          * OR in NSC_DEVICE to ensure that nskern grabs the real strategy
 947          * function pointer before sv swaps them out.
 948          */
 949 
 950         svp->sv_fd = nsc_open(path, (svp->sv_flag | NSC_DEVICE),
 951             sv_fd_def, (blind_t)udev, &rc);
 952 
 953         if (svp->sv_fd == NULL) {
 954                 if (kstatus)
 955                         spcs_s_add(kstatus, rc);
 956                 DTRACE_PROBE1(sv_enable_err_fd, dev_t, udev);
 957                 return (sv_free(svp, SV_ESDOPEN));
 958         }
 959 
 960         /*
 961          * Perform a layered driver open using the Sun Private layered
 962          * driver i/f to ensure that the cb_ops structure for the driver
 963          * is not detached out from under us whilst sv is enabled.
 964          *
 965          */
 966 
 967         crp = ddi_get_cred();
 968         svp->sv_lh = NULL;
 969 
 970         if ((rc = ldi_ident_from_dev(svp->sv_dev, &li)) == 0) {
 971                 rc = ldi_open_by_dev(&svp->sv_dev,
 972                     OTYP_BLK, FREAD|FWRITE, crp, &svp->sv_lh, li);
 973         }
 974 
 975         if (rc != 0) {
 976                 if (kstatus)
 977                         spcs_s_add(kstatus, rc);
 978                 DTRACE_PROBE1(sv_enable_err_lyr_open, dev_t, udev);
 979                 return (sv_free(svp, SV_ELYROPEN));
 980         }
 981 
 982         /*
 983          * Do layering if required - must happen after nsc_open().
 984          */
 985 
 986         if (maj->sm_inuse++ == 0) {
 987                 maj->sm_dev_ops = nsc_get_devops(getmajor(udev));
 988 
 989                 if (maj->sm_dev_ops == NULL ||
 990                     maj->sm_dev_ops->devo_cb_ops == NULL) {
 991                         DTRACE_PROBE1(sv_enable_err_load, dev_t, udev);
 992                         return (sv_free(svp, SV_ELOAD));
 993                 }
 994 
 995                 dev_ops = maj->sm_dev_ops;
 996                 cb_ops = dev_ops->devo_cb_ops;
 997 
 998                 if (cb_ops->cb_strategy == NULL ||
 999                     cb_ops->cb_strategy == nodev ||
1000                     cb_ops->cb_strategy == nulldev) {
1001                         DTRACE_PROBE1(sv_enable_err_nostrategy, dev_t, udev);
1002                         return (sv_free(svp, SV_ELOAD));
1003                 }
1004 
1005                 if (cb_ops->cb_strategy == sv_lyr_strategy) {
1006                         DTRACE_PROBE1(sv_enable_err_svstrategy, dev_t, udev);
1007                         return (sv_free(svp, SV_ESTRATEGY));
1008                 }
1009 
1010                 maj->sm_strategy = cb_ops->cb_strategy;
1011                 maj->sm_close = cb_ops->cb_close;
1012                 maj->sm_ioctl = cb_ops->cb_ioctl;
1013                 maj->sm_write = cb_ops->cb_write;
1014                 maj->sm_open = cb_ops->cb_open;
1015                 maj->sm_read = cb_ops->cb_read;
1016                 maj->sm_flag = cb_ops->cb_flag;
1017 
1018                 cb_ops->cb_flag = cb_ops->cb_flag | D_MP;
1019                 cb_ops->cb_strategy = sv_lyr_strategy;
1020                 cb_ops->cb_close = sv_lyr_close;
1021                 cb_ops->cb_ioctl = sv_lyr_ioctl;
1022                 cb_ops->cb_write = sv_lyr_write;
1023                 cb_ops->cb_open = sv_lyr_open;
1024                 cb_ops->cb_read = sv_lyr_read;
1025 
1026                 /*
1027                  * Check that the driver has async I/O entry points
1028                  * before changing them.
1029                  */
1030 
1031                 if (dev_ops->devo_rev < 3 || cb_ops->cb_rev < 1) {
1032                         maj->sm_awrite = 0;
1033                         maj->sm_aread = 0;
1034                 } else {
1035                         maj->sm_awrite = cb_ops->cb_awrite;
1036                         maj->sm_aread = cb_ops->cb_aread;
1037 
1038                         cb_ops->cb_awrite = sv_lyr_awrite;
1039                         cb_ops->cb_aread = sv_lyr_aread;
1040                 }
1041 
1042                 /*
1043                  * Bug 4645743
1044                  *
1045                  * Prevent sv from ever unloading after it has interposed
1046                  * on a major device because there is a race between
1047                  * sv removing its layered entry points from the target
1048                  * dev_ops, a client coming in and accessing the driver,
1049                  * and the kernel modunloading the sv text.
1050                  *
1051                  * To allow unload, do svboot -u, which only happens in
1052                  * pkgrm time.
1053                  */
1054                 ASSERT(MUTEX_HELD(&sv_mutex));
1055                 sv_mod_status = SV_PREVENT_UNLOAD;
1056         }
1057 
1058 
1059         svp->sv_timestamp = nsc_lbolt();
1060         svp->sv_state = SV_ENABLE;
1061         svp->sv_pending = NULL;
1062         rw_exit(&svp->sv_lock);
1063 
1064         sv_ndevices++;
1065         mutex_exit(&sv_mutex);
1066 
1067         nblocks = 0;
1068         if (sv_reserve(svp->sv_fd, NSC_READ|NSC_MULTI|NSC_PCATCH) == 0) {
1069                 nblocks = svp->sv_nblocks;
1070                 nsc_release(svp->sv_fd);
1071         }
1072 
1073         cmn_err(CE_CONT, "!sv: rdev 0x%lx, nblocks %" NSC_SZFMT "\n",
1074             svp->sv_dev, nblocks);
1075 
1076         return (0);
1077 }
1078 
1079 
1080 static int
1081 sv_prepare_unload()
1082 {
1083         int rc = 0;
1084 
1085         mutex_enter(&sv_mutex);
1086 
1087         if (sv_mod_status == SV_PREVENT_UNLOAD) {
1088                 if ((sv_ndevices != 0) || (sv_tset != NULL)) {
1089                         rc = EBUSY;
1090                 } else {
1091                         sv_mod_status = SV_ALLOW_UNLOAD;
1092                         delay(SV_WAIT_UNLOAD * drv_usectohz(1000000));
1093                 }
1094         }
1095 
1096         mutex_exit(&sv_mutex);
1097         return (rc);
1098 }
1099 
1100 static int
1101 svattach_fd(blind_t arg)
1102 {
1103         dev_t dev = (dev_t)arg;
1104         sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1105         int rc;
1106 
1107         if (sv_debug > 0)
1108                 cmn_err(CE_CONT, "!svattach_fd(%p, %p)\n", arg, (void *)svp);
1109 
1110         if (svp == NULL) {
1111                 cmn_err(CE_WARN, "!svattach_fd: no state (arg %p)", arg);
1112                 return (0);
1113         }
1114 
1115         if ((rc = nsc_partsize(svp->sv_fd, &svp->sv_nblocks)) != 0) {
1116                 cmn_err(CE_WARN,
1117                     "!svattach_fd: nsc_partsize() failed, rc %d", rc);
1118                 svp->sv_nblocks = 0;
1119         }
1120 
1121         if ((rc = nsc_maxfbas(svp->sv_fd, 0, &svp->sv_maxfbas)) != 0) {
1122                 cmn_err(CE_WARN,
1123                     "!svattach_fd: nsc_maxfbas() failed, rc %d", rc);
1124                 svp->sv_maxfbas = 0;
1125         }
1126 
1127         if (sv_debug > 0) {
1128                 cmn_err(CE_CONT,
1129                     "!svattach_fd(%p): size %" NSC_SZFMT ", "
1130                     "maxfbas %" NSC_SZFMT "\n",
1131                     arg, svp->sv_nblocks, svp->sv_maxfbas);
1132         }
1133 
1134         return (0);
1135 }
1136 
1137 
1138 static int
1139 svdetach_fd(blind_t arg)
1140 {
1141         dev_t dev = (dev_t)arg;
1142         sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1143 
1144         if (sv_debug > 0)
1145                 cmn_err(CE_CONT, "!svdetach_fd(%p, %p)\n", arg, (void *)svp);
1146 
1147         /* svp can be NULL during disable of an sv */
1148         if (svp == NULL)
1149                 return (0);
1150 
1151         svp->sv_maxfbas = 0;
1152         svp->sv_nblocks = 0;
1153         return (0);
1154 }
1155 
1156 
1157 /*
1158  * Side effect: if called with (guard != 0), then expects both sv_mutex
1159  * and sv_lock(RW_WRITER) to be held, and will release them before returning.
1160  */
1161 
1162 /* ARGSUSED */
1163 static int
1164 sv_disable(dev_t dev, spcs_s_info_t kstatus)
1165 {
1166         sv_dev_t *svp = sv_dev_to_sv(dev, NULL);
1167 
1168         if (svp == NULL) {
1169 
1170                 DTRACE_PROBE1(sv_disable_err_nodev, sv_dev_t *, svp);
1171                 return (SV_ENODEV);
1172         }
1173 
1174         mutex_enter(&sv_mutex);
1175         rw_enter(&svp->sv_lock, RW_WRITER);
1176 
1177         if (svp->sv_fd == NULL || svp->sv_state != SV_ENABLE) {
1178                 rw_exit(&svp->sv_lock);
1179                 mutex_exit(&sv_mutex);
1180 
1181                 DTRACE_PROBE1(sv_disable_err_disabled, sv_dev_t *, svp);
1182                 return (SV_EDISABLED);
1183         }
1184 
1185 
1186         sv_ndevices--;
1187         return (sv_free(svp, 0));
1188 }
1189 
1190 
1191 
1192 static int
1193 sv_lyr_open(dev_t *devp, int flag, int otyp, cred_t *crp)
1194 {
1195         nsc_buf_t *tmph;
1196         sv_dev_t *svp;
1197         sv_maj_t *maj;
1198         int (*fn)();
1199         dev_t odev;
1200         int ret;
1201         int rc;
1202 
1203         svp = sv_dev_to_sv(*devp, &maj);
1204 
1205         if (svp) {
1206                 if (svp->sv_state == SV_PENDING &&
1207                     svp->sv_pending == curthread) {
1208                         /*
1209                          * This is a recursive open from a call to
1210                          * ddi_lyr_open_by_devt and so we just want
1211                          * to pass it straight through to the
1212                          * underlying driver.
1213                          */
1214                         DTRACE_PROBE2(sv_lyr_open_recursive,
1215                             sv_dev_t *, svp,
1216                             dev_t, *devp);
1217                         svp = NULL;
1218                 } else
1219                         rw_enter(&svp->sv_lock, RW_READER);
1220         }
1221 
1222         odev = *devp;
1223 
1224         if (maj && (fn = maj->sm_open) != 0) {
1225                 if (!(maj->sm_flag & D_MP)) {
1226                         UNSAFE_ENTER();
1227                         ret = (*fn)(devp, flag, otyp, crp);
1228                         UNSAFE_EXIT();
1229                 } else {
1230                         ret = (*fn)(devp, flag, otyp, crp);
1231                 }
1232 
1233                 if (ret == 0) {
1234                         /*
1235                          * Re-acquire svp if the driver changed *devp.
1236                          */
1237 
1238                         if (*devp != odev) {
1239                                 rw_exit(&svp->sv_lock);
1240 
1241                                 svp = sv_dev_to_sv(*devp, NULL);
1242 
1243                                 if (svp) {
1244                                         rw_enter(&svp->sv_lock, RW_READER);
1245                                 }
1246                         }
1247                 }
1248         } else {
1249                 ret = ENODEV;
1250         }
1251 
1252         if (svp && ret != 0 && svp->sv_state == SV_ENABLE) {
1253                 /*
1254                  * Underlying DDI open failed, but we have this
1255                  * device SV enabled.  If we can read some data
1256                  * from the device, fake a successful open (this
1257                  * probably means that this device is RDC'd and we
1258                  * are getting the data from the secondary node).
1259                  *
1260                  * The reserve must be done with NSC_TRY|NSC_NOWAIT to
1261                  * ensure that it does not deadlock if this open is
1262                  * coming from nskernd:get_bsize().
1263                  */
1264                 rc = sv_reserve(svp->sv_fd,
1265                     NSC_TRY | NSC_NOWAIT | NSC_MULTI | NSC_PCATCH);
1266                 if (rc == 0) {
1267                         tmph = NULL;
1268 
1269                         rc = nsc_alloc_buf(svp->sv_fd, 0, 1, NSC_READ, &tmph);
1270                         if (rc <= 0) {
1271                                 /* success */
1272                                 ret = 0;
1273                         }
1274 
1275                         if (tmph) {
1276                                 (void) nsc_free_buf(tmph);
1277                                 tmph = NULL;
1278                         }
1279 
1280                         nsc_release(svp->sv_fd);
1281 
1282                         /*
1283                          * Count the number of layered opens that we
1284                          * fake since we have to fake a matching number
1285                          * of closes (OTYP_LYR open/close calls must be
1286                          * paired).
1287                          */
1288 
1289                         if (ret == 0 && otyp == OTYP_LYR) {
1290                                 mutex_enter(&svp->sv_olock);
1291                                 svp->sv_openlcnt++;
1292                                 mutex_exit(&svp->sv_olock);
1293                         }
1294                 }
1295         }
1296 
1297         if (svp) {
1298                 rw_exit(&svp->sv_lock);
1299         }
1300 
1301         return (ret);
1302 }
1303 
1304 
/*
 * Layered close entry point, interposed into the underlying driver's
 * cb_ops by sv_enable().
 *
 * Consumes any OTYP_LYR closes that match opens faked by
 * sv_lyr_open() while the device was failed, then passes the close
 * through to the saved driver close routine.
 */
static int
sv_lyr_close(dev_t dev, int flag, int otyp, cred_t *crp)
{
	sv_dev_t *svp;
	sv_maj_t *maj;
	int (*fn)();
	int ret;

	svp = sv_dev_to_sv(dev, &maj);

	if (svp &&
	    svp->sv_state == SV_PENDING &&
	    svp->sv_pending == curthread) {
		/*
		 * This is a recursive open from a call to
		 * ddi_lyr_close and so we just want
		 * to pass it straight through to the
		 * underlying driver.
		 */
		DTRACE_PROBE2(sv_lyr_close_recursive, sv_dev_t *, svp,
		    dev_t, dev);
		svp = NULL;
	}

	if (svp) {
		rw_enter(&svp->sv_lock, RW_READER);

		if (otyp == OTYP_LYR) {
			mutex_enter(&svp->sv_olock);

			if (svp->sv_openlcnt) {
				/*
				 * Consume sufficient layered closes to
				 * account for the opens that we faked
				 * whilst the device was failed.
				 */
				svp->sv_openlcnt--;
				mutex_exit(&svp->sv_olock);
				rw_exit(&svp->sv_lock);

				DTRACE_PROBE1(sv_lyr_close_end, dev_t, dev);

				/* faked close - do not call the driver */
				return (0);
			}

			mutex_exit(&svp->sv_olock);
		}
	}

	/* pass the close through to the saved driver entry point */
	if (maj && (fn = maj->sm_close) != 0) {
		if (!(maj->sm_flag & D_MP)) {
			/* non-MP driver - serialize via the unsafe lock */
			UNSAFE_ENTER();
			ret = (*fn)(dev, flag, otyp, crp);
			UNSAFE_EXIT();
		} else {
			ret = (*fn)(dev, flag, otyp, crp);
		}
	} else {
		ret = ENODEV;
	}

	if (svp) {
		rw_exit(&svp->sv_lock);
	}

	return (ret);
}
1372 
1373 
1374 /*
1375  * Convert the specified dev_t into a locked and enabled sv_dev_t, or
1376  * return NULL.
1377  */
1378 static sv_dev_t *
1379 sv_find_enabled(const dev_t dev, sv_maj_t **majpp)
1380 {
1381         sv_dev_t *svp;
1382 
1383         while ((svp = sv_dev_to_sv(dev, majpp)) != NULL) {
1384                 rw_enter(&svp->sv_lock, RW_READER);
1385 
1386                 if (svp->sv_state == SV_ENABLE) {
1387                         /* locked and enabled */
1388                         break;
1389                 }
1390 
1391                 /*
1392                  * State was changed while waiting on the lock.
1393                  * Wait for a stable state.
1394                  */
1395                 rw_exit(&svp->sv_lock);
1396 
1397                 DTRACE_PROBE1(sv_find_enabled_retry, dev_t, dev);
1398 
1399                 delay(2);
1400         }
1401 
1402         return (svp);
1403 }
1404 
1405 
1406 static int
1407 sv_lyr_uio(dev_t dev, uio_t *uiop, cred_t *crp, int rw)
1408 {
1409         sv_dev_t *svp;
1410         sv_maj_t *maj;
1411         int (*fn)();
1412         int rc;
1413 
1414         svp = sv_find_enabled(dev, &maj);
1415         if (svp == NULL) {
1416                 if (maj) {
1417                         if (rw == NSC_READ)
1418                                 fn = maj->sm_read;
1419                         else
1420                                 fn = maj->sm_write;
1421 
1422                         if (fn != 0) {
1423                                 if (!(maj->sm_flag & D_MP)) {
1424                                         UNSAFE_ENTER();
1425                                         rc = (*fn)(dev, uiop, crp);
1426                                         UNSAFE_EXIT();
1427                                 } else {
1428                                         rc = (*fn)(dev, uiop, crp);
1429                                 }
1430                         }
1431 
1432                         return (rc);
1433                 } else {
1434                         return (ENODEV);
1435                 }
1436         }
1437 
1438         ASSERT(RW_READ_HELD(&svp->sv_lock));
1439 
1440         if (svp->sv_flag == 0) {
1441                 /*
1442                  * guard access mode
1443                  * - prevent user level access to the device
1444                  */
1445                 DTRACE_PROBE1(sv_lyr_uio_err_guard, uio_t *, uiop);
1446                 rc = EPERM;
1447                 goto out;
1448         }
1449 
1450         if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) {
1451                 DTRACE_PROBE1(sv_lyr_uio_err_rsrv, uio_t *, uiop);
1452                 goto out;
1453         }
1454 
1455         if (rw == NSC_READ)
1456                 rc = nsc_uread(svp->sv_fd, uiop, crp);
1457         else
1458                 rc = nsc_uwrite(svp->sv_fd, uiop, crp);
1459 
1460         nsc_release(svp->sv_fd);
1461 
1462 out:
1463         rw_exit(&svp->sv_lock);
1464 
1465         return (rc);
1466 }
1467 
1468 
1469 static int
1470 sv_lyr_read(dev_t dev, uio_t *uiop, cred_t *crp)
1471 {
1472         return (sv_lyr_uio(dev, uiop, crp, NSC_READ));
1473 }
1474 
1475 
1476 static int
1477 sv_lyr_write(dev_t dev, uio_t *uiop, cred_t *crp)
1478 {
1479         return (sv_lyr_uio(dev, uiop, crp, NSC_WRITE));
1480 }
1481 
1482 
1483 /* ARGSUSED */
1484 
1485 static int
1486 sv_lyr_aread(dev_t dev, struct aio_req *aio, cred_t *crp)
1487 {
1488         return (aphysio(sv_lyr_strategy,
1489             anocancel, dev, B_READ, minphys, aio));
1490 }
1491 
1492 
1493 /* ARGSUSED */
1494 
1495 static int
1496 sv_lyr_awrite(dev_t dev, struct aio_req *aio, cred_t *crp)
1497 {
1498         return (aphysio(sv_lyr_strategy,
1499             anocancel, dev, B_WRITE, minphys, aio));
1500 }
1501 
1502 
1503 /*
1504  * Set up an array containing the list of raw path names
1505  * The array for the paths is svl and the size of the array is
1506  * in size.
1507  *
1508  * If there are more layered devices than will fit in the array,
1509  * the number of extra layered devices is returned.  Otherwise
1510  * zero is return.
1511  *
1512  * Input:
1513  *      svn     : array for paths
1514  *      size    : size of the array
1515  *
1516  * Output (extra):
1517  *      zero    : All paths fit in array
1518  *      >0   : Number of defined layered devices don't fit in array
1519  */
1520 
1521 static int
1522 sv_list(void *ptr, const int size, int *extra, const int ilp32)
1523 {
1524         sv_name32_t *svn32;
1525         sv_name_t *svn;
1526         sv_dev_t *svp;
1527         int *mode, *nblocks;
1528         int i, index;
1529         char *path;
1530 
1531         *extra = 0;
1532         index = 0;
1533 
1534         if (ilp32)
1535                 svn32 = ptr;
1536         else
1537                 svn = ptr;
1538 
1539         mutex_enter(&sv_mutex);
1540         for (i = 0; i < sv_max_devices; i++) {
1541                 svp = &sv_devs[i];
1542 
1543                 rw_enter(&svp->sv_lock, RW_READER);
1544 
1545                 if (svp->sv_state != SV_ENABLE) {
1546                         rw_exit(&svp->sv_lock);
1547                         continue;
1548                 }
1549 
1550                 if ((*extra) != 0 || ptr == NULL) {
1551                         /* Another overflow entry */
1552                         rw_exit(&svp->sv_lock);
1553                         (*extra)++;
1554                         continue;
1555                 }
1556 
1557                 if (ilp32) {
1558                         nblocks = &svn32->svn_nblocks;
1559                         mode = &svn32->svn_mode;
1560                         path = svn32->svn_path;
1561 
1562                         svn32->svn_timestamp = (uint32_t)svp->sv_timestamp;
1563                         svn32++;
1564                 } else {
1565                         nblocks = &svn->svn_nblocks;
1566                         mode = &svn->svn_mode;
1567                         path = svn->svn_path;
1568 
1569                         svn->svn_timestamp = svp->sv_timestamp;
1570                         svn++;
1571                 }
1572 
1573                 (void) strcpy(path, nsc_pathname(svp->sv_fd));
1574                 *nblocks = svp->sv_nblocks;
1575                 *mode = svp->sv_flag;
1576 
1577                 if (*nblocks == 0) {
1578                         if (sv_debug > 3)
1579                                 cmn_err(CE_CONT, "!sv_list: need to reserve\n");
1580 
1581                         if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
1582                                 *nblocks = svp->sv_nblocks;
1583                                 nsc_release(svp->sv_fd);
1584                         }
1585                 }
1586 
1587                 if (++index >= size) {
1588                         /* Out of space */
1589                         (*extra)++;
1590                 }
1591 
1592                 rw_exit(&svp->sv_lock);
1593         }
1594         mutex_exit(&sv_mutex);
1595 
1596         if (index < size) {
1597                 /* NULL terminated list */
1598                 if (ilp32)
1599                         svn32->svn_path[0] = '\0';
1600                 else
1601                         svn->svn_path[0] = '\0';
1602         }
1603 
1604         return (0);
1605 }
1606 
1607 
/*
 * Dynamically grow or shrink the shared sv thread set.
 *
 * threads > 0 requests that many additional threads; threads < 0
 * releases that many.  Growth is capped at sv_threads_max; requests
 * beyond the cap are remembered in sv_threads_extra and paid back
 * first when threads are later released.  Shrinking only happens once
 * the demand falls sv_threads_inc + sv_threads_hysteresis below the
 * current thread count, and never below the sv_threads floor.
 *
 * Caller must hold sv_mutex.
 */
static void
sv_thread_tune(int threads)
{
	/* direction of the per-iteration demand adjustment below */
	int incr = (threads > 0) ? 1 : -1;
	int change = 0;
	int nthreads;

	ASSERT(MUTEX_HELD(&sv_mutex));

	if (sv_threads_extra) {
		/* keep track of any additional threads requested */
		if (threads > 0) {
			sv_threads_extra += threads;
			return;
		}
		/* releasing: consume the deferred requests first */
		threads = -threads;
		if (threads >= sv_threads_extra) {
			threads -= sv_threads_extra;
			sv_threads_extra = 0;
			/* fall through to while loop */
		} else {
			sv_threads_extra -= threads;
			return;
		}
	} else if (threads > 0) {
		/*
		 * do not increase the number of threads beyond
		 * sv_threads_max when doing dynamic thread tuning
		 */
		nthreads = nst_nthread(sv_tset);
		if ((nthreads + threads) > sv_threads_max) {
			/* remember the excess for later pay-back */
			sv_threads_extra = nthreads + threads - sv_threads_max;
			threads = sv_threads_max - nthreads;
			if (threads <= 0)
				return;
		}
	}

	/* from here on, threads is a magnitude; incr holds the sign */
	if (threads < 0)
		threads = -threads;

	while (threads--) {
		nthreads = nst_nthread(sv_tset);
		sv_threads_needed += incr;

		if (sv_threads_needed >= nthreads)
			change += nst_add_thread(sv_tset, sv_threads_inc);
		else if ((sv_threads_needed <
		    (nthreads - (sv_threads_inc + sv_threads_hysteresis))) &&
		    ((nthreads - sv_threads_inc) >= sv_threads))
			change -= nst_del_thread(sv_tset, sv_threads_inc);
	}

#ifdef DEBUG
	if (change) {
		cmn_err(CE_NOTE,
		    "!sv_thread_tune: threads needed %d, nthreads %d, "
		    "nthreads change %d",
		    sv_threads_needed, nst_nthread(sv_tset), change);
	}
#endif
}
1670 
1671 
1672 /* ARGSUSED */
1673 static int
1674 svopen(dev_t *devp, int flag, int otyp, cred_t *crp)
1675 {
1676         int rc;
1677 
1678         mutex_enter(&sv_mutex);
1679         rc = sv_init_devs();
1680         mutex_exit(&sv_mutex);
1681 
1682         return (rc);
1683 }
1684 
1685 
/* ARGSUSED */

/*
 * Close of the sv control device.
 *
 * If no sv devices remain enabled, tries to destroy the shared thread
 * set, polling every HZ/10 ticks for up to 5 seconds for its threads
 * to exit.  If they never do, the thread set is deliberately leaked
 * (with a warning) rather than destroyed while in use.  Always
 * returns 0.
 */
static int
svclose(dev_t dev, int flag, int otyp, cred_t *crp)
{
	const int secs = HZ * 5;	/* total wait budget, in ticks */
	const int ticks = HZ / 10;	/* poll interval */
	int loops = secs / ticks;

	mutex_enter(&sv_mutex);
	while (sv_ndevices <= 0 && sv_tset != NULL && loops > 0) {
		if (nst_nlive(sv_tset) <= 0) {
			/* all threads gone - safe to destroy the set */
			nst_destroy(sv_tset);
			sv_tset = NULL;
			break;
		}

		/* threads still active - wait for them to exit */
		mutex_exit(&sv_mutex);
		delay(ticks);
		loops--;
		mutex_enter(&sv_mutex);
	}
	mutex_exit(&sv_mutex);

	if (loops <= 0) {
		cmn_err(CE_WARN,
#ifndef DEBUG
		    /* do not write to console when non-DEBUG */
		    "!"
#endif
		    "sv:svclose: threads still active "
		    "after %d sec - leaking thread set", secs);
	}

	return (0);
}
1722 
1723 
1724 static int
1725 svioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *crp, int *rvalp)
1726 {
1727         char itmp1[12], itmp2[12]; /* temp char array for editing ints */
1728         spcs_s_info_t kstatus;  /* Kernel version of spcs status */
1729         spcs_s_info_t ustatus;  /* Address of user version of spcs status */
1730         sv_list32_t svl32;      /* 32 bit Initial structure for SVIOC_LIST */
1731         sv_version_t svv;       /* Version structure */
1732         sv_conf_t svc;          /* User config structure */
1733         sv_list_t svl;          /* Initial structure for SVIOC_LIST */
1734         void *usvn;             /* Address of user sv_name_t */
1735         void *svn = NULL;       /* Array for SVIOC_LIST */
1736         uint64_t phash;         /* pathname hash */
1737         int rc = 0;             /* Return code -- errno */
1738         int size;               /* Number of items in array */
1739         int bytes;              /* Byte size of array */
1740         int ilp32;              /* Convert data structures for ilp32 userland */
1741 
1742         *rvalp = 0;
1743 
1744         /*
1745          * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, then it will continue.
1746          * else it means it previously was SV_PREVENT_UNLOAD, and now it's
1747          * SV_ALLOW_UNLOAD, expecting the driver to eventually unload.
1748          *
1749          * SV_ALLOW_UNLOAD is final state, so no need to grab sv_mutex.
1750          */
1751         if (sv_mod_status == SV_ALLOW_UNLOAD) {
1752                 return (EBUSY);
1753         }
1754 
1755         if ((cmd != SVIOC_LIST) && ((rc = drv_priv(crp)) != 0))
1756                 return (rc);
1757 
1758         kstatus = spcs_s_kcreate();
1759         if (!kstatus) {
1760                 DTRACE_PROBE1(sv_ioctl_err_kcreate, dev_t, dev);
1761                 return (ENOMEM);
1762         }
1763 
1764         ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32);
1765 
1766         switch (cmd) {
1767 
1768         case SVIOC_ENABLE:
1769 
1770                 if (ilp32) {
1771                         sv_conf32_t svc32;
1772 
1773                         if (ddi_copyin((void *)arg, &svc32,
1774                             sizeof (svc32), mode) < 0) {
1775                                 spcs_s_kfree(kstatus);
1776                                 return (EFAULT);
1777                         }
1778 
1779                         svc.svc_error = (spcs_s_info_t)svc32.svc_error;
1780                         (void) strcpy(svc.svc_path, svc32.svc_path);
1781                         svc.svc_flag  = svc32.svc_flag;
1782                         svc.svc_major = svc32.svc_major;
1783                         svc.svc_minor = svc32.svc_minor;
1784                 } else {
1785                         if (ddi_copyin((void *)arg, &svc,
1786                             sizeof (svc), mode) < 0) {
1787                                 spcs_s_kfree(kstatus);
1788                                 return (EFAULT);
1789                         }
1790                 }
1791 
1792                 /* force to raw access */
1793                 svc.svc_flag = NSC_DEVICE;
1794 
1795                 if (sv_tset == NULL) {
1796                         mutex_enter(&sv_mutex);
1797 
1798                         if (sv_tset == NULL) {
1799                                 sv_tset = nst_init("sv_thr", sv_threads);
1800                         }
1801 
1802                         mutex_exit(&sv_mutex);
1803 
1804                         if (sv_tset == NULL) {
1805                                 cmn_err(CE_WARN,
1806                                     "!sv: could not allocate %d threads",
1807                                     sv_threads);
1808                         }
1809                 }
1810 
1811                 rc = sv_enable(svc.svc_path, svc.svc_flag,
1812                     makedevice(svc.svc_major, svc.svc_minor), kstatus);
1813 
1814                 if (rc == 0) {
1815                         sv_config_time = nsc_lbolt();
1816 
1817                         mutex_enter(&sv_mutex);
1818                         sv_thread_tune(sv_threads_dev);
1819                         mutex_exit(&sv_mutex);
1820                 }
1821 
1822                 DTRACE_PROBE3(sv_ioctl_end, dev_t, dev, int, *rvalp, int, rc);
1823 
1824                 return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc));
1825                 /* NOTREACHED */
1826 
1827         case SVIOC_DISABLE:
1828 
1829                 if (ilp32) {
1830                         sv_conf32_t svc32;
1831 
1832                         if (ddi_copyin((void *)arg, &svc32,
1833                             sizeof (svc32), mode) < 0) {
1834                                 spcs_s_kfree(kstatus);
1835                                 return (EFAULT);
1836                         }
1837 
1838                         svc.svc_error = (spcs_s_info_t)svc32.svc_error;
1839                         svc.svc_major = svc32.svc_major;
1840                         svc.svc_minor = svc32.svc_minor;
1841                         (void) strcpy(svc.svc_path, svc32.svc_path);
1842                         svc.svc_flag  = svc32.svc_flag;
1843                 } else {
1844                         if (ddi_copyin((void *)arg, &svc,
1845                             sizeof (svc), mode) < 0) {
1846                                 spcs_s_kfree(kstatus);
1847                                 return (EFAULT);
1848                         }
1849                 }
1850 
1851                 if (svc.svc_major == (major_t)-1 &&
1852                     svc.svc_minor == (minor_t)-1) {
1853                         sv_dev_t *svp;
1854                         int i;
1855 
1856                         /*
1857                          * User level could not find the minor device
1858                          * node, so do this the slow way by searching
1859                          * the entire sv config for a matching pathname.
1860                          */
1861 
1862                         phash = nsc_strhash(svc.svc_path);
1863 
1864                         mutex_enter(&sv_mutex);
1865 
1866                         for (i = 0; i < sv_max_devices; i++) {
1867                                 svp = &sv_devs[i];
1868 
1869                                 if (svp->sv_state == SV_DISABLE ||
1870                                     svp->sv_fd == NULL)
1871                                         continue;
1872 
1873                                 if (nsc_fdpathcmp(svp->sv_fd, phash,
1874                                     svc.svc_path) == 0) {
1875                                         svc.svc_major = getmajor(svp->sv_dev);
1876                                         svc.svc_minor = getminor(svp->sv_dev);
1877                                         break;
1878                                 }
1879                         }
1880 
1881                         mutex_exit(&sv_mutex);
1882 
1883                         if (svc.svc_major == (major_t)-1 &&
1884                             svc.svc_minor == (minor_t)-1)
1885                                 return (spcs_s_ocopyoutf(&kstatus,
1886                                     svc.svc_error, SV_ENODEV));
1887                 }
1888 
1889                 rc = sv_disable(makedevice(svc.svc_major, svc.svc_minor),
1890                     kstatus);
1891 
1892                 if (rc == 0) {
1893                         sv_config_time = nsc_lbolt();
1894 
1895                         mutex_enter(&sv_mutex);
1896                         sv_thread_tune(-sv_threads_dev);
1897                         mutex_exit(&sv_mutex);
1898                 }
1899 
1900                 DTRACE_PROBE3(sv_ioctl_2, dev_t, dev, int, *rvalp, int, rc);
1901 
1902                 return (spcs_s_ocopyoutf(&kstatus, svc.svc_error, rc));
1903                 /* NOTREACHED */
1904 
1905         case SVIOC_LIST:
1906 
1907                 if (ilp32) {
1908                         if (ddi_copyin((void *)arg, &svl32,
1909                             sizeof (svl32), mode) < 0) {
1910                                 spcs_s_kfree(kstatus);
1911                                 return (EFAULT);
1912                         }
1913 
1914                         ustatus = (spcs_s_info_t)svl32.svl_error;
1915                         size = svl32.svl_count;
1916                         usvn = (void *)(unsigned long)svl32.svl_names;
1917                 } else {
1918                         if (ddi_copyin((void *)arg, &svl,
1919                             sizeof (svl), mode) < 0) {
1920                                 spcs_s_kfree(kstatus);
1921                                 return (EFAULT);
1922                         }
1923 
1924                         ustatus = svl.svl_error;
1925                         size = svl.svl_count;
1926                         usvn = svl.svl_names;
1927                 }
1928 
1929                 /* Do some boundary checking */
1930                 if ((size < 0) || (size > sv_max_devices)) {
1931                         /* Array size is out of range */
1932                         return (spcs_s_ocopyoutf(&kstatus, ustatus,
1933                             SV_EARRBOUNDS, "0",
1934                             spcs_s_inttostring(sv_max_devices, itmp1,
1935                             sizeof (itmp1), 0),
1936                             spcs_s_inttostring(size, itmp2,
1937                             sizeof (itmp2), 0)));
1938                 }
1939 
1940                 if (ilp32)
1941                         bytes = size * sizeof (sv_name32_t);
1942                 else
1943                         bytes = size * sizeof (sv_name_t);
1944 
1945                 /* Allocate memory for the array of structures */
1946                 if (bytes != 0) {
1947                         svn = kmem_zalloc(bytes, KM_SLEEP);
1948                         if (!svn) {
1949                                 return (spcs_s_ocopyoutf(&kstatus,
1950                                     ustatus, ENOMEM));
1951                         }
1952                 }
1953 
1954                 rc = sv_list(svn, size, rvalp, ilp32);
1955                 if (rc) {
1956                         if (svn != NULL)
1957                                 kmem_free(svn, bytes);
1958                         return (spcs_s_ocopyoutf(&kstatus, ustatus, rc));
1959                 }
1960 
1961                 if (ilp32) {
1962                         svl32.svl_timestamp = (uint32_t)sv_config_time;
1963                         svl32.svl_maxdevs = (int32_t)sv_max_devices;
1964 
1965                         /* Return the list structure */
1966                         if (ddi_copyout(&svl32, (void *)arg,
1967                             sizeof (svl32), mode) < 0) {
1968                                 spcs_s_kfree(kstatus);
1969                                 if (svn != NULL)
1970                                         kmem_free(svn, bytes);
1971                                 return (EFAULT);
1972                         }
1973                 } else {
1974                         svl.svl_timestamp = sv_config_time;
1975                         svl.svl_maxdevs = sv_max_devices;
1976 
1977                         /* Return the list structure */
1978                         if (ddi_copyout(&svl, (void *)arg,
1979                             sizeof (svl), mode) < 0) {
1980                                 spcs_s_kfree(kstatus);
1981                                 if (svn != NULL)
1982                                         kmem_free(svn, bytes);
1983                                 return (EFAULT);
1984                         }
1985                 }
1986 
1987                 /* Return the array */
1988                 if (svn != NULL) {
1989                         if (ddi_copyout(svn, usvn, bytes, mode) < 0) {
1990                                 kmem_free(svn, bytes);
1991                                 spcs_s_kfree(kstatus);
1992                                 return (EFAULT);
1993                         }
1994                         kmem_free(svn, bytes);
1995                 }
1996 
1997                 DTRACE_PROBE3(sv_ioctl_3, dev_t, dev, int, *rvalp, int, 0);
1998 
1999                 return (spcs_s_ocopyoutf(&kstatus, ustatus, 0));
2000                 /* NOTREACHED */
2001 
2002         case SVIOC_VERSION:
2003 
2004                 if (ilp32) {
2005                         sv_version32_t svv32;
2006 
2007                         if (ddi_copyin((void *)arg, &svv32,
2008                             sizeof (svv32), mode) < 0) {
2009                                 spcs_s_kfree(kstatus);
2010                                 return (EFAULT);
2011                         }
2012 
2013                         svv32.svv_major_rev = sv_major_rev;
2014                         svv32.svv_minor_rev = sv_minor_rev;
2015                         svv32.svv_micro_rev = sv_micro_rev;
2016                         svv32.svv_baseline_rev = sv_baseline_rev;
2017 
2018                         if (ddi_copyout(&svv32, (void *)arg,
2019                             sizeof (svv32), mode) < 0) {
2020                                 spcs_s_kfree(kstatus);
2021                                 return (EFAULT);
2022                         }
2023 
2024                         ustatus = (spcs_s_info_t)svv32.svv_error;
2025                 } else {
2026                         if (ddi_copyin((void *)arg, &svv,
2027                             sizeof (svv), mode) < 0) {
2028                                 spcs_s_kfree(kstatus);
2029                                 return (EFAULT);
2030                         }
2031 
2032                         svv.svv_major_rev = sv_major_rev;
2033                         svv.svv_minor_rev = sv_minor_rev;
2034                         svv.svv_micro_rev = sv_micro_rev;
2035                         svv.svv_baseline_rev = sv_baseline_rev;
2036 
2037                         if (ddi_copyout(&svv, (void *)arg,
2038                             sizeof (svv), mode) < 0) {
2039                                 spcs_s_kfree(kstatus);
2040                                 return (EFAULT);
2041                         }
2042 
2043                         ustatus = svv.svv_error;
2044                 }
2045 
2046                 DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, 0);
2047 
2048                 return (spcs_s_ocopyoutf(&kstatus, ustatus, 0));
2049                 /* NOTREACHED */
2050 
2051         case SVIOC_UNLOAD:
2052                 rc = sv_prepare_unload();
2053 
2054                 if (ddi_copyout(&rc, (void *)arg, sizeof (rc), mode) < 0) {
2055                         rc = EFAULT;
2056                 }
2057 
2058                 spcs_s_kfree(kstatus);
2059                 return (rc);
2060 
2061         default:
2062                 spcs_s_kfree(kstatus);
2063 
2064                 DTRACE_PROBE3(sv_ioctl_4, dev_t, dev, int, *rvalp, int, EINVAL);
2065 
2066                 return (EINVAL);
2067                 /* NOTREACHED */
2068         }
2069 
2070         /* NOTREACHED */
2071 }
2072 
2073 
2074 /* ARGSUSED */
2075 static int
2076 svprint(dev_t dev, char *str)
2077 {
2078         int instance = ddi_get_instance(sv_dip);
2079         cmn_err(CE_WARN, "!%s%d: %s", ddi_get_name(sv_dip), instance, str);
2080         return (0);
2081 }
2082 
2083 
/*
 * Perform the i/o described by bp against an sv-enabled device using
 * the nsctl buffer (nsc_buf_t) interface, copying data between the
 * buf's linear buffer and the nsctl vector list.  If the device turns
 * out not to be sv-enabled, the buf is passed straight through to the
 * underlying driver's saved strategy routine.
 *
 * On return the buf has been biodone()'d (or handed to the underlying
 * driver, which is then responsible for completing it); errors are
 * reported via bioerror().
 */
static void
_sv_lyr_strategy(struct buf *bp)
{
        caddr_t buf_addr;               /* pointer to linear buffer in bp */
        nsc_buf_t *bufh = NULL;
        nsc_buf_t *hndl = NULL;
        sv_dev_t *svp;
        nsc_vec_t *v;
        sv_maj_t *maj;
        nsc_size_t fba_req, fba_len;    /* FBA lengths */
        nsc_off_t fba_off;              /* FBA offset */
        size_t tocopy, nbytes;          /* byte lengths */
        int rw, rc;                     /* flags and return codes */
        int (*fn)();

        rc = 0;

        if (sv_debug > 5)
                cmn_err(CE_CONT, "!_sv_lyr_strategy(%p)\n", (void *)bp);

        svp = sv_find_enabled(bp->b_edev, &maj);
        if (svp == NULL) {
                /*
                 * Not sv-enabled: pass the buf to the underlying
                 * driver's strategy routine, using the unsafe-driver
                 * serialization if that driver is not D_MP.
                 */
                if (maj && (fn = maj->sm_strategy) != 0) {
                        if (!(maj->sm_flag & D_MP)) {
                                UNSAFE_ENTER();
                                rc = (*fn)(bp);
                                UNSAFE_EXIT();
                        } else {
                                rc = (*fn)(bp);
                        }
                        return;
                } else {
                        /* no underlying strategy routine to call */
                        bioerror(bp, ENODEV);
                        biodone(bp);
                        return;
                }
        }

        /* sv_find_enabled() returned with svp->sv_lock held as reader */
        ASSERT(RW_READ_HELD(&svp->sv_lock));

        if (svp->sv_flag == 0) {
                /*
                 * guard access mode
                 * - prevent user level access to the device
                 */
                DTRACE_PROBE1(sv_lyr_strategy_err_guard, struct buf *, bp);
                bioerror(bp, EPERM);
                goto out;
        }

        if ((rc = sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH)) != 0) {
                DTRACE_PROBE1(sv_lyr_strategy_err_rsrv, struct buf *, bp);

                if (rc == EINTR)
                        cmn_err(CE_WARN, "!nsc_reserve() returned EINTR");
                bioerror(bp, rc);
                goto out;
        }

        if (bp->b_lblkno >= (diskaddr_t)svp->sv_nblocks) {
                DTRACE_PROBE1(sv_lyr_strategy_eof, struct buf *, bp);

                if (bp->b_flags & B_READ) {
                        /* return EOF, not an error */
                        bp->b_resid = bp->b_bcount;
                        bioerror(bp, 0);
                } else
                        bioerror(bp, EINVAL);

                goto done;
        }

        /*
         * Preallocate a handle once per call to strategy.
         * If this fails, then the nsc_alloc_buf() will allocate
         * a temporary handle per allocation/free pair.
         */

        DTRACE_PROBE1(sv_dbg_alloch_start, sv_dev_t *, svp);

        bufh = nsc_alloc_handle(svp->sv_fd, NULL, NULL, NULL);

        DTRACE_PROBE1(sv_dbg_alloch_end, sv_dev_t *, svp);

        if (bufh && (bufh->sb_flag & NSC_HACTIVE) != 0) {
                /* should never happen - the handle must start inactive */
                DTRACE_PROBE1(sv_lyr_strategy_err_hactive, struct buf *, bp);

                cmn_err(CE_WARN,
                    "!sv: allocated active handle (bufh %p, flags %x)",
                    (void *)bufh, bufh->sb_flag);

                bioerror(bp, ENXIO);
                goto done;
        }

        /* truncate the request to the device extent */
        fba_req = FBA_LEN(bp->b_bcount);
        if (fba_req + bp->b_lblkno > (diskaddr_t)svp->sv_nblocks)
                fba_req = (nsc_size_t)(svp->sv_nblocks - bp->b_lblkno);

        rw = (bp->b_flags & B_READ) ? NSC_READ : NSC_WRITE;

        bp_mapin(bp);

        bp->b_resid = bp->b_bcount;
        buf_addr = bp->b_un.b_addr;
        fba_off = 0;

        /*
         * fba_req  - requested size of transfer in FBAs after
         *              truncation to device extent, and allowing for
         *              possible non-FBA bounded final chunk.
         * fba_off  - offset of start of chunk from start of bp in FBAs.
         * fba_len  - size of this chunk in FBAs.
         */

loop:
        /* each iteration transfers at most sv_maxfbas FBAs */
        fba_len = min(fba_req, svp->sv_maxfbas);
        hndl = bufh;

        DTRACE_PROBE4(sv_dbg_allocb_start,
            sv_dev_t *, svp,
            uint64_t, (uint64_t)(bp->b_lblkno + fba_off),
            uint64_t, (uint64_t)fba_len,
            int, rw);

        rc = nsc_alloc_buf(svp->sv_fd, (nsc_off_t)(bp->b_lblkno + fba_off),
            fba_len, rw, &hndl);

        DTRACE_PROBE1(sv_dbg_allocb_end, sv_dev_t *, svp);

        if (rc > 0) {
                /*
                 * nsc_alloc_buf() may have substituted a temporary
                 * handle; only free it if it is not our preallocated one
                 */
                DTRACE_PROBE1(sv_lyr_strategy_err_alloc, struct buf *, bp);
                bioerror(bp, rc);
                if (hndl != bufh)
                        (void) nsc_free_buf(hndl);
                hndl = NULL;
                goto done;
        }

        tocopy = min(FBA_SIZE(fba_len), bp->b_resid);
        v = hndl->sb_vec;

        if (rw == NSC_WRITE && FBA_OFF(tocopy) != 0) {
                /*
                 * Not overwriting all of the last FBA, so read in the
                 * old contents now before we overwrite it with the new
                 * data.
                 */

                DTRACE_PROBE2(sv_dbg_read_start, sv_dev_t *, svp,
                    uint64_t, (uint64_t)(hndl->sb_pos + hndl->sb_len - 1));

                rc = nsc_read(hndl, (hndl->sb_pos + hndl->sb_len - 1), 1, 0);
                if (rc > 0) {
                        bioerror(bp, rc);
                        goto done;
                }

                DTRACE_PROBE1(sv_dbg_read_end, sv_dev_t *, svp);
        }

        DTRACE_PROBE1(sv_dbg_bcopy_start, sv_dev_t *, svp);

        /* copy between the linear buf and the nsctl vector list */
        while (tocopy > 0) {
                nbytes = min(tocopy, (nsc_size_t)v->sv_len);

                if (bp->b_flags & B_READ)
                        (void) bcopy(v->sv_addr, buf_addr, nbytes);
                else
                        (void) bcopy(buf_addr, v->sv_addr, nbytes);

                bp->b_resid -= nbytes;
                buf_addr += nbytes;
                tocopy -= nbytes;
                v++;
        }

        DTRACE_PROBE1(sv_dbg_bcopy_end, sv_dev_t *, svp);

        if ((bp->b_flags & B_READ) == 0) {
                /* flush this chunk's new data through nsctl */
                DTRACE_PROBE3(sv_dbg_write_start, sv_dev_t *, svp,
                    uint64_t, (uint64_t)hndl->sb_pos,
                    uint64_t, (uint64_t)hndl->sb_len);

                rc = nsc_write(hndl, hndl->sb_pos, hndl->sb_len, 0);

                DTRACE_PROBE1(sv_dbg_write_end, sv_dev_t *, svp);

                if (rc > 0) {
                        bioerror(bp, rc);
                        goto done;
                }
        }

        /*
         * Adjust FBA offset and requested (ie. remaining) length,
         * loop if more data to transfer.
         */

        fba_off += fba_len;
        fba_req -= fba_len;

        if (fba_req > 0) {
                DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp);

                rc = nsc_free_buf(hndl);

                DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp);

                if (rc > 0) {
                        DTRACE_PROBE1(sv_lyr_strategy_err_free,
                            struct buf *, bp);
                        bioerror(bp, rc);
                }

                hndl = NULL;

                if (rc <= 0)
                        goto loop;
        }

done:
        /* free any outstanding buffer, then the handle and reservation */
        if (hndl != NULL) {
                DTRACE_PROBE1(sv_dbg_freeb_start, sv_dev_t *, svp);

                rc = nsc_free_buf(hndl);

                DTRACE_PROBE1(sv_dbg_freeb_end, sv_dev_t *, svp);

                if (rc > 0) {
                        DTRACE_PROBE1(sv_lyr_strategy_err_free,
                            struct buf *, bp);
                        bioerror(bp, rc);
                }

                hndl = NULL;
        }

        if (bufh)
                (void) nsc_free_handle(bufh);

        DTRACE_PROBE1(sv_dbg_rlse_start, sv_dev_t *, svp);

        nsc_release(svp->sv_fd);

        DTRACE_PROBE1(sv_dbg_rlse_end, sv_dev_t *, svp);

out:
        if (sv_debug > 5) {
                cmn_err(CE_CONT,
                    "!_sv_lyr_strategy: bp %p, bufh %p, bp->b_error %d\n",
                    (void *)bp, (void *)bufh, bp->b_error);
        }

        DTRACE_PROBE2(sv_lyr_strategy_end, struct buf *, bp, int, bp->b_error);

        /* drop the reader lock taken by sv_find_enabled() */
        rw_exit(&svp->sv_lock);
        biodone(bp);
}
2343 
2344 
2345 static void
2346 sv_async_strategy(blind_t arg)
2347 {
2348         struct buf *bp = (struct buf *)arg;
2349         _sv_lyr_strategy(bp);
2350 }
2351 
2352 
2353 static int
2354 sv_lyr_strategy(struct buf *bp)
2355 {
2356         nsthread_t *tp;
2357         int nlive;
2358 
2359         /*
2360          * If B_ASYNC was part of the DDI we could use it as a hint to
2361          * not create a thread for synchronous i/o.
2362          */
2363         if (sv_dev_to_sv(bp->b_edev, NULL) == NULL) {
2364                 /* not sv enabled - just pass through */
2365                 DTRACE_PROBE1(sv_lyr_strategy_notsv, struct buf *, bp);
2366                 _sv_lyr_strategy(bp);
2367                 return (0);
2368         }
2369 
2370         if (sv_debug > 4) {
2371                 cmn_err(CE_CONT, "!sv_lyr_strategy: nthread %d nlive %d\n",
2372                     nst_nthread(sv_tset), nst_nlive(sv_tset));
2373         }
2374 
2375         /*
2376          * If there are only guard devices enabled there
2377          * won't be a threadset, so don't try and use it.
2378          */
2379         tp = NULL;
2380         if (sv_tset != NULL) {
2381                 tp = nst_create(sv_tset, sv_async_strategy, (blind_t)bp, 0);
2382         }
2383 
2384         if (tp == NULL) {
2385                 /*
2386                  * out of threads, so fall back to synchronous io.
2387                  */
2388                 if (sv_debug > 0) {
2389                         cmn_err(CE_CONT,
2390                             "!sv_lyr_strategy: thread alloc failed\n");
2391                 }
2392 
2393                 DTRACE_PROBE1(sv_lyr_strategy_no_thread,
2394                     struct buf *, bp);
2395 
2396                 _sv_lyr_strategy(bp);
2397                 sv_no_threads++;
2398         } else {
2399                 nlive = nst_nlive(sv_tset);
2400                 if (nlive > sv_max_nlive) {
2401                         if (sv_debug > 0) {
2402                                 cmn_err(CE_CONT,
2403                                     "!sv_lyr_strategy: "
2404                                     "new max nlive %d (nthread %d)\n",
2405                                     nlive, nst_nthread(sv_tset));
2406                         }
2407 
2408                         sv_max_nlive = nlive;
2409                 }
2410         }
2411 
2412         return (0);
2413 }
2414 
2415 
#ifndef offsetof
/* Fallback definition for environments whose headers lack offsetof. */
#define offsetof(s, m)  ((size_t)(&((s *)0)->m))
#endif
2419 
2420 /*
2421  * re-write the size of the current partition
2422  */
2423 static int
2424 sv_fix_dkiocgvtoc(const intptr_t arg, const int mode, sv_dev_t *svp)
2425 {
2426         size_t offset;
2427         int ilp32;
2428         int pnum;
2429         int rc;
2430 
2431         ilp32 = (ddi_model_convert_from((mode & FMODELS)) == DDI_MODEL_ILP32);
2432 
2433         rc = nskern_partition(svp->sv_dev, &pnum);
2434         if (rc != 0) {
2435                 return (rc);
2436         }
2437 
2438         if (pnum < 0 || pnum >= V_NUMPAR) {
2439                 cmn_err(CE_WARN,
2440                     "!sv_gvtoc: unable to determine partition number "
2441                     "for dev %lx", svp->sv_dev);
2442                 return (EINVAL);
2443         }
2444 
2445         if (ilp32) {
2446                 int32_t p_size;
2447 
2448 #ifdef _SunOS_5_6
2449                 offset = offsetof(struct vtoc, v_part);
2450                 offset += sizeof (struct partition) * pnum;
2451                 offset += offsetof(struct partition, p_size);
2452 #else
2453                 offset = offsetof(struct vtoc32, v_part);
2454                 offset += sizeof (struct partition32) * pnum;
2455                 offset += offsetof(struct partition32, p_size);
2456 #endif
2457 
2458                 p_size = (int32_t)svp->sv_nblocks;
2459                 if (p_size == 0) {
2460                         if (sv_reserve(svp->sv_fd,
2461                             NSC_MULTI|NSC_PCATCH) == 0) {
2462                                 p_size = (int32_t)svp->sv_nblocks;
2463                                 nsc_release(svp->sv_fd);
2464                         } else {
2465                                 rc = EINTR;
2466                         }
2467                 }
2468 
2469                 if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset),
2470                     sizeof (p_size), mode) != 0) {
2471                         rc = EFAULT;
2472                 }
2473         } else {
2474                 long p_size;
2475 
2476                 offset = offsetof(struct vtoc, v_part);
2477                 offset += sizeof (struct partition) * pnum;
2478                 offset += offsetof(struct partition, p_size);
2479 
2480                 p_size = (long)svp->sv_nblocks;
2481                 if (p_size == 0) {
2482                         if (sv_reserve(svp->sv_fd,
2483                             NSC_MULTI|NSC_PCATCH) == 0) {
2484                                 p_size = (long)svp->sv_nblocks;
2485                                 nsc_release(svp->sv_fd);
2486                         } else {
2487                                 rc = EINTR;
2488                         }
2489                 }
2490 
2491                 if ((rc == 0) && ddi_copyout(&p_size, (void *)(arg + offset),
2492                     sizeof (p_size), mode) != 0) {
2493                         rc = EFAULT;
2494                 }
2495         }
2496 
2497         return (rc);
2498 }
2499 
2500 
2501 #ifdef DKIOCPARTITION
2502 /*
2503  * re-write the size of the current partition
2504  *
2505  * arg is dk_efi_t.
2506  *
2507  * dk_efi_t->dki_data = (void *)(uintptr_t)efi.dki_data_64;
2508  *
2509  * dk_efi_t->dki_data --> efi_gpt_t (label header)
2510  * dk_efi_t->dki_data + 1 --> efi_gpe_t[] (array of partitions)
2511  *
2512  * efi_gpt_t->efi_gpt_PartitionEntryArrayCRC32 --> CRC32 of array of parts
2513  * efi_gpt_t->efi_gpt_HeaderCRC32 --> CRC32 of header itself
2514  *
2515  * This assumes that sizeof (efi_gpt_t) is the same as the size of a
2516  * logical block on the disk.
2517  *
2518  * Everything is little endian (i.e. disk format).
2519  */
2520 static int
2521 sv_fix_dkiocgetefi(const intptr_t arg, const int mode, sv_dev_t *svp)
2522 {
2523         dk_efi_t efi;
2524         efi_gpt_t gpt;
2525         efi_gpe_t *gpe = NULL;
2526         size_t sgpe;
2527         uint64_t p_size;        /* virtual partition size from nsctl */
2528         uint32_t crc;
2529         int unparts;            /* number of parts in user's array */
2530         int pnum;
2531         int rc;
2532 
2533         rc = nskern_partition(svp->sv_dev, &pnum);
2534         if (rc != 0) {
2535                 return (rc);
2536         }
2537 
2538         if (pnum < 0) {
2539                 cmn_err(CE_WARN,
2540                     "!sv_efi: unable to determine partition number for dev %lx",
2541                     svp->sv_dev);
2542                 return (EINVAL);
2543         }
2544 
2545         if (ddi_copyin((void *)arg, &efi, sizeof (efi), mode)) {
2546                 return (EFAULT);
2547         }
2548 
2549         efi.dki_data = (void *)(uintptr_t)efi.dki_data_64;
2550 
2551         if (efi.dki_length < sizeof (gpt) + sizeof (gpe)) {
2552                 return (EINVAL);
2553         }
2554 
2555         if (ddi_copyin((void *)efi.dki_data, &gpt, sizeof (gpt), mode)) {
2556                 rc = EFAULT;
2557                 goto out;
2558         }
2559 
2560         if ((unparts = LE_32(gpt.efi_gpt_NumberOfPartitionEntries)) == 0)
2561                 unparts = 1;
2562         else if (pnum >= unparts) {
2563                 cmn_err(CE_WARN,
2564                     "!sv_efi: partition# beyond end of user array (%d >= %d)",
2565                     pnum, unparts);
2566                 return (EINVAL);
2567         }
2568 
2569         sgpe = sizeof (*gpe) * unparts;
2570         gpe = kmem_alloc(sgpe, KM_SLEEP);
2571 
2572         if (ddi_copyin((void *)(efi.dki_data + 1), gpe, sgpe, mode)) {
2573                 rc = EFAULT;
2574                 goto out;
2575         }
2576 
2577         p_size = svp->sv_nblocks;
2578         if (p_size == 0) {
2579                 if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
2580                         p_size = (diskaddr_t)svp->sv_nblocks;
2581                         nsc_release(svp->sv_fd);
2582                 } else {
2583                         rc = EINTR;
2584                 }
2585         }
2586 
2587         gpe[pnum].efi_gpe_EndingLBA = LE_64(
2588             LE_64(gpe[pnum].efi_gpe_StartingLBA) + p_size - 1);
2589 
2590         gpt.efi_gpt_PartitionEntryArrayCRC32 = 0;
2591         CRC32(crc, gpe, sgpe, -1U, sv_crc32_table);
2592         gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
2593 
2594         gpt.efi_gpt_HeaderCRC32 = 0;
2595         CRC32(crc, &gpt, sizeof (gpt), -1U, sv_crc32_table);
2596         gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
2597 
2598         if ((rc == 0) && ddi_copyout(&gpt, efi.dki_data, sizeof (gpt), mode)) {
2599                 rc = EFAULT;
2600                 goto out;
2601         }
2602 
2603         if ((rc == 0) && ddi_copyout(gpe, efi.dki_data + 1, sgpe, mode)) {
2604                 rc = EFAULT;
2605                 goto out;
2606         }
2607 
2608 out:
2609         if (gpe) {
2610                 kmem_free(gpe, sgpe);
2611         }
2612 
2613         return (rc);
2614 }
2615 
2616 
2617 /*
2618  * Re-write the size of the partition specified by p_partno
2619  *
2620  * Note that if a DKIOCPARTITION is issued to an fd opened against a
2621  * non-sv'd device, but p_partno requests the size for a different
2622  * device that is sv'd, this function will *not* be called as sv is
2623  * not interposed on the original device (the fd).
2624  *
2625  * It would not be easy to change this as we cannot get the partition
2626  * number for the non-sv'd device, so cannot compute the dev_t of the
2627  * (sv'd) p_partno device, and so cannot find out if it is sv'd or get
2628  * its size from nsctl.
2629  *
2630  * See also the "Bug 4755783" comment in sv_lyr_ioctl().
2631  */
static int
sv_fix_dkiocpartition(const intptr_t arg, const int mode, sv_dev_t *svp)
{
        struct partition64 p64;         /* user's DKIOCPARTITION request */
        sv_dev_t *nsvp = NULL;          /* sv device for p_partno, if switched */
        diskaddr_t p_size;
        minor_t nminor;
        int pnum, rc;
        int dev;

        rc = nskern_partition(svp->sv_dev, &pnum);
        if (rc != 0) {
                return (rc);
        }

        if (ddi_copyin((void *)arg, &p64, sizeof (p64), mode)) {
                return (EFAULT);
        }

        if (p64.p_partno != pnum) {
                /* switch to requested partition, not the current one */
                /*
                 * Assumes partitions of the same major map to adjacent
                 * minors, so the target dev_t can be computed by offset
                 * from the current minor.
                 */
                nminor = getminor(svp->sv_dev) + (p64.p_partno - pnum);
                ndev = makedevice(getmajor(svp->sv_dev), nminor);
                nsvp = sv_find_enabled(ndev, NULL);
                if (nsvp == NULL) {
                        /* not sv device - just return */
                        return (0);
                }

                /*
                 * nsvp is returned with its sv_lock read-held; it is
                 * released below, after p_size has been read.  The
                 * caller's svp lock is left untouched.
                 */
                svp = nsvp;
        }

        p_size = svp->sv_nblocks;
        if (p_size == 0) {
                /*
                 * Size unknown; reserving the device appears to make
                 * nsctl refresh sv_nblocks -- NOTE(review): confirm
                 * against sv_reserve()/nsctl behavior.
                 */
                if (sv_reserve(svp->sv_fd, NSC_MULTI|NSC_PCATCH) == 0) {
                        p_size = (diskaddr_t)svp->sv_nblocks;
                        nsc_release(svp->sv_fd);
                } else {
                        rc = EINTR;
                }
        }

        if (nsvp != NULL) {
                rw_exit(&nsvp->sv_lock);
        }

        /* overwrite just the p_size field in the user's partition64 */
        if ((rc == 0) && ddi_copyout(&p_size,
            (void *)(arg + offsetof(struct partition64, p_size)),
            sizeof (p_size), mode) != 0) {
                return (EFAULT);
        }

        return (rc);
}
2686 #endif /* DKIOCPARTITION */
2687 
2688 
/*
 * Layered ioctl entry point for an sv (or plain, non-sv) device.
 *
 * Intercepts a small set of label-related ioctls so that the emulated
 * (virtual) volume size can be presented instead of the physical one,
 * and passes everything else straight through to the underlying disk
 * driver's cb_ops ioctl routine.
 *
 * Returns 0 on success, or an errno value: EBUSY if the module is on
 * its way to unloading, EPERM for disallowed label rewrites on an
 * sv-enabled device, ENODEV if the underlying driver has no ioctl
 * entry point, or whatever the underlying driver / fix-up routines
 * return.
 */
static int
sv_lyr_ioctl(const dev_t dev, const int cmd, const intptr_t arg,
    const int mode, cred_t *crp, int *rvalp)
{
	sv_dev_t *svp;		/* sv state; NULL if dev is not sv-enabled */
	sv_maj_t *maj;		/* per-major state of the underlying driver */
	int (*fn)();		/* underlying driver's ioctl entry point */
	int rc = 0;

	maj = 0;
	fn = 0;

	/*
	 * If sv_mod_status is 0 or SV_PREVENT_UNLOAD, then it will continue.
	 * else it means it previously was SV_PREVENT_UNLOAD, and now it's
	 * SV_ALLOW_UNLOAD, expecting the driver to eventually unload.
	 *
	 * SV_ALLOW_UNLOAD is final state, so no need to grab sv_mutex.
	 */
	if (sv_mod_status == SV_ALLOW_UNLOAD) {
		return (EBUSY);
	}

	/*
	 * On success, svp is returned with sv_lock read-held (see the
	 * ASSERT below); every path out of this function must drop it.
	 */
	svp = sv_find_enabled(dev, &maj);
	if (svp != NULL) {
		if (nskernd_isdaemon()) {
			/*
			 * This is nskernd which always needs to see
			 * the underlying disk device accurately.
			 *
			 * So just pass the ioctl straight through
			 * to the underlying driver as though the device
			 * was not sv enabled.
			 */
			DTRACE_PROBE2(sv_lyr_ioctl_nskernd, sv_dev_t *, svp,
			    dev_t, dev);

			rw_exit(&svp->sv_lock);
			svp = NULL;	/* treat as non-sv from here on */
		} else {
			ASSERT(RW_READ_HELD(&svp->sv_lock));
		}
	}

	/*
	 * We now have a locked and enabled SV device, or a non-SV device.
	 */

	switch (cmd) {
		/*
		 * DKIOCGVTOC, DKIOCSVTOC, DKIOCPARTITION, DKIOCGETEFI
		 * and DKIOCSETEFI are intercepted and faked up as some
		 * i/o providers emulate volumes of a different size to
		 * the underlying volume.
		 *
		 * Setting the size by rewriting the vtoc is not permitted.
		 */

	case DKIOCSVTOC:
#ifdef DKIOCPARTITION
	case DKIOCSETEFI:
#endif
		if (svp == NULL) {
			/* not intercepted -- allow ioctl through */
			break;
		}

		/* sv-enabled: refuse the label rewrite outright */
		rw_exit(&svp->sv_lock);

		DTRACE_PROBE2(sv_lyr_ioctl_svtoc, dev_t, dev, int, EPERM);

		return (EPERM);

	default:
		break;
	}

	/*
	 * Pass through the real ioctl command.
	 */

	if (maj && (fn = maj->sm_ioctl) != 0) {
		if (!(maj->sm_flag & D_MP)) {
			/* non-MP-safe driver: serialize around the call */
			UNSAFE_ENTER();
			rc = (*fn)(dev, cmd, arg, mode, crp, rvalp);
			UNSAFE_EXIT();
		} else {
			rc = (*fn)(dev, cmd, arg, mode, crp, rvalp);
		}
	} else {
		rc = ENODEV;
	}

	/*
	 * Bug 4755783
	 * Fix up the size of the current partition to allow
	 * for the virtual volume to be a different size to the
	 * physical volume (e.g. for II compact dependent shadows).
	 *
	 * Note that this only attempts to fix up the current partition
	 * - the one that the ioctl was issued against.  There could be
	 * other sv'd partitions in the same vtoc, but we cannot tell
	 * so we don't attempt to fix them up.
	 */

	if (svp != NULL && rc == 0) {
		/* rewrite the size in the data already copied out above */
		switch (cmd) {
		case DKIOCGVTOC:
			rc = sv_fix_dkiocgvtoc(arg, mode, svp);
			break;

#ifdef DKIOCPARTITION
		case DKIOCGETEFI:
			rc = sv_fix_dkiocgetefi(arg, mode, svp);
			break;

		case DKIOCPARTITION:
			rc = sv_fix_dkiocpartition(arg, mode, svp);
			break;
#endif /* DKIOCPARTITION */
		}
	}

	/* drop the read lock taken by sv_find_enabled() */
	if (svp != NULL) {
		rw_exit(&svp->sv_lock);
	}

	return (rc);
}