1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2012 Milan Jurik. All rights reserved.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 
  29 /*
  30  * Overview of the RSM Kernel Agent:
  31  * ---------------------------------
  32  *
  33  * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
  34  * kernel agent is a pseudo device driver which makes use of the RSMPI
  35  * interface on behalf of the RSMAPI user library.
  36  *
  37  * The kernel agent functionality can be categorized into the following
  38  * components:
  39  * 1. Driver Infrastructure
  40  * 2. Export/Import Segment Management
  41  * 3. Internal resource allocation/deallocation
  42  *
  43  * The driver infrastructure includes the basic module loading entry points
  44  * like _init, _info, _fini to load, unload and report information about
  45  * the driver module. The driver infrastructure also includes the
  46  * autoconfiguration entry points namely, attach, detach and getinfo for
  47  * the device autoconfiguration.
  48  *
  49  * The kernel agent is a pseudo character device driver and exports
  50  * a cb_ops structure which defines the driver entry points for character
  51  * device access. This includes the open and close entry points. The
  52  * other entry points provided include ioctl, devmap and segmap and chpoll.
  53  * read and write entry points are not used since the device is memory
  54  * mapped. Also ddi_prop_op is used for the prop_op entry point.
  55  *
  56  * The ioctl entry point supports a number of commands, which are used by
  57  * the RSMAPI library in order to export and import segments. These
  58  * commands include commands for binding and rebinding the physical pages
  59  * allocated to the virtual address range, publishing the export segment,
  60  * unpublishing and republishing an export segment, creating an
  61  * import segment and a virtual connection from this import segment to
  62  * an export segment, performing scatter-gather data transfer, barrier
  63  * operations.
  64  *
  65  *
  66  * Export and Import segments:
  67  * ---------------------------
  68  *
  69  * In order to create an RSM export segment a process allocates a range in its
  70  * virtual address space for the segment using standard Solaris interfaces.
  71  * The process then calls RSMAPI, which in turn makes an ioctl call to the
  72  * RSM kernel agent for an allocation of physical memory pages and for
  73  * creation of the export segment by binding these pages to the virtual
  74  * address range. These pages are locked in memory so that remote accesses
  75  * are always applied to the correct page. Then the RSM segment is published,
  76  * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
  77  * is assigned to it.
  78  *
  79  * In order to import a published RSM segment, RSMAPI creates an import
  80  * segment and forms a virtual connection across the interconnect to the
  81  * export segment, via an ioctl into the kernel agent with the connect
  82  * command. The import segment setup is completed by mapping the
  83  * local device memory into the importer's virtual address space. The
  84  * mapping of the import segment is handled by the segmap/devmap
  85  * infrastructure described as follows.
  86  *
  87  * Segmap and Devmap interfaces:
  88  *
  89  * The RSM kernel agent allows device memory to be directly accessed by user
  90  * threads via memory mapping. In order to do so, the RSM kernel agent
  91  * supports the devmap and segmap entry points.
  92  *
  93  * The segmap entry point(rsm_segmap) is responsible for setting up a memory
  94  * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
  95  * responsible for exporting the device memory to the user applications.
  96  * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
  97  * control is transferred to the devmap_setup call which calls rsm_devmap.
  98  *
  99  * rsm_devmap validates the user mapping to the device or kernel memory
 100  * and passes the information to the system for setting up the mapping. The
 101  * actual setting up of the mapping is done by devmap_devmem_setup(for
 102  * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
 103  * registered for device context management via the devmap_devmem_setup
 104  * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
 105  * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
 106  * is created, a mapping is freed, a mapping is accessed or an existing
 107  * mapping is duplicated respectively. These callbacks allow the RSM kernel
 108  * agent to maintain state information associated with the mappings.
 109  * The state information is mainly in the form of a cookie list for the import
 110  * segment for which mapping has been done.
 111  *
 112  * Forced disconnect of import segments:
 113  *
 114  * When an exported segment is unpublished, the exporter sends a forced
 115  * disconnect message to all its importers. The importer segments are
 116  * unloaded and disconnected. This involves unloading the original
 117  * mappings and remapping to a preallocated kernel trash page. This is
 118  * done by devmap_umem_remap. The trash/dummy page is a kernel page,
 119  * preallocated by the kernel agent during attach using ddi_umem_alloc with
 120  * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
 121  * due to unloading of the original mappings.
 122  *
 123  * Additionally every segment has a mapping generation number associated
 124  * with it. This is an entry in the barrier generation page, created
 125  * during attach time. This mapping generation number for the import
 126  * segments is incremented on a force disconnect to notify the application
 127  * of the force disconnect. On this notification, the application needs
 128  * to reconnect the segment to establish a new legitimate mapping.
 129  *
 130  *
 131  * Locks used in the kernel agent:
 132  * -------------------------------
 133  *
 134  * The kernel agent uses a variety of mutexes and condition variables for
 135  * mutual exclusion of the shared data structures and for synchronization
 136  * between the various threads. Some of the locks are described as follows.
 137  *
 138  * Each resource structure, representing either an export or import segment,
 139  * has a lock associated with it. The lock is the resource mutex, rsmrc_lock.
 140  * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
 141  * rsmseglock_acquire and rsmseglock_release macros. An additional
 142  * lock called the rsmsi_lock is used for the shared import data structure
 143  * that is relevant for resources representing import segments. There is
 144  * also a condition variable associated with the resource called s_cv. This
 145  * is used to wait for events like the segment state change etc.
 146  *
 147  * The resource structures are allocated from a pool of resource structures,
 148  * called rsm_resource. This pool is protected via a reader-writer lock,
 149  * called rsmrc_lock.
 150  *
 151  * There are two separate hash tables, one for the export segments and
 152  * one for the import segments. The export segments are inserted into the
 153  * export segment hash table only after they have been published and the
 154  * import segments are inserted in the import segments list only after they
 155  * have successfully connected to an exported segment. These tables are
 156  * protected via reader-writer locks.
 157  *
 158  * Debug Support in the kernel agent:
 159  * ----------------------------------
 160  *
 161  * Debugging support in the kernel agent is provided by the following
 162  * macros.
 163  *
 164  * DBG_PRINTF((category, level, message)) is a macro which logs a debug
 165  * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
 166  * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
 167  * on the definition of the category and level. All messages that belong to
 168  * the specified category(rsmdbg_category) and are of an equal or greater
 169  * severity than the specified level(rsmdbg_level) are logged. The message
 170  * is a string which uses the same formatting rules as the strings used in
 171  * printf.
 172  *
 173  * The category defines which component of the kernel agent has logged this
 174  * message. There are a number of categories that have been defined such as
 175  * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
 176  * DBG_ADDCATEGORY is used to add in another category to the currently
 177  * specified category value so that the component using this new category
 178  * can also effectively log debug messages. Thus, the category of a specific
 179  * message is some combination of the available categories and we can define
 180  * sub-categories if we want a finer level of granularity.
 181  *
 182  * The level defines the severity of the message. Different level values are
 183  * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
 184  * the least severe(debug level is 0).
 185  *
 186  * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
 187  * variable or a string respectively.
 188  *
 189  *
 190  * NOTES:
 191  *
 192  * Special Fork and Exec Handling:
 193  * -------------------------------
 194  *
 195  * The backing physical pages of an exported segment are always locked down.
 196  * Thus, there are two cases in which a process having exported segments
 197  * will cause a cpu to hang: (1) the process invokes exec; (2) a process
 198  * forks and invokes exit before the duped file descriptors for the export
 199  * segments are closed in the child process. The hang is caused because the
 200  * address space release algorithm in Solaris VM subsystem is based on a
 201  * non-blocking loop which does not terminate while segments are locked
 202  * down. In addition to this, Solaris VM subsystem lacks a callback
 203  * mechanism to the rsm kernel agent to allow unlocking these export
 204  * segment pages.
 205  *
 206  * In order to circumvent this problem, the kernel agent does the following.
 207  * The Solaris VM subsystem keeps memory segments in increasing order of
 208  * virtual addresses. Thus a special page(special_exit_offset) is allocated
 209  * by the kernel agent and is mmapped into the heap area of the process address
 210  * space(the mmap is done by the RSMAPI library). During the mmap processing
 211  * of this special page by the devmap infrastructure, a callback(the same
 212  * devmap context management callbacks discussed above) is registered for an
 213  * unmap.
 214  *
 215  * As discussed above, this page is processed by the Solaris address space
 216  * release code before any of the exported segments pages(which are allocated
 217  * from high memory). It is during this processing that the unmap callback gets
 218  * called and this callback is responsible for force destroying the exported
 219  * segments and thus eliminating the problem of locked pages.
 220  *
 221  * Flow-control:
 222  * ------------
 223  *
 224  * A credit based flow control algorithm is used for messages whose
 225  * processing cannot be done in the interrupt context because it might
 226  * involve invoking rsmpi calls, or might take a long time to complete
 227  * or might need to allocate resources. The algorithm operates on a per
 228  * path basis. To send a message the pathend needs to have a credit and
 229  * it consumes one for every message that is flow controlled. On the
 230  * receiving pathend the message is put on a msgbuf_queue and a task is
 231  * dispatched on the worker thread - recv_taskq where it is processed.
 232  * After processing the message, the receiving pathend dequeues the message,
 233  * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
 234  * credits to the sender pathend.
 235  *
 236  * RSM_DRTEST:
 237  * -----------
 238  *
 239  * This is used to enable the DR testing using a test driver on test
 240  * platforms which do not support DR.
 241  *
 242  */
 243 
 244 #include <sys/types.h>
 245 #include <sys/param.h>
 246 #include <sys/user.h>
 247 #include <sys/buf.h>
 248 #include <sys/systm.h>
 249 #include <sys/cred.h>
 250 #include <sys/vm.h>
 251 #include <sys/uio.h>
 252 #include <vm/seg.h>
 253 #include <vm/page.h>
 254 #include <sys/stat.h>
 255 
 256 #include <sys/time.h>
 257 #include <sys/errno.h>
 258 
 259 #include <sys/file.h>
 260 #include <sys/uio.h>
 261 #include <sys/proc.h>
 262 #include <sys/mman.h>
 263 #include <sys/open.h>
 264 #include <sys/atomic.h>
 265 #include <sys/mem_config.h>
 266 
 267 
 268 #include <sys/ddi.h>
 269 #include <sys/devops.h>
 270 #include <sys/ddidevmap.h>
 271 #include <sys/sunddi.h>
 272 #include <sys/esunddi.h>
 273 #include <sys/ddi_impldefs.h>
 274 
 275 #include <sys/kmem.h>
 276 #include <sys/conf.h>
 277 #include <sys/devops.h>
 278 #include <sys/ddi_impldefs.h>
 279 
 280 #include <sys/modctl.h>
 281 
 282 #include <sys/policy.h>
 283 #include <sys/types.h>
 284 #include <sys/conf.h>
 285 #include <sys/param.h>
 286 
 287 #include <sys/taskq.h>
 288 
 289 #include <sys/rsm/rsm_common.h>
 290 #include <sys/rsm/rsmapi_common.h>
 291 #include <sys/rsm/rsm.h>
 292 #include <rsm_in.h>
 293 #include <sys/rsm/rsmka_path_int.h>
 294 #include <sys/rsm/rsmpi.h>
 295 
 296 #include <sys/modctl.h>
 297 #include <sys/debug.h>
 298 
 299 #include <sys/tuneable.h>
 300 
 301 #ifdef  RSM_DRTEST
 302 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
 303                 void *arg);
 304 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
 305                 void *arg);
 306 #endif
 307 
 308 extern void dbg_printf(int category, int level, char *fmt, ...);
 309 extern void rsmka_pathmanager_init();
 310 extern void rsmka_pathmanager_cleanup();
 311 extern void rele_sendq_token(sendq_token_t *);
 312 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
 313 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
 314 extern int rsmka_topology_ioctl(caddr_t, int, int);
 315 
 316 extern pri_t maxclsyspri;
 317 extern work_queue_t work_queue;
 318 extern kmutex_t ipc_info_lock;
 319 extern kmutex_t ipc_info_cvlock;
 320 extern kcondvar_t ipc_info_cv;
 321 extern kmutex_t path_hold_cvlock;
 322 extern kcondvar_t path_hold_cv;
 323 
 324 extern kmutex_t rsmka_buf_lock;
 325 
 326 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
 327 extern adapter_t *rsmka_lookup_adapter(char *, int);
 328 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
 329 extern boolean_t rsmka_do_path_active(path_t *, int);
 330 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
 331 extern void rsmka_release_adapter(adapter_t *);
 332 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
 333 extern void rsmka_dequeue_msgbuf(path_t *path);
 334 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
 335 /* lint -w2 */
 336 
 337 static int rsm_open(dev_t *, int, int, cred_t *);
 338 static int rsm_close(dev_t, int, int, cred_t *);
 339 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
 340     cred_t *credp, int *rvalp);
 341 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
 342     uint_t);
 343 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
 344     uint_t, uint_t, cred_t *);
 345 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
 346     struct pollhead **phpp);
 347 
 348 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 349 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
 350 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
 351 
 352 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
 353 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
 354 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
 355 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
 356                                 rsm_permission_t);
 357 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
 358 static void rsmacl_free(rsmapi_access_entry_t *, int);
 359 static void rsmpiacl_free(rsm_access_entry_t *, int);
 360 
 361 static int rsm_inc_pgcnt(pgcnt_t);
 362 static void rsm_dec_pgcnt(pgcnt_t);
 363 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
 364 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
 365                                         size_t *);
 366 static void exporter_quiesce();
 367 static void rsmseg_suspend(rsmseg_t *, int *);
 368 static void rsmsegshare_suspend(rsmseg_t *);
 369 static int rsmseg_resume(rsmseg_t *, void **);
 370 static int rsmsegshare_resume(rsmseg_t *);
 371 
 372 static struct cb_ops rsm_cb_ops = {       /* character device entry points */
 373         rsm_open,               /* open */
 374         rsm_close,              /* close */
 375         nodev,                  /* strategy */
 376         nodev,                  /* print */
 377         nodev,                  /* dump */
 378         nodev,                  /* read - unused, device is memory mapped */
 379         nodev,                  /* write - unused, device is memory mapped */
 380         rsm_ioctl,              /* ioctl */
 381         rsm_devmap,             /* devmap */
 382         NULL,                   /* mmap */
 383         rsm_segmap,             /* segmap */
 384         rsm_chpoll,             /* chpoll */
 385         ddi_prop_op,            /* cb_prop_op */
 386         0,                      /* streamtab - none, not a STREAMS driver */
 387         D_NEW|D_MP|D_DEVMAP,    /* Driver compatibility flag */
 388         0,      /* presumably cb_rev; NOTE(review): 0 rather than CB_REV */
 389         0,      /* presumably cb_aread (not provided) - TODO confirm */
 390         0       /* presumably cb_awrite (not provided) - TODO confirm */
 391 };
 392 
 393 static struct dev_ops rsm_ops = {         /* autoconfiguration entry points */
 394         DEVO_REV,               /* devo_rev */
 395         0,                      /* refcnt */
 396         rsm_info,               /* get_dev_info */
 397         nulldev,                /* identify */
 398         nulldev,                /* probe */
 399         rsm_attach,             /* attach */
 400         rsm_detach,             /* detach */
 401         nodev,                  /* reset */
 402         &rsm_cb_ops,                /* driver operations (cb_ops above) */
 403         (struct bus_ops *)0,    /* bus operations - none */
 404         0,      /* presumably the power entry point (none) - TODO confirm */
 405         ddi_quiesce_not_needed,         /* quiesce */
 406 };
 407 
 408 /*
 409  * Module linkage information for the kernel.
 410  */
 411 
 412 static struct modldrv modldrv = {         /* driver linkage element */
 413         &mod_driverops, /* Type of module.  This one is a pseudo driver */
 414         "Remote Shared Memory Driver",  /* human-readable module description */
 415         &rsm_ops,   /* driver ops */
 416 };
 417 
 418 static struct modlinkage modlinkage = {   /* used by _init, _info and _fini */
 419         MODREV_1,               /* module linkage revision */
 420         (void *)&modldrv,       /* the only linkage element: the driver */
 421         0,                      /* remaining linkage slots are left empty */
 422         0,
 423         0
 424 };
 425 
 426 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
 427 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
 428 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
 429 
 430 static kphysm_setup_vector_t rsm_dr_callback_vec = {   /* DR callbacks, */
 431         KPHYSM_SETUP_VECTOR_VERSION,    /* registered by rsm_attach when */
 432         rsm_dr_callback_post_add,       /* rsm_enable_dr is set: post-add, */
 433         rsm_dr_callback_pre_del,        /* pre-delete and */
 434         rsm_dr_callback_post_del        /* post-delete handlers */
 435 };
 436 
 437 /* This flag can be changed to 0 to help with PIT testing */
 438 int rsmka_modunloadok = 1;
 439 int no_reply_cnt = 0;
 440 
 441 uint64_t rsm_ctrlmsg_errcnt = 0;
 442 uint64_t rsm_ipcsend_errcnt = 0;
 443 
 444 #define MAX_NODES 64
 445 
 446 static struct rsm_driver_data rsm_drv_data;
 447 static struct rsmresource_table rsm_resource;
 448 
 449 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
 450 static void rsmresource_destroy(void);
 451 static int rsmresource_alloc(minor_t *);
 452 static rsmresource_t *rsmresource_free(minor_t rnum);
 453 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
 454 static int rsm_unpublish(rsmseg_t *seg, int mode);
 455 static int rsm_unbind(rsmseg_t *seg);
 456 static uint_t rsmhash(rsm_memseg_id_t key);
 457 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
 458 static void rsmhash_free(rsmhash_table_t *rhash, int size);
 459 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
 460 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
 461 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
 462                                         void *cookie);
 463 int rsm_disconnect(rsmseg_t *seg);
 464 void rsmseg_unload(rsmseg_t *);
 465 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
 466 
 467 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
 468     rsm_intr_q_op_t opcode, rsm_addr_t src,
 469     void *data, size_t size, rsm_intr_hand_arg_t arg);
 470 
 471 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
 472 
 473 rsm_node_id_t my_nodeid;
 474 
 475 /* cookie, va, offsets and length for the barrier */
 476 static rsm_gnum_t               *bar_va;
 477 static ddi_umem_cookie_t        bar_cookie;
 478 static off_t                    barrier_offset;
 479 static size_t                   barrier_size;
 480 static int                      max_segs;
 481 
 482 /* cookie for the trash memory */
 483 static ddi_umem_cookie_t        remap_cookie;
 484 
 485 static rsm_memseg_id_t  rsm_nextavail_segmentid;
 486 
 487 extern taskq_t *work_taskq;
 488 extern char *taskq_name;
 489 
 490 static dev_info_t *rsm_dip;     /* private copy of devinfo pointer */
 491 
 492 static rsmhash_table_t rsm_export_segs;         /* list of exported segs */
 493 rsmhash_table_t rsm_import_segs;                /* list of imported segs */
 494 static rsmhash_table_t rsm_event_queues;        /* list of event queues */
 495 
 496 static  rsm_ipc_t       rsm_ipc;                /* ipc info */
 497 
 498 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
 499 static list_head_t      rsm_suspend_list;
 500 
 501 /* list of descriptors for remote importers */
 502 static importers_table_t importer_list;
 503 
 504 kmutex_t rsm_suspend_cvlock;
 505 kcondvar_t rsm_suspend_cv;
 506 
 507 static kmutex_t rsm_lock;
 508 
 509 adapter_t loopback_adapter;
 510 rsm_controller_attr_t loopback_attr;
 511 
 512 int rsmipc_send_controlmsg(path_t *path, int msgtype);
 513 
 514 void rsmka_init_loopback();
 515 
 516 int rsmka_null_seg_create(
 517     rsm_controller_handle_t,
 518     rsm_memseg_export_handle_t *,
 519     size_t,
 520     uint_t,
 521     rsm_memory_local_t *,
 522     rsm_resource_callback_t,
 523     rsm_resource_callback_arg_t);
 524 
 525 int rsmka_null_seg_destroy(
 526     rsm_memseg_export_handle_t);
 527 
 528 int rsmka_null_bind(
 529     rsm_memseg_export_handle_t,
 530     off_t,
 531     rsm_memory_local_t *,
 532     rsm_resource_callback_t,
 533     rsm_resource_callback_arg_t);
 534 
 535 int rsmka_null_unbind(
 536     rsm_memseg_export_handle_t,
 537     off_t,
 538     size_t);
 539 
 540 int rsmka_null_rebind(
 541     rsm_memseg_export_handle_t,
 542     off_t,
 543     rsm_memory_local_t *,
 544     rsm_resource_callback_t,
 545     rsm_resource_callback_arg_t);
 546 
 547 int rsmka_null_publish(
 548     rsm_memseg_export_handle_t,
 549     rsm_access_entry_t [],
 550     uint_t,
 551     rsm_memseg_id_t,
 552     rsm_resource_callback_t,
 553     rsm_resource_callback_arg_t);
 554 
 555 
 556 int rsmka_null_republish(
 557     rsm_memseg_export_handle_t,
 558     rsm_access_entry_t [],
 559     uint_t,
 560     rsm_resource_callback_t,
 561     rsm_resource_callback_arg_t);
 562 
 563 int rsmka_null_unpublish(
 564     rsm_memseg_export_handle_t);
 565 
 566 rsm_ops_t null_rsmpi_ops;
 567 
 568 /*
 569  * data and locks to keep track of total amount of exported memory
 570  */
 571 static  pgcnt_t         rsm_pgcnt;
 572 static  pgcnt_t         rsm_pgcnt_max;  /* max allowed */
 573 static  kmutex_t        rsm_pgcnt_lock;
 574 
 575 static  int             rsm_enable_dr;
 576 
 577 static  char            loopback_str[] = "loopback";
 578 
 579 int             rsm_hash_size;
 580 
 581 /*
 582  * The locking model is as follows:
 583  *
 584  * Local operations:
 585  *              find resource - grab reader lock on resource list
 586  *              insert rc     - grab writer lock
 587  *              delete rc     - grab writer lock and resource mutex
 588  *              read/write    - no lock
 589  *
 590  * Remote invocations:
 591  *              find resource - grab read lock and resource mutex
 592  *
 593  * State:
 594  *              resource state - grab resource mutex
 595  */
 596 
 597 int
 598 _init(void)
 599 {
 600         int e;
 601 
 602         e = mod_install(&modlinkage);
 603         if (e != 0) {
 604                 return (e);
 605         }
 606 
 607         mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
 608 
 609         mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
 610 
 611 
 612         rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
 613 
 614         rsm_hash_size = RSM_HASHSZ;
 615 
 616         rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
 617 
 618         rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
 619 
 620         mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
 621 
 622         mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
 623         cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
 624 
 625         mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
 626         cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
 627 
 628         mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
 629         cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
 630 
 631         rsm_ipc.count = RSMIPC_SZ;
 632         rsm_ipc.wanted = 0;
 633         rsm_ipc.sequence = 0;
 634 
 635         (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
 636 
 637         for (e = 0; e < RSMIPC_SZ; e++) {
 638                 rsmipc_slot_t *slot = &rsm_ipc.slots[e];
 639 
 640                 RSMIPC_SET(slot, RSMIPC_FREE);
 641                 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
 642                 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
 643         }
 644 
 645         /*
 646          * Initialize the suspend message list
 647          */
 648         rsm_suspend_list.list_head = NULL;
 649         mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
 650 
 651         /*
 652          * It is assumed here that configuration data is available
 653          * during system boot since _init may be called at that time.
 654          */
 655 
 656         rsmka_pathmanager_init();
 657 
 658         DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
 659             "rsm: _init done\n"));
 660 
 661         return (DDI_SUCCESS);
 662 
 663 }
 664 
 665 int
 666 _info(struct modinfo *modinfop)
 667 {
 668 
 669         return (mod_info(&modlinkage, modinfop));
 670 }
 671 
 672 int
 673 _fini(void)
 674 {
 675         int e;
 676 
 677         DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
 678             "rsm: _fini enter\n"));
 679 
 680         /*
 681          * The rsmka_modunloadok flag is simply used to help with
 682          * the PIT testing. Make this flag 0 to disallow modunload.
 683          */
 684         if (rsmka_modunloadok == 0)
 685                 return (EBUSY);
 686 
 687         /* rsm_detach will be called as a result of mod_remove */
 688         e = mod_remove(&modlinkage);
 689         if (e) {
 690                 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
 691                     "Unable to fini RSM %x\n", e));
 692                 return (e);
 693         }
 694 
 695         rsmka_pathmanager_cleanup();
 696 
 697         rw_destroy(&rsm_resource.rsmrc_lock);
 698 
 699         rw_destroy(&rsm_export_segs.rsmhash_rw);
 700         rw_destroy(&rsm_import_segs.rsmhash_rw);
 701         rw_destroy(&rsm_event_queues.rsmhash_rw);
 702 
 703         mutex_destroy(&importer_list.lock);
 704 
 705         mutex_destroy(&rsm_ipc.lock);
 706         cv_destroy(&rsm_ipc.cv);
 707 
 708         (void) mutex_destroy(&rsm_suspend_list.list_lock);
 709 
 710         (void) mutex_destroy(&rsm_pgcnt_lock);
 711 
 712         DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
 713 
 714         return (DDI_SUCCESS);
 715 
 716 }
 717 
 718 /*ARGSUSED1*/
 719 static int
 720 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 721 {
 722         minor_t rnum;
 723         int     percent;
 724         int     ret;
 725         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
 726 
 727         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
 728 
 729         switch (cmd) {
 730         case DDI_ATTACH:
 731                 break;
 732         case DDI_RESUME:
 733         default:
 734                 DBG_PRINTF((category, RSM_ERR,
 735                     "rsm:rsm_attach - cmd not supported\n"));
 736                 return (DDI_FAILURE);
 737         }
 738 
 739         if (rsm_dip != NULL) {
 740                 DBG_PRINTF((category, RSM_ERR,
 741                     "rsm:rsm_attach - supports only "
 742                     "one instance\n"));
 743                 return (DDI_FAILURE);
 744         }
 745 
 746         rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 747             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 748             "enable-dynamic-reconfiguration", 1);
 749 
 750         mutex_enter(&rsm_drv_data.drv_lock);
 751         rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
 752         mutex_exit(&rsm_drv_data.drv_lock);
 753 
 754         if (rsm_enable_dr) {
 755 #ifdef  RSM_DRTEST
 756                 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
 757                     (void *)NULL);
 758 #else
 759                 ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
 760                     (void *)NULL);
 761 #endif
 762                 if (ret != 0) {
 763                         mutex_exit(&rsm_drv_data.drv_lock);
 764                         cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
 765                             "reconfiguration setup failed\n");
 766                         return (DDI_FAILURE);
 767                 }
 768         }
 769 
 770         mutex_enter(&rsm_drv_data.drv_lock);
 771         ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
 772         rsm_drv_data.drv_state = RSM_DRV_OK;
 773         cv_broadcast(&rsm_drv_data.drv_cv);
 774         mutex_exit(&rsm_drv_data.drv_lock);
 775 
 776         /*
 777          * page_list_read_lock();
 778          * xx_setup();
 779          * page_list_read_unlock();
 780          */
 781 
 782         rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 783             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 784             "segment-hashtable-size", RSM_HASHSZ);
 785         if (rsm_hash_size == 0) {
 786                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
 787                     "rsm: segment-hashtable-size in rsm.conf "
 788                     "must be greater than 0, defaulting to 128\n"));
 789                 rsm_hash_size = RSM_HASHSZ;
 790         }
 791 
 792         DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
 793             rsm_hash_size));
 794 
 795         rsm_pgcnt = 0;
 796 
 797         percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 798             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 799             "max-exported-memory", 0);
 800         if (percent < 0) {
 801                 DBG_PRINTF((category, RSM_ERR,
 802                     "rsm:rsm_attach not enough memory available to "
 803                     "export, or max-exported-memory set incorrectly.\n"));
 804                 return (DDI_FAILURE);
 805         }
 806         /* 0 indicates no fixed upper limit. maxmem is the max  */
 807         /* available pageable physical mem                      */
 808         rsm_pgcnt_max = (percent*maxmem)/100;
 809 
 810         if (rsm_pgcnt_max > 0) {
 811                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
 812                     "rsm: Available physical memory = %lu pages, "
 813                     "Max exportable memory = %lu pages",
 814                     maxmem, rsm_pgcnt_max));
 815         }
 816 
 817         /*
 818          * Create minor number
 819          */
 820         if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
 821                 DBG_PRINTF((category, RSM_ERR,
 822                     "rsm: rsm_attach - Unable to get "
 823                     "minor number\n"));
 824                 return (DDI_FAILURE);
 825         }
 826 
 827         ASSERT(rnum == RSM_DRIVER_MINOR);
 828 
 829         if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
 830             rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 831                 DBG_PRINTF((category, RSM_ERR,
 832                     "rsm: rsm_attach - unable to allocate "
 833                     "minor #\n"));
 834                 return (DDI_FAILURE);
 835         }
 836 
 837         rsm_dip = devi;
 838         /*
 839          * Allocate the hashtables
 840          */
 841         rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
 842         rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
 843 
 844         importer_list.bucket = (importing_token_t **)
 845             kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
 846 
 847         /*
 848          * Allocate a resource struct
 849          */
 850         {
 851                 rsmresource_t *p;
 852 
 853                 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
 854 
 855                 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
 856 
 857                 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
 858         }
 859 
 860         /*
 861          * Based on the rsm.conf property max-segments, determine the maximum
 862          * number of segments that can be exported/imported. This is then used
 863          * to determine the size for barrier failure pages.
 864          */
 865 
 866         /* First get the max number of segments from the rsm.conf file */
 867         max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 868             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 869             "max-segments", 0);
 870         if (max_segs == 0) {
 871                 /* Use default number of segments */
 872                 max_segs = RSM_MAX_NUM_SEG;
 873         }
 874 
 875         /*
 876          * Based on the max number of segments allowed, determine the barrier
 877          * page size. add 1 to max_segs since the barrier page itself uses
 878          * a slot
 879          */
 880         barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
 881             PAGESIZE);
 882 
 883         /*
 884          * allocation of the barrier failure page
 885          */
 886         bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
 887             DDI_UMEM_SLEEP, &bar_cookie);
 888 
 889         /*
 890          * Set the barrier_offset
 891          */
 892         barrier_offset = 0;
 893 
 894         /*
 895          * Allocate a trash memory and get a cookie for it. This will be used
 896          * when remapping segments during force disconnects. Allocate the
 897          * trash memory with a large size which is page aligned.
 898          */
 899         (void) ddi_umem_alloc((size_t)TRASHSIZE,
 900             DDI_UMEM_TRASH, &remap_cookie);
 901 
 902         /* initialize user segment id allocation variable */
 903         rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
 904 
 905         /*
 906          * initialize the null_rsmpi_ops vector and the loopback adapter
 907          */
 908         rsmka_init_loopback();
 909 
 910 
 911         ddi_report_dev(devi);
 912 
 913         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
 914 
 915         return (DDI_SUCCESS);
 916 }
 917 
 918 /*
 919  * The call to mod_remove in the _fine routine will cause the system
 920  * to call rsm_detach
 921  */
 922 /*ARGSUSED*/
 923 static int
 924 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 925 {
 926         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
 927 
 928         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
 929 
 930         switch (cmd) {
 931         case DDI_DETACH:
 932                 break;
 933         default:
 934                 DBG_PRINTF((category, RSM_ERR,
 935                     "rsm:rsm_detach - cmd %x not supported\n",
 936                     cmd));
 937                 return (DDI_FAILURE);
 938         }
 939 
 940         mutex_enter(&rsm_drv_data.drv_lock);
 941         while (rsm_drv_data.drv_state != RSM_DRV_OK)
 942                 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
 943         rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
 944         mutex_exit(&rsm_drv_data.drv_lock);
 945 
 946         /*
 947          * Unregister the DR callback functions
 948          */
 949         if (rsm_enable_dr) {
 950 #ifdef  RSM_DRTEST
 951                 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
 952                     (void *)NULL);
 953 #else
 954                 kphysm_setup_func_unregister(&rsm_dr_callback_vec,
 955                     (void *)NULL);
 956 #endif
 957         }
 958 
 959         mutex_enter(&rsm_drv_data.drv_lock);
 960         ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
 961         rsm_drv_data.drv_state = RSM_DRV_NEW;
 962         mutex_exit(&rsm_drv_data.drv_lock);
 963 
 964         ASSERT(rsm_suspend_list.list_head == NULL);
 965 
 966         /*
 967          * Release all resources, seglist, controller, ...
 968          */
 969 
 970         /* remove intersend queues */
 971         /* remove registered services */
 972 
 973 
 974         ddi_remove_minor_node(dip, DRIVER_NAME);
 975         rsm_dip = NULL;
 976 
 977         /*
 978          * Free minor zero resource
 979          */
 980         {
 981                 rsmresource_t *p;
 982 
 983                 p = rsmresource_free(RSM_DRIVER_MINOR);
 984                 if (p) {
 985                         mutex_destroy(&p->rsmrc_lock);
 986                         kmem_free((void *)p, sizeof (*p));
 987                 }
 988         }
 989 
 990         /*
 991          * Free resource table
 992          */
 993 
 994         rsmresource_destroy();
 995 
 996         /*
 997          * Free the hash tables
 998          */
 999         rsmhash_free(&rsm_export_segs, rsm_hash_size);
1000         rsmhash_free(&rsm_import_segs, rsm_hash_size);
1001 
1002         kmem_free((void *)importer_list.bucket,
1003             rsm_hash_size * sizeof (importing_token_t *));
1004         importer_list.bucket = NULL;
1005 
1006 
1007         /* free barrier page */
1008         if (bar_cookie != NULL) {
1009                 ddi_umem_free(bar_cookie);
1010         }
1011         bar_va = NULL;
1012         bar_cookie = NULL;
1013 
1014         /*
1015          * Free the memory allocated for the trash
1016          */
1017         if (remap_cookie != NULL) {
1018                 ddi_umem_free(remap_cookie);
1019         }
1020         remap_cookie = NULL;
1021 
1022         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1023 
1024         return (DDI_SUCCESS);
1025 }
1026 
1027 /*ARGSUSED*/
1028 static int
1029 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1030 {
1031         register int error;
1032         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1033 
1034         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1035 
1036         switch (infocmd) {
1037         case DDI_INFO_DEVT2DEVINFO:
1038                 if (rsm_dip == NULL)
1039                         error = DDI_FAILURE;
1040                 else {
1041                         *result = (void *)rsm_dip;
1042                         error = DDI_SUCCESS;
1043                 }
1044                 break;
1045         case DDI_INFO_DEVT2INSTANCE:
1046                 *result = (void *)0;
1047                 error = DDI_SUCCESS;
1048                 break;
1049         default:
1050                 error = DDI_FAILURE;
1051         }
1052 
1053         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1054         return (error);
1055 }
1056 
1057 adapter_t *
1058 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1059 {
1060         adapter_t *adapter;
1061         char adapter_devname[MAXNAMELEN];
1062         int instance;
1063         DBG_DEFINE(category,
1064             RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1065 
1066         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1067 
1068         instance = msg->cnum;
1069 
1070         if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1071                 return (NULL);
1072         }
1073 
1074         if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1075                 return (NULL);
1076 
1077         if (strcmp(adapter_devname, "loopback") == 0)
1078                 return (&loopback_adapter);
1079 
1080         adapter = rsmka_lookup_adapter(adapter_devname, instance);
1081 
1082         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1083 
1084         return (adapter);
1085 }
1086 
1087 
1088 /*
1089  * *********************** Resource Number Management ********************
1090  * All resources are stored in a simple hash table. The table is an array
1091  * of pointers to resource blks. Each blk contains:
1092  *      base    - base number of this blk
1093  *      used    - number of used slots in this blk.
1094  *      blks    - array of pointers to resource items.
1095  * An entry in a resource blk is empty if it's NULL.
1096  *
1097  * We start with no resource array. Each time we run out of slots, we
1098  * reallocate a new larger array and copy the pointer to the new array and
1099  * a new resource blk is allocated and added to the hash table.
1100  *
1101  * The resource control block contains:
1102  *      root    - array of pointer of resource blks
1103  *      sz      - current size of array.
1104  *      len     - last valid entry in array.
1105  *
1106  * A search operation based on a resource number is as follows:
1107  *      index = rnum / RESOURCE_BLKSZ;
1108  *      ASSERT(index < resource_block.len);
1109  *      ASSERT(index < resource_block.sz);
1110  *      offset = rnum % RESOURCE_BLKSZ;
1111  *      ASSERT(offset >= resource_block.root[index]->base);
1112  *      ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1113  *      return resource_block.root[index]->blks[offset];
1114  *
 * A resource blk is freed when its used count reaches zero.
1116  */
1117 static int
1118 rsmresource_alloc(minor_t *rnum)
1119 {
1120 
1121         /* search for available resource slot */
1122         int i, j, empty = -1;
1123         rsmresource_blk_t *blk;
1124 
1125         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1126             "rsmresource_alloc enter\n"));
1127 
1128         rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1129 
1130         /* Try to find an empty slot */
1131         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1132                 blk = rsm_resource.rsmrc_root[i];
1133                 if (blk != NULL && blk->rsmrcblk_avail > 0) {
1134                         /* found an empty slot in this blk */
1135                         for (j = 0; j < RSMRC_BLKSZ; j++) {
1136                                 if (blk->rsmrcblk_blks[j] == NULL) {
1137                                         *rnum = (minor_t)
1138                                             (j + (i * RSMRC_BLKSZ));
1139                                         /*
1140                                          * obey gen page limits
1141                                          */
1142                                         if (*rnum >= max_segs + 1) {
1143                                                 if (empty < 0) {
1144                                                         rw_exit(&rsm_resource.
1145                                                             rsmrc_lock);
1146                                                         DBG_PRINTF((
1147                                                             RSM_KERNEL_ALL,
1148                                                             RSM_ERR,
1149                                                             "rsmresource"
1150                                                             "_alloc failed:"
1151                                                             "not enough res"
1152                                                             "%d\n", *rnum));
1153                                         return (RSMERR_INSUFFICIENT_RESOURCES);
1154                                                 } else {
1155                                                         /* use empty slot */
1156                                                         break;
1157                                                 }
1158 
1159                                         }
1160 
1161                                         blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1162                                         blk->rsmrcblk_avail--;
1163                                         rw_exit(&rsm_resource.rsmrc_lock);
1164                                         DBG_PRINTF((RSM_KERNEL_ALL,
1165                                             RSM_DEBUG_VERBOSE,
1166                                             "rsmresource_alloc done\n"));
1167                                         return (RSM_SUCCESS);
1168                                 }
1169                         }
1170                 } else if (blk == NULL && empty < 0) {
1171                         /* remember first empty slot */
1172                         empty = i;
1173                 }
1174         }
1175 
1176         /* Couldn't find anything, allocate a new blk */
1177         /*
1178          * Do we need to reallocate the root array
1179          */
1180         if (empty < 0) {
1181                 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1182                         /*
1183                          * Allocate new array and copy current stuff into it
1184                          */
1185                         rsmresource_blk_t       **p;
1186                         uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1187                             RSMRC_BLKSZ;
1188                         /*
1189                          * Don't allocate more that max valid rnum
1190                          */
1191                         if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1192                             max_segs + 1) {
1193                                 rw_exit(&rsm_resource.rsmrc_lock);
1194                                 return (RSMERR_INSUFFICIENT_RESOURCES);
1195                         }
1196 
1197                         p = (rsmresource_blk_t **)kmem_zalloc(
1198                             newsz * sizeof (*p),
1199                             KM_SLEEP);
1200 
1201                         if (rsm_resource.rsmrc_root) {
1202                                 uint_t oldsz;
1203 
1204                                 oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1205                                     (int)sizeof (*p));
1206 
1207                                 /*
1208                                  * Copy old data into new space and
1209                                  * free old stuff
1210                                  */
1211                                 bcopy(rsm_resource.rsmrc_root, p, oldsz);
1212                                 kmem_free(rsm_resource.rsmrc_root, oldsz);
1213                         }
1214 
1215                         rsm_resource.rsmrc_root = p;
1216                         rsm_resource.rsmrc_sz = (int)newsz;
1217                 }
1218 
1219                 empty = rsm_resource.rsmrc_len;
1220                 rsm_resource.rsmrc_len++;
1221         }
1222 
1223         /*
1224          * Allocate a new blk
1225          */
1226         blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1227         ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1228         rsm_resource.rsmrc_root[empty] = blk;
1229         blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1230 
1231         /*
1232          * Allocate slot
1233          */
1234 
1235         *rnum = (minor_t)(empty * RSMRC_BLKSZ);
1236 
1237         /*
1238          * watch out not to exceed bounds of barrier page
1239          */
1240         if (*rnum >= max_segs + 1) {
1241                 rw_exit(&rsm_resource.rsmrc_lock);
1242                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1243                     "rsmresource_alloc failed %d\n", *rnum));
1244 
1245                 return (RSMERR_INSUFFICIENT_RESOURCES);
1246         }
1247         blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1248 
1249 
1250         rw_exit(&rsm_resource.rsmrc_lock);
1251 
1252         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1253             "rsmresource_alloc done\n"));
1254 
1255         return (RSM_SUCCESS);
1256 }
1257 
1258 static rsmresource_t *
1259 rsmresource_free(minor_t rnum)
1260 {
1261 
1262         /* search for available resource slot */
1263         int i, j;
1264         rsmresource_blk_t *blk;
1265         rsmresource_t *p;
1266 
1267         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1268             "rsmresource_free enter\n"));
1269 
1270         i = (int)(rnum / RSMRC_BLKSZ);
1271         j = (int)(rnum % RSMRC_BLKSZ);
1272 
1273         if (i >= rsm_resource.rsmrc_len) {
1274                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1275                     "rsmresource_free done\n"));
1276                 return (NULL);
1277         }
1278 
1279         rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1280 
1281         ASSERT(rsm_resource.rsmrc_root);
1282         ASSERT(i < rsm_resource.rsmrc_len);
1283         ASSERT(i < rsm_resource.rsmrc_sz);
1284         blk = rsm_resource.rsmrc_root[i];
1285         if (blk == NULL) {
1286                 rw_exit(&rsm_resource.rsmrc_lock);
1287                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1288                     "rsmresource_free done\n"));
1289                 return (NULL);
1290         }
1291 
1292         ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1293 
1294         p = blk->rsmrcblk_blks[j];
1295         if (p == RSMRC_RESERVED) {
1296                 p = NULL;
1297         }
1298 
1299         blk->rsmrcblk_blks[j] = NULL;
1300         blk->rsmrcblk_avail++;
1301         if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1302                 /* free this blk */
1303                 kmem_free(blk, sizeof (*blk));
1304                 rsm_resource.rsmrc_root[i] = NULL;
1305         }
1306 
1307         rw_exit(&rsm_resource.rsmrc_lock);
1308 
1309         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1310             "rsmresource_free done\n"));
1311 
1312         return (p);
1313 }
1314 
1315 static rsmresource_t *
1316 rsmresource_lookup(minor_t rnum, int lock)
1317 {
1318         int i, j;
1319         rsmresource_blk_t *blk;
1320         rsmresource_t *p;
1321 
1322         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1323             "rsmresource_lookup enter\n"));
1324 
1325         /* Find resource and lock it in READER mode */
1326         /* search for available resource slot */
1327 
1328         i = (int)(rnum / RSMRC_BLKSZ);
1329         j = (int)(rnum % RSMRC_BLKSZ);
1330 
1331         if (i >= rsm_resource.rsmrc_len) {
1332                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1333                     "rsmresource_lookup done\n"));
1334                 return (NULL);
1335         }
1336 
1337         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1338 
1339         blk = rsm_resource.rsmrc_root[i];
1340         if (blk != NULL) {
1341                 ASSERT(i < rsm_resource.rsmrc_len);
1342                 ASSERT(i < rsm_resource.rsmrc_sz);
1343 
1344                 p = blk->rsmrcblk_blks[j];
1345                 if (lock == RSM_LOCK) {
1346                         if (p != RSMRC_RESERVED) {
1347                                 mutex_enter(&p->rsmrc_lock);
1348                         } else {
1349                                 p = NULL;
1350                         }
1351                 }
1352         } else {
1353                 p = NULL;
1354         }
1355         rw_exit(&rsm_resource.rsmrc_lock);
1356 
1357         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1358             "rsmresource_lookup done\n"));
1359 
1360         return (p);
1361 }
1362 
1363 static void
1364 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1365 {
1366         /* Find resource and lock it in READER mode */
1367         /* Caller can upgrade if need be */
1368         /* search for available resource slot */
1369         int i, j;
1370         rsmresource_blk_t *blk;
1371 
1372         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1373             "rsmresource_insert enter\n"));
1374 
1375         i = (int)(rnum / RSMRC_BLKSZ);
1376         j = (int)(rnum % RSMRC_BLKSZ);
1377 
1378         p->rsmrc_type = type;
1379         p->rsmrc_num = rnum;
1380 
1381         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1382 
1383         ASSERT(rsm_resource.rsmrc_root);
1384         ASSERT(i < rsm_resource.rsmrc_len);
1385         ASSERT(i < rsm_resource.rsmrc_sz);
1386 
1387         blk = rsm_resource.rsmrc_root[i];
1388         ASSERT(blk);
1389 
1390         ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1391 
1392         blk->rsmrcblk_blks[j] = p;
1393 
1394         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1395             "rsmresource_insert done\n"));
1396 
1397         rw_exit(&rsm_resource.rsmrc_lock);
1398 }
1399 
1400 static void
1401 rsmresource_destroy()
1402 {
1403         int i, j;
1404 
1405         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1406             "rsmresource_destroy enter\n"));
1407 
1408         rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1409 
1410         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1411                 rsmresource_blk_t       *blk;
1412 
1413                 blk = rsm_resource.rsmrc_root[i];
1414                 if (blk == NULL) {
1415                         continue;
1416                 }
1417                 for (j = 0; j < RSMRC_BLKSZ; j++) {
1418                         if (blk->rsmrcblk_blks[j] != NULL) {
1419                                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1420                                     "Not null slot %d, %lx\n", j,
1421                                     (size_t)blk->rsmrcblk_blks[j]));
1422                         }
1423                 }
1424                 kmem_free(blk, sizeof (*blk));
1425                 rsm_resource.rsmrc_root[i] = NULL;
1426         }
1427         if (rsm_resource.rsmrc_root) {
1428                 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1429                 kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1430                 rsm_resource.rsmrc_root = NULL;
1431                 rsm_resource.rsmrc_len = 0;
1432                 rsm_resource.rsmrc_sz = 0;
1433         }
1434 
1435         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1436             "rsmresource_destroy done\n"));
1437 
1438         rw_exit(&rsm_resource.rsmrc_lock);
1439 }
1440 
1441 
1442 /* ******************** Generic Key Hash Table Management ********* */
1443 static rsmresource_t *
1444 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1445     rsm_resource_state_t state)
1446 {
1447         rsmresource_t   *p;
1448         uint_t          hashval;
1449         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1450 
1451         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1452 
1453         hashval = rsmhash(key);
1454 
1455         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1456             key, hashval));
1457 
1458         rw_enter(&rhash->rsmhash_rw, RW_READER);
1459 
1460         p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1461 
1462         for (; p; p = p->rsmrc_next) {
1463                 if (p->rsmrc_key == key) {
1464                         /* acquire resource lock */
1465                         RSMRC_LOCK(p);
1466                         break;
1467                 }
1468         }
1469 
1470         rw_exit(&rhash->rsmhash_rw);
1471 
1472         if (p != NULL && p->rsmrc_state != state) {
1473                 /* state changed, release lock and return null */
1474                 RSMRC_UNLOCK(p);
1475                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1476                     "rsmhash_lookup done: state changed\n"));
1477                 return (NULL);
1478         }
1479 
1480         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1481 
1482         return (p);
1483 }
1484 
1485 static void
1486 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1487 {
1488         rsmresource_t           *p, **back;
1489         uint_t                  hashval;
1490         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1491 
1492         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1493 
1494         hashval = rsmhash(rcelm->rsmrc_key);
1495 
1496         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1497             rcelm->rsmrc_key, hashval));
1498 
1499         /*
1500          * It's ok not to find the segment.
1501          */
1502         rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1503 
1504         back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1505 
1506         for (; (p = *back) != NULL;  back = &p->rsmrc_next) {
1507                 if (p == rcelm) {
1508                         *back = rcelm->rsmrc_next;
1509                         break;
1510                 }
1511         }
1512 
1513         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1514 
1515         rw_exit(&rhash->rsmhash_rw);
1516 }
1517 
1518 static int
1519 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1520     int dup_check, rsm_resource_state_t state)
1521 {
1522         rsmresource_t   *p = NULL, **bktp;
1523         uint_t          hashval;
1524         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1525 
1526         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1527 
1528         /* lock table */
1529         rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1530 
1531         /*
1532          * If the current resource state is other than the state passed in
1533          * then the resource is (probably) already on the list. eg. for an
1534          * import segment if the state is not RSM_STATE_NEW then it's on the
1535          * list already.
1536          */
1537         RSMRC_LOCK(new);
1538         if (new->rsmrc_state != state) {
1539                 RSMRC_UNLOCK(new);
1540                 rw_exit(&rhash->rsmhash_rw);
1541                 return (RSMERR_BAD_SEG_HNDL);
1542         }
1543 
1544         hashval = rsmhash(key);
1545         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1546 
1547         if (dup_check) {
1548                 /*
1549                  * Used for checking export segments; don't want to have
1550                  * the same key used for multiple segments.
1551                  */
1552 
1553                 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1554 
1555                 for (; p; p = p->rsmrc_next) {
1556                         if (p->rsmrc_key == key) {
1557                                 RSMRC_UNLOCK(new);
1558                                 break;
1559                         }
1560                 }
1561         }
1562 
1563         if (p == NULL) {
1564                 /* Key doesn't exist, add it */
1565 
1566                 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1567 
1568                 new->rsmrc_key = key;
1569                 new->rsmrc_next = *bktp;
1570                 *bktp = new;
1571         }
1572 
1573         rw_exit(&rhash->rsmhash_rw);
1574 
1575         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1576 
1577         return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1578 }
1579 
1580 /*
1581  * XOR each byte of the key.
1582  */
1583 static uint_t
1584 rsmhash(rsm_memseg_id_t key)
1585 {
1586         uint_t  hash = key;
1587 
1588         hash ^=  (key >> 8);
1589         hash ^=  (key >> 16);
1590         hash ^=  (key >> 24);
1591 
1592         return (hash % rsm_hash_size);
1593 
1594 }
1595 
1596 /*
1597  * generic function to get a specific bucket
1598  */
1599 static void *
1600 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1601 {
1602 
1603         if (rhash->bucket == NULL)
1604                 return (NULL);
1605         else
1606                 return ((void *)rhash->bucket[hashval]);
1607 }
1608 
1609 /*
1610  * generic function to get a specific bucket's address
1611  */
1612 static void **
1613 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1614 {
1615         if (rhash->bucket == NULL)
1616                 return (NULL);
1617         else
1618                 return ((void **)&(rhash->bucket[hashval]));
1619 }
1620 
1621 /*
1622  * generic function to alloc a hash table
1623  */
1624 static void
1625 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1626 {
1627         rhash->bucket = (rsmresource_t **)
1628             kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1629 }
1630 
1631 /*
1632  * generic function to free a hash table
1633  */
1634 static void
1635 rsmhash_free(rsmhash_table_t *rhash, int size)
1636 {
1637 
1638         kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1639         rhash->bucket = NULL;
1640 
1641 }
1642 /* *********************** Exported Segment Key Management ************ */
1643 
/*
 * Wrappers around the generic hash routines for the export segment table.
 * rsmexport_add asks rsmhash_add for a duplicate-key check and expects the
 * segment in RSM_STATE_BIND; rsmexport_lookup only returns segments in
 * RSM_STATE_EXPORT. Macro arguments and the cast expression are fully
 * parenthesized so that any expression can be passed and the result can
 * be used inside a larger expression.
 */
#define	rsmexport_add(new, key)		\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)(new), (key), 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	((rsmseg_t *)rsmhash_lookup(&rsm_export_segs, (key), \
	    RSM_STATE_EXPORT))

/* ************************** Import Segment List Management ********** */

/*
 * Add segment to import list. This will be useful for paging and loopback
 * segment unloading. No duplicate-key check is requested, and the segment
 * is expected to be in RSM_STATE_NEW.
 */
#define	rsmimport_add(arg, key)	\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW)

#define	rsmimport_rm(arg)	\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1666 
1667 /*
1668  *      #define rsmimport_lookup(key)   \
1669  *      (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1670  */
1671 
1672 /*
1673  * increase the ref count and make the import segment point to the
1674  * shared data structure. Return a pointer to the share data struct
1675  * and the shared data struct is locked upon return
1676  */
1677 static rsm_import_share_t *
1678 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1679     rsmseg_t *segp)
1680 {
1681         uint_t          hash;
1682         rsmresource_t           *p;
1683         rsm_import_share_t      *shdatap;
1684         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1685 
1686         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1687 
1688         hash = rsmhash(key);
1689         /* lock table */
1690         rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1691         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1692             key, hash));
1693 
1694         p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1695 
1696         for (; p; p = p->rsmrc_next) {
1697                 /*
1698                  * Look for an entry that is importing the same exporter
1699                  * with the share data structure allocated.
1700                  */
1701                 if ((p->rsmrc_key == key) &&
1702                     (p->rsmrc_node == node) &&
1703                     (p->rsmrc_adapter == adapter) &&
1704                     (((rsmseg_t *)p)->s_share != NULL)) {
1705                         shdatap = ((rsmseg_t *)p)->s_share;
1706                         break;
1707                 }
1708         }
1709 
1710         if (p == NULL) {
1711                 /* we are the first importer, create the shared data struct */
1712                 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1713                 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1714                 shdatap->rsmsi_segid = key;
1715                 shdatap->rsmsi_node = node;
1716                 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1717                 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1718         }
1719 
1720         rsmseglock_acquire(segp);
1721 
1722         /* we grab the shared lock before returning from this function */
1723         mutex_enter(&shdatap->rsmsi_lock);
1724 
1725         shdatap->rsmsi_refcnt++;
1726         segp->s_share = shdatap;
1727 
1728         rsmseglock_release(segp);
1729 
1730         rw_exit(&rsm_import_segs.rsmhash_rw);
1731 
1732         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1733 
1734         return (shdatap);
1735 }
1736 
1737 /*
1738  * the shared data structure should be locked before calling
1739  * rsmsharecv_signal().
1740  * Change the state and signal any waiting segments.
1741  */
1742 void
1743 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1744 {
1745         ASSERT(rsmsharelock_held(seg));
1746 
1747         if (seg->s_share->rsmsi_state == oldstate) {
1748                 seg->s_share->rsmsi_state = newstate;
1749                 cv_broadcast(&seg->s_share->rsmsi_cv);
1750         }
1751 }
1752 
1753 /*
1754  * Add to the hash table
1755  */
1756 static void
1757 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1758     void *cookie)
1759 {
1760 
1761         importing_token_t       *head;
1762         importing_token_t       *new_token;
1763         int                     index;
1764 
1765         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1766 
1767         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1768 
1769         new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1770         new_token->importing_node = node;
1771         new_token->key = key;
1772         new_token->import_segment_cookie = cookie;
1773         new_token->importing_adapter_hwaddr = hwaddr;
1774 
1775         index = rsmhash(key);
1776 
1777         mutex_enter(&importer_list.lock);
1778 
1779         head = importer_list.bucket[index];
1780         importer_list.bucket[index] = new_token;
1781         new_token->next = head;
1782         mutex_exit(&importer_list.lock);
1783 
1784         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1785 }
1786 
1787 static void
1788 importer_list_rm(rsm_node_id_t node,  rsm_memseg_id_t key, void *cookie)
1789 {
1790 
1791         importing_token_t       *prev, *token = NULL;
1792         int                     index;
1793         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1794 
1795         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1796 
1797         index = rsmhash(key);
1798 
1799         mutex_enter(&importer_list.lock);
1800 
1801         token = importer_list.bucket[index];
1802 
1803         prev = token;
1804         while (token != NULL) {
1805                 if (token->importing_node == node &&
1806                     token->import_segment_cookie == cookie) {
1807                         if (prev == token)
1808                                 importer_list.bucket[index] = token->next;
1809                         else
1810                                 prev->next = token->next;
1811                         kmem_free((void *)token, sizeof (*token));
1812                         break;
1813                 } else {
1814                         prev = token;
1815                         token = token->next;
1816                 }
1817         }
1818 
1819         mutex_exit(&importer_list.lock);
1820 
1821         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1822 
1823 
1824 }
1825 
1826 /* **************************Segment Structure Management ************* */
1827 
1828 /*
1829  * Free segment structure
1830  */
1831 static void
1832 rsmseg_free(rsmseg_t *seg)
1833 {
1834 
1835         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1836 
1837         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1838 
1839         /* need to take seglock here to avoid race with rsmmap_unmap() */
1840         rsmseglock_acquire(seg);
1841         if (seg->s_ckl != NULL) {
1842                 /* Segment is still busy */
1843                 seg->s_state = RSM_STATE_END;
1844                 rsmseglock_release(seg);
1845                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1846                     "rsmseg_free done\n"));
1847                 return;
1848         }
1849 
1850         rsmseglock_release(seg);
1851 
1852         ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1853 
1854         /*
1855          * If it's an importer decrement the refcount
1856          * and if its down to zero free the shared data structure.
1857          * This is where failures during rsm_connect() are unrefcounted
1858          */
1859         if (seg->s_share != NULL) {
1860 
1861                 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1862 
1863                 rsmsharelock_acquire(seg);
1864 
1865                 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1866 
1867                 seg->s_share->rsmsi_refcnt--;
1868 
1869                 if (seg->s_share->rsmsi_refcnt == 0) {
1870                         rsmsharelock_release(seg);
1871                         mutex_destroy(&seg->s_share->rsmsi_lock);
1872                         cv_destroy(&seg->s_share->rsmsi_cv);
1873                         kmem_free((void *)(seg->s_share),
1874                             sizeof (rsm_import_share_t));
1875                 } else {
1876                         rsmsharelock_release(seg);
1877                 }
1878                 /*
1879                  * The following needs to be done after any
1880                  * rsmsharelock calls which use seg->s_share.
1881                  */
1882                 seg->s_share = NULL;
1883         }
1884 
1885         cv_destroy(&seg->s_cv);
1886         mutex_destroy(&seg->s_lock);
1887         rsmacl_free(seg->s_acl, seg->s_acl_len);
1888         rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1889         if (seg->s_adapter)
1890                 rsmka_release_adapter(seg->s_adapter);
1891 
1892         kmem_free((void *)seg, sizeof (*seg));
1893 
1894         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1895 
1896 }
1897 
1898 
1899 static rsmseg_t *
1900 rsmseg_alloc(minor_t num, struct cred *cred)
1901 {
1902         rsmseg_t        *new;
1903         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1904 
1905         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1906         /*
1907          * allocate memory for new segment. This should be a segkmem cache.
1908          */
1909         new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1910 
1911         new->s_state = RSM_STATE_NEW;
1912         new->s_minor = num;
1913         new->s_acl_len       = 0;
1914         new->s_cookie = NULL;
1915         new->s_adapter = NULL;
1916 
1917         new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1918         /* we don't have a key yet, will set at export/connect */
1919         new->s_uid  = crgetuid(cred);
1920         new->s_gid  = crgetgid(cred);
1921 
1922         mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1923         cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1924 
1925         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1926 
1927         return (new);
1928 }
1929 
1930 /* ******************************** Driver Open/Close/Poll *************** */
1931 
1932 /*ARGSUSED1*/
1933 static int
1934 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1935 {
1936         minor_t rnum;
1937         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1938 
1939         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1940         /*
1941          * Char only
1942          */
1943         if (otyp != OTYP_CHR) {
1944                 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1945                 return (EINVAL);
1946         }
1947 
1948         /*
1949          * Only zero can be opened, clones are used for resources.
1950          */
1951         if (getminor(*devp) != RSM_DRIVER_MINOR) {
1952                 DBG_PRINTF((category, RSM_ERR,
1953                     "rsm_open: bad minor %d\n", getminor(*devp)));
1954                 return (ENODEV);
1955         }
1956 
1957         if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1958                 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1959                 return (EPERM);
1960         }
1961 
1962         if (!(flag & FWRITE)) {
1963                 /*
1964                  * The library function _rsm_librsm_init calls open for
1965                  * /dev/rsm with flag set to O_RDONLY.  We want a valid
1966                  * file descriptor to be returned for minor device zero.
1967                  */
1968 
1969                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1970                     "rsm_open RDONLY done\n"));
1971                 return (DDI_SUCCESS);
1972         }
1973 
1974         /*
1975          * - allocate new minor number and segment.
1976          * - add segment to list of all segments.
1977          * - set minordev data to segment
1978          * - update devp argument to new device
1979          * - update s_cred to cred; make sure you do crhold(cred);
1980          */
1981 
1982         /* allocate a new resource number */
1983         if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1984                 /*
1985                  * We will bind this minor to a specific resource in first
1986                  * ioctl
1987                  */
1988                 *devp = makedevice(getmajor(*devp), rnum);
1989         } else {
1990                 return (EAGAIN);
1991         }
1992 
1993         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1994         return (DDI_SUCCESS);
1995 }
1996 
/*
 * Tear down a segment according to its current state.
 *
 * seg:         segment being closed.
 * force_flag:  0 when called from rsm_close(); non-zero for the
 *              force-destroy path described in the comments below.
 *
 * The segment is driven back to RSM_STATE_NEW by undoing whatever its
 * current state implies (unpublish, unbind, disconnect).  Afterwards a
 * normal close frees the segment via rsmseg_free(), while a forced close
 * leaves it allocated in RSM_STATE_ZOMBIE.
 *
 * If force_flag is 1 and rsm_unbind() fails, the function returns early
 * and leaves the segment state unchanged.
 */
static void
rsmseg_close(rsmseg_t *seg, int force_flag)
{
        int e = RSM_SUCCESS;

        DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));

        rsmseglock_acquire(seg);
        if (!force_flag && (seg->s_hdr.rsmrc_type ==
            RSM_RESOURCE_EXPORT_SEGMENT)) {
                /*
                 * If we are processing rsm_close wait for force_destroy
                 * processing to complete since force_destroy processing
                 * needs to finish first before we can free the segment.
                 * force_destroy is only for export segments
                 */
                while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
                        cv_wait(&seg->s_cv, &seg->s_lock);
                }
        }
        rsmseglock_release(seg);

        /* It's ok to read the state without a lock */
        switch (seg->s_state) {
        case RSM_STATE_EXPORT:
        case RSM_STATE_EXPORT_QUIESCING:
        case RSM_STATE_EXPORT_QUIESCED:
                /* the result is overwritten by rsm_unbind() below */
                e = rsm_unpublish(seg, 1);
                /* FALLTHRU */
        case RSM_STATE_BIND_QUIESCED:
                /* FALLTHRU */
        case RSM_STATE_BIND:
                e = rsm_unbind(seg);
                /* a failed unbind aborts a forced close */
                if (e != RSM_SUCCESS && force_flag == 1)
                        return;
                ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
                /* FALLTHRU */
        case RSM_STATE_NEW_QUIESCED:
                /* back to NEW; wake anyone waiting on the segment cv */
                rsmseglock_acquire(seg);
                seg->s_state = RSM_STATE_NEW;
                cv_broadcast(&seg->s_cv);
                rsmseglock_release(seg);
                break;
        case RSM_STATE_NEW:
                break;
        case RSM_STATE_ZOMBIE:
                /*
                 * Segments in this state have been removed off the
                 * exported segments list and have been unpublished
                 * and unbound. These segments have been removed during
                 * a callback to the rsm_export_force_destroy, which
                 * is called for the purpose of unlocking these
                 * exported memory segments when a process exits but
                 * leaves the segments locked down since rsm_close
                 * is not called for the segments. This can happen
                 * when a process calls fork or exec and then exits.
                 * Once the segments are in the ZOMBIE state, all that
                 * remains is to destroy them when rsm_close is called.
                 * This is done here. Thus, for such segments
                 * the state is changed to new so that later in this
                 * function rsmseg_free is called.
                 */
                rsmseglock_acquire(seg);
                seg->s_state = RSM_STATE_NEW;
                rsmseglock_release(seg);
                break;
        case RSM_STATE_MAP_QUIESCE:
        case RSM_STATE_ACTIVE:
                /* Disconnect will handle the unmap */
        case RSM_STATE_CONN_QUIESCE:
        case RSM_STATE_CONNECT:
        case RSM_STATE_DISCONNECT:
                ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
                /*
                 * NOTE(review): the ASSERT after the switch expects the
                 * state to be RSM_STATE_NEW, so rsm_disconnect() is
                 * presumably what resets it on this path - confirm.
                 */
                (void) rsm_disconnect(seg);
                break;
        case RSM_STATE_MAPPING:
                /*FALLTHRU*/
        case RSM_STATE_END:
                DBG_PRINTF((category, RSM_ERR,
                    "Invalid segment state %d in rsm_close\n", seg->s_state));
                break;
        default:
                DBG_PRINTF((category, RSM_ERR,
                    "Invalid segment state %d in rsm_close\n", seg->s_state));
                break;
        }

        /*
         * check state.
         * - make sure you do crfree(s_cred);
         * release segment and minor number
         */
        ASSERT(seg->s_state == RSM_STATE_NEW);

        /*
         * The export_force_destroy callback exists to unlock the exported
         * segments of a process that does a fork or exec and then exits.
         * It calls this function with the force flag set to 1, which
         * indicates that the segment state must be converted to ZOMBIE.
         * This state means that the segments still exist and have been
         * unlocked and, most importantly, the only operation allowed is
         * to destroy them on an rsm_close.
         */
        if (force_flag) {
                rsmseglock_acquire(seg);
                seg->s_state = RSM_STATE_ZOMBIE;
                rsmseglock_release(seg);
        } else {
                rsmseg_free(seg);
        }

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
}
2112 
2113 static int
2114 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2115 {
2116         minor_t rnum = getminor(dev);
2117         rsmresource_t *res;
2118         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2119 
2120         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2121 
2122         flag = flag; cred = cred;
2123 
2124         if (otyp != OTYP_CHR)
2125                 return (EINVAL);
2126 
2127         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2128 
2129         /*
2130          * At this point we are the last reference to the resource.
2131          * Free resource number from resource table.
2132          * It's ok to remove number before we free the segment.
2133          * We need to lock the resource to protect against remote calls.
2134          */
2135         if (rnum == RSM_DRIVER_MINOR ||
2136             (res = rsmresource_free(rnum)) == NULL) {
2137                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2138                 return (DDI_SUCCESS);
2139         }
2140 
2141         switch (res->rsmrc_type) {
2142         case RSM_RESOURCE_EXPORT_SEGMENT:
2143         case RSM_RESOURCE_IMPORT_SEGMENT:
2144                 rsmseg_close((rsmseg_t *)res, 0);
2145                 break;
2146         case RSM_RESOURCE_BAR:
2147                 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2148                 break;
2149         default:
2150                 break;
2151         }
2152 
2153         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2154 
2155         return (DDI_SUCCESS);
2156 }
2157 
2158 /*
2159  * rsm_inc_pgcnt
2160  *
2161  * Description: increment rsm page counter.
2162  *
2163  * Parameters:  pgcnt_t pnum;   number of pages to be used
2164  *
2165  * Returns:     RSM_SUCCESS     if memory limit not exceeded
2166  *              ENOSPC          if memory limit exceeded. In this case, the
2167  *                              page counter remains unchanged.
2168  *
2169  */
2170 static int
2171 rsm_inc_pgcnt(pgcnt_t pnum)
2172 {
2173         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2174         if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2175                 return (RSM_SUCCESS);
2176         }
2177 
2178         mutex_enter(&rsm_pgcnt_lock);
2179 
2180         if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2181                 /* ensure that limits have not been exceeded */
2182                 mutex_exit(&rsm_pgcnt_lock);
2183                 return (RSMERR_INSUFFICIENT_MEM);
2184         }
2185 
2186         rsm_pgcnt += pnum;
2187         DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2188             rsm_pgcnt));
2189         mutex_exit(&rsm_pgcnt_lock);
2190 
2191         return (RSM_SUCCESS);
2192 }
2193 
2194 /*
2195  * rsm_dec_pgcnt
2196  *
2197  * Description: decrement rsm page counter.
2198  *
2199  * Parameters:  pgcnt_t pnum;   number of pages freed
2200  *
2201  */
2202 static void
2203 rsm_dec_pgcnt(pgcnt_t pnum)
2204 {
2205         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2206 
2207         if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2208                 return;
2209         }
2210 
2211         mutex_enter(&rsm_pgcnt_lock);
2212         ASSERT(rsm_pgcnt >= pnum);
2213         rsm_pgcnt -= pnum;
2214         DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2215             rsm_pgcnt));
2216         mutex_exit(&rsm_pgcnt_lock);
2217 }
2218 
/*
 * Callback operations vector handed to umem_lockmemory() by
 * rsm_bind_pages().  Its callback slot is rsm_export_force_destroy.
 */
static struct umem_callback_ops rsm_as_ops = {
        UMEM_CALLBACK_VERSION, /* version number */
        rsm_export_force_destroy,
};
2223 
2224 static int
2225 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2226     proc_t *procp)
2227 {
2228         int error = RSM_SUCCESS;
2229         ulong_t pnum;
2230         struct umem_callback_ops *callbackops = &rsm_as_ops;
2231 
2232         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2233 
2234         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2235 
2236         /*
2237          * Make sure vaddr and len are aligned on a page boundary
2238          */
2239         if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2240                 return (RSMERR_BAD_ADDR);
2241         }
2242 
2243         if (len & (PAGESIZE - 1)) {
2244                 return (RSMERR_BAD_LENGTH);
2245         }
2246 
2247         /*
2248          * Find number of pages
2249          */
2250         pnum = btopr(len);
2251         error = rsm_inc_pgcnt(pnum);
2252         if (error != RSM_SUCCESS) {
2253                 DBG_PRINTF((category, RSM_ERR,
2254                     "rsm_bind_pages:mem limit exceeded\n"));
2255                 return (RSMERR_INSUFFICIENT_MEM);
2256         }
2257 
2258         error = umem_lockmemory(vaddr, len,
2259             DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2260             cookie,
2261             callbackops, procp);
2262 
2263         if (error) {
2264                 rsm_dec_pgcnt(pnum);
2265                 DBG_PRINTF((category, RSM_ERR,
2266                     "rsm_bind_pages:ddi_umem_lock failed\n"));
2267                 /*
2268                  * ddi_umem_lock, in the case of failure, returns one of
2269                  * the following three errors. These are translated into
2270                  * the RSMERR namespace and returned.
2271                  */
2272                 if (error == EFAULT)
2273                         return (RSMERR_BAD_ADDR);
2274                 else if (error == EACCES)
2275                         return (RSMERR_PERM_DENIED);
2276                 else
2277                         return (RSMERR_INSUFFICIENT_MEM);
2278         }
2279 
2280         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2281 
2282         return (error);
2283 
2284 }
2285 
2286 static int
2287 rsm_unbind_pages(rsmseg_t *seg)
2288 {
2289         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2290 
2291         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2292 
2293         ASSERT(rsmseglock_held(seg));
2294 
2295         if (seg->s_cookie != NULL) {
2296                 /* unlock address range */
2297                 ddi_umem_unlock(seg->s_cookie);
2298                 rsm_dec_pgcnt(btopr(seg->s_len));
2299                 seg->s_cookie = NULL;
2300         }
2301 
2302         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2303 
2304         return (RSM_SUCCESS);
2305 }
2306 
2307 
2308 static int
2309 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2310 {
2311         int e;
2312         adapter_t *adapter;
2313         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2314 
2315         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2316 
2317         adapter = rsm_getadapter(msg, mode);
2318         if (adapter == NULL) {
2319                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2320                     "rsm_bind done:no adapter\n"));
2321                 return (RSMERR_CTLR_NOT_PRESENT);
2322         }
2323 
2324         /* lock address range */
2325         if (msg->vaddr == NULL) {
2326                 rsmka_release_adapter(adapter);
2327                 DBG_PRINTF((category, RSM_ERR,
2328                     "rsm: rsm_bind done: invalid vaddr\n"));
2329                 return (RSMERR_BAD_ADDR);
2330         }
2331         if (msg->len <= 0) {
2332                 rsmka_release_adapter(adapter);
2333                 DBG_PRINTF((category, RSM_ERR,
2334                     "rsm_bind: invalid length\n"));
2335                 return (RSMERR_BAD_LENGTH);
2336         }
2337 
2338         /* Lock segment */
2339         rsmseglock_acquire(seg);
2340 
2341         while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2342                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2343                         DBG_PRINTF((category, RSM_DEBUG,
2344                             "rsm_bind done: cv_wait INTERRUPTED"));
2345                         rsmka_release_adapter(adapter);
2346                         rsmseglock_release(seg);
2347                         return (RSMERR_INTERRUPTED);
2348                 }
2349         }
2350 
2351         ASSERT(seg->s_state == RSM_STATE_NEW);
2352 
2353         ASSERT(seg->s_cookie == NULL);
2354 
2355         e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2356         if (e == RSM_SUCCESS) {
2357                 seg->s_flags |= RSM_USER_MEMORY;
2358                 if (msg->perm & RSM_ALLOW_REBIND) {
2359                         seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2360                 }
2361                 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2362                         seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2363                 }
2364                 seg->s_region.r_vaddr = msg->vaddr;
2365                 /*
2366                  * Set the s_pid value in the segment structure. This is used
2367                  * to identify exported segments belonging to a particular
2368                  * process so that when the process exits, these segments can
2369                  * be unlocked forcefully even if rsm_close is not called on
2370                  * process exit since there maybe other processes referencing
2371                  * them (for example on a fork or exec).
2372                  * The s_pid value is also used to authenticate the process
2373                  * doing a publish or unpublish on the export segment. Only
2374                  * the creator of the export segment has a right to do a
2375                  * publish or unpublish and unbind on the segment.
2376                  */
2377                 seg->s_pid = ddi_get_pid();
2378                 seg->s_len = msg->len;
2379                 seg->s_state = RSM_STATE_BIND;
2380                 seg->s_adapter = adapter;
2381                 seg->s_proc = curproc;
2382         } else {
2383                 rsmka_release_adapter(adapter);
2384                 DBG_PRINTF((category, RSM_WARNING,
2385                     "unable to lock down pages\n"));
2386         }
2387 
2388         msg->rnum = seg->s_minor;
2389         /* Unlock segment */
2390         rsmseglock_release(seg);
2391 
2392         if (e == RSM_SUCCESS) {
2393                 /* copyout the resource number */
2394 #ifdef _MULTI_DATAMODEL
2395                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2396                         rsm_ioctlmsg32_t msg32;
2397 
2398                         msg32.rnum = msg->rnum;
2399                         if (ddi_copyout((caddr_t)&msg32.rnum,
2400                             (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2401                             sizeof (minor_t), mode)) {
2402                                 rsmka_release_adapter(adapter);
2403                                 e = RSMERR_BAD_ADDR;
2404                         }
2405                 }
2406 #endif
2407                 if (ddi_copyout((caddr_t)&msg->rnum,
2408                     (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2409                     sizeof (minor_t), mode)) {
2410                         rsmka_release_adapter(adapter);
2411                         e = RSMERR_BAD_ADDR;
2412                 }
2413         }
2414 
2415         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2416 
2417         return (e);
2418 }
2419 
2420 static void
2421 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2422     rsm_memseg_id_t ex_segid,
2423     ddi_umem_cookie_t cookie)
2424 
2425 {
2426         rsmresource_t   *p = NULL;
2427         rsmhash_table_t *rhash = &rsm_import_segs;
2428         uint_t          index;
2429 
2430         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2431             "rsm_remap_local_importers enter\n"));
2432 
2433         index = rsmhash(ex_segid);
2434 
2435         rw_enter(&rhash->rsmhash_rw, RW_READER);
2436 
2437         p = rsmhash_getbkt(rhash, index);
2438 
2439         for (; p; p = p->rsmrc_next) {
2440                 rsmseg_t *seg = (rsmseg_t *)p;
2441                 rsmseglock_acquire(seg);
2442                 /*
2443                  * Change the s_cookie value of only the local importers
2444                  * which have been mapped (in state RSM_STATE_ACTIVE).
2445                  * Note that there is no need to change the s_cookie value
2446                  * if the imported segment is in RSM_STATE_MAPPING since
2447                  * eventually the s_cookie will be updated via the mapping
2448                  * functionality.
2449                  */
2450                 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2451                     (seg->s_state == RSM_STATE_ACTIVE)) {
2452                         seg->s_cookie = cookie;
2453                 }
2454                 rsmseglock_release(seg);
2455         }
2456         rw_exit(&rhash->rsmhash_rw);
2457 
2458         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2459             "rsm_remap_local_importers done\n"));
2460 }
2461 
2462 static int
2463 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2464 {
2465         int e;
2466         adapter_t *adapter;
2467         ddi_umem_cookie_t cookie;
2468         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2469 
2470         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2471 
2472         /* Check for permissions to rebind */
2473         if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2474                 return (RSMERR_REBIND_NOT_ALLOWED);
2475         }
2476 
2477         if (seg->s_pid != ddi_get_pid() &&
2478             ddi_get_pid() != 0) {
2479                 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2480                 return (RSMERR_NOT_CREATOR);
2481         }
2482 
2483         /*
2484          * We will not be allowing partial rebind and hence length passed
2485          * in must be same as segment length
2486          */
2487         if (msg->vaddr == NULL) {
2488                 DBG_PRINTF((category, RSM_ERR,
2489                     "rsm_rebind done: null msg->vaddr\n"));
2490                 return (RSMERR_BAD_ADDR);
2491         }
2492         if (msg->len != seg->s_len) {
2493                 DBG_PRINTF((category, RSM_ERR,
2494                     "rsm_rebind: invalid length\n"));
2495                 return (RSMERR_BAD_LENGTH);
2496         }
2497 
2498         /* Lock segment */
2499         rsmseglock_acquire(seg);
2500 
2501         while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2502             (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2503             (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2504                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2505                         rsmseglock_release(seg);
2506                         DBG_PRINTF((category, RSM_DEBUG,
2507                             "rsm_rebind done: cv_wait INTERRUPTED"));
2508                         return (RSMERR_INTERRUPTED);
2509                 }
2510         }
2511 
2512         /* verify segment state */
2513         if ((seg->s_state != RSM_STATE_BIND) &&
2514             (seg->s_state != RSM_STATE_EXPORT)) {
2515                 /* Unlock segment */
2516                 rsmseglock_release(seg);
2517                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2518                     "rsm_rebind done: invalid state\n"));
2519                 return (RSMERR_BAD_SEG_HNDL);
2520         }
2521 
2522         ASSERT(seg->s_cookie != NULL);
2523 
2524         if (msg->vaddr == seg->s_region.r_vaddr) {
2525                 rsmseglock_release(seg);
2526                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2527                 return (RSM_SUCCESS);
2528         }
2529 
2530         e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2531         if (e == RSM_SUCCESS) {
2532                 struct buf *xbuf;
2533                 dev_t sdev = 0;
2534                 rsm_memory_local_t mem;
2535 
2536                 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2537                     sdev, 0, NULL, DDI_UMEM_SLEEP);
2538                 ASSERT(xbuf != NULL);
2539 
2540                 mem.ms_type = RSM_MEM_BUF;
2541                 mem.ms_bp = xbuf;
2542 
2543                 adapter = seg->s_adapter;
2544                 e = adapter->rsmpi_ops->rsm_rebind(
2545                     seg->s_handle.out, 0, &mem,
2546                     RSM_RESOURCE_DONTWAIT, NULL);
2547 
2548                 if (e == RSM_SUCCESS) {
2549                         /*
2550                          * unbind the older pages, and unload local importers;
2551                          * but don't disconnect importers
2552                          */
2553                         (void) rsm_unbind_pages(seg);
2554                         seg->s_cookie = cookie;
2555                         seg->s_region.r_vaddr = msg->vaddr;
2556                         rsm_remap_local_importers(my_nodeid, seg->s_segid,
2557                             cookie);
2558                 } else {
2559                         /*
2560                          * Unbind the pages associated with "cookie" by the
2561                          * rsm_bind_pages calls prior to this. This is
2562                          * similar to what is done in the rsm_unbind_pages
2563                          * routine for the seg->s_cookie.
2564                          */
2565                         ddi_umem_unlock(cookie);
2566                         rsm_dec_pgcnt(btopr(msg->len));
2567                         DBG_PRINTF((category, RSM_ERR,
2568                             "rsm_rebind failed with %d\n", e));
2569                 }
2570                 /*
2571                  * At present there is no dependency on the existence of xbuf.
2572                  * So we can free it here. If in the future this changes, it can
2573                  * be freed sometime during the segment destroy.
2574                  */
2575                 freerbuf(xbuf);
2576         }
2577 
2578         /* Unlock segment */
2579         rsmseglock_release(seg);
2580 
2581         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2582 
2583         return (e);
2584 }
2585 
2586 static int
2587 rsm_unbind(rsmseg_t *seg)
2588 {
2589         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2590 
2591         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2592 
2593         rsmseglock_acquire(seg);
2594 
2595         /* verify segment state */
2596         if ((seg->s_state != RSM_STATE_BIND) &&
2597             (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2598                 rsmseglock_release(seg);
2599                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2600                     "rsm_unbind: invalid state\n"));
2601                 return (RSMERR_BAD_SEG_HNDL);
2602         }
2603 
2604         /* unlock current range */
2605         (void) rsm_unbind_pages(seg);
2606 
2607         if (seg->s_state == RSM_STATE_BIND) {
2608                 seg->s_state = RSM_STATE_NEW;
2609         } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2610                 seg->s_state = RSM_STATE_NEW_QUIESCED;
2611         }
2612 
2613         rsmseglock_release(seg);
2614 
2615         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2616 
2617         return (RSM_SUCCESS);
2618 }
2619 
2620 /* **************************** Exporter Access List Management ******* */
2621 static void
2622 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2623 {
2624         int     acl_sz;
2625         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2626 
2627         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2628 
2629         /* acl could be NULL */
2630 
2631         if (acl != NULL && acl_len > 0) {
2632                 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2633                 kmem_free((void *)acl, acl_sz);
2634         }
2635 
2636         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2637 }
2638 
2639 static void
2640 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2641 {
2642         int     acl_sz;
2643         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2644 
2645         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2646 
2647         if (acl != NULL && acl_len > 0) {
2648                 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2649                 kmem_free((void *)acl, acl_sz);
2650         }
2651 
2652         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2653 
2654 }
2655 
2656 static int
2657 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2658     rsmapi_access_entry_t **list, int *len, int loopback)
2659 {
2660         rsmapi_access_entry_t *acl;
2661         int     acl_len;
2662         int i;
2663         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2664 
2665         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2666 
2667         *len = 0;
2668         *list = NULL;
2669 
2670         acl_len = msg->acl_len;
2671         if ((loopback && acl_len > 1) || (acl_len < 0) ||
2672             (acl_len > MAX_NODES)) {
2673                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2674                     "rsmacl_build done: acl invalid\n"));
2675                 return (RSMERR_BAD_ACL);
2676         }
2677 
2678         if (acl_len > 0 && acl_len <= MAX_NODES) {
2679                 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2680 
2681                 acl = kmem_alloc(acl_size, KM_SLEEP);
2682 
2683                 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2684                     acl_size, mode)) {
2685                         kmem_free((void *) acl, acl_size);
2686                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2687                             "rsmacl_build done: BAD_ADDR\n"));
2688                         return (RSMERR_BAD_ADDR);
2689                 }
2690 
2691                 /*
2692                  * Verify access list
2693                  */
2694                 for (i = 0; i < acl_len; i++) {
2695                         if (acl[i].ae_node > MAX_NODES ||
2696                             (loopback && (acl[i].ae_node != my_nodeid)) ||
2697                             acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2698                                 /* invalid entry */
2699                                 kmem_free((void *) acl, acl_size);
2700                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2701                                     "rsmacl_build done: EINVAL\n"));
2702                                 return (RSMERR_BAD_ACL);
2703                         }
2704                 }
2705 
2706                 *len = acl_len;
2707                 *list = acl;
2708         }
2709 
2710         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2711 
2712         return (DDI_SUCCESS);
2713 }
2714 
2715 static int
2716 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2717     int acl_len, adapter_t *adapter)
2718 {
2719         rsm_access_entry_t *acl;
2720         rsm_addr_t hwaddr;
2721         int i;
2722         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2723 
2724         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2725 
2726         if (src != NULL) {
2727                 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2728                 acl = kmem_alloc(acl_size, KM_SLEEP);
2729 
2730                 /*
2731                  * translate access list
2732                  */
2733                 for (i = 0; i < acl_len; i++) {
2734                         if (src[i].ae_node == my_nodeid) {
2735                                 acl[i].ae_addr = adapter->hwaddr;
2736                         } else {
2737                                 hwaddr = get_remote_hwaddr(adapter,
2738                                     src[i].ae_node);
2739                                 if ((int64_t)hwaddr < 0) {
2740                                         /* invalid hwaddr */
2741                                         kmem_free((void *) acl, acl_size);
2742                                         DBG_PRINTF((category,
2743                                             RSM_DEBUG_VERBOSE,
2744                                             "rsmpiacl_create done:"
2745                                             "EINVAL hwaddr\n"));
2746                                         return (RSMERR_INTERNAL_ERROR);
2747                                 }
2748                                 acl[i].ae_addr = hwaddr;
2749                         }
2750                         /* rsmpi understands only RSM_PERM_XXXX */
2751                         acl[i].ae_permission =
2752                             src[i].ae_permission & RSM_PERM_RDWR;
2753                 }
2754                 *dest = acl;
2755         } else {
2756                 *dest = NULL;
2757         }
2758 
2759         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2760 
2761         return (RSM_SUCCESS);
2762 }
2763 
2764 static int
2765 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2766     rsmipc_reply_t *reply)
2767 {
2768 
2769         int             i;
2770         rsmseg_t        *seg;
2771         rsm_memseg_id_t key = req->rsmipc_key;
2772         rsm_permission_t perm = req->rsmipc_perm;
2773         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2774 
2775         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2776             "rsmsegacl_validate enter\n"));
2777 
2778         /*
2779          * Find segment and grab its lock. The reason why we grab the segment
2780          * lock in side the search is to avoid the race when the segment is
2781          * being deleted and we already have a pointer to it.
2782          */
2783         seg = rsmexport_lookup(key);
2784         if (!seg) {
2785                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2786                     "rsmsegacl_validate done: %u ENXIO\n", key));
2787                 return (RSMERR_SEG_NOT_PUBLISHED);
2788         }
2789 
2790         ASSERT(rsmseglock_held(seg));
2791         ASSERT(seg->s_state == RSM_STATE_EXPORT);
2792 
2793         /*
2794          * We implement a 2-level protection scheme.
2795          * First, we check if local/remote host has access rights.
2796          * Second, we check if the user has access rights.
2797          *
2798          * This routine only validates the rnode access_list
2799          */
2800         if (seg->s_acl_len > 0) {
2801                 /*
2802                  * Check host access list
2803                  */
2804                 ASSERT(seg->s_acl != NULL);
2805                 for (i = 0; i < seg->s_acl_len; i++) {
2806                         if (seg->s_acl[i].ae_node == rnode) {
2807                                 perm &= seg->s_acl[i].ae_permission;
2808                                 goto found;
2809                         }
2810                 }
2811                 /* rnode is not found in the list */
2812                 rsmseglock_release(seg);
2813                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2814                     "rsmsegacl_validate done: EPERM\n"));
2815                 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2816         } else {
2817                 /* use default owner creation umask */
2818                 perm &= seg->s_mode;
2819         }
2820 
2821 found:
2822         /* update perm for this node */
2823         reply->rsmipc_mode = perm;
2824         reply->rsmipc_uid = seg->s_uid;
2825         reply->rsmipc_gid = seg->s_gid;
2826         reply->rsmipc_segid = seg->s_segid;
2827         reply->rsmipc_seglen = seg->s_len;
2828 
2829         /*
2830          * Perm of requesting node is valid; source will validate user
2831          */
2832         rsmseglock_release(seg);
2833 
2834         /*
2835          * Add the importer to the list right away, if connect fails
2836          * the importer will ask the exporter to remove it.
2837          */
2838         importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2839             req->rsmipc_segment_cookie);
2840 
2841         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2842 
2843         return (RSM_SUCCESS);
2844 }
2845 
2846 
2847 /* ************************** Exporter Calls ************************* */
2848 
2849 static int
2850 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2851 {
2852         int                     e;
2853         int                     acl_len;
2854         rsmapi_access_entry_t   *acl;
2855         rsm_access_entry_t      *rsmpi_acl;
2856         rsm_memory_local_t      mem;
2857         struct buf              *xbuf;
2858         dev_t                   sdev = 0;
2859         adapter_t               *adapter;
2860         rsm_memseg_id_t         segment_id = 0;
2861         int                     loopback_flag = 0;
2862         int                     create_flags = 0;
2863         rsm_resource_callback_t callback_flag;
2864         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2865 
2866         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2867 
2868         if (seg->s_adapter == &loopback_adapter)
2869                 loopback_flag = 1;
2870 
2871         if (seg->s_pid != ddi_get_pid() &&
2872             ddi_get_pid() != 0) {
2873                 DBG_PRINTF((category, RSM_ERR,
2874                     "rsm_publish: Not creator\n"));
2875                 return (RSMERR_NOT_CREATOR);
2876         }
2877 
2878         /*
2879          * Get per node access list
2880          */
2881         e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2882         if (e != DDI_SUCCESS) {
2883                 DBG_PRINTF((category, RSM_ERR,
2884                     "rsm_publish done: rsmacl_build failed\n"));
2885                 return (e);
2886         }
2887 
2888         /*
2889          * The application provided msg->key is used for resolving a
2890          * segment id according to the following:
2891          *    key = 0                   Kernel Agent selects the segment id
2892          *    key <= RSM_DLPI_ID_END Reserved for system usage except
2893          *                              RSMLIB range
2894          *    key < RSM_USER_APP_ID_BASE segment id = key
2895          *    key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2896          *
2897          * rsm_nextavail_segmentid is initialized to 0x80000000 and
2898          * overflows to zero after 0x80000000 allocations.
2899          * An algorithm is needed which allows reinitialization and provides
2900          * for reallocation after overflow.  For now, ENOMEM is returned
2901          * once the overflow condition has occurred.
2902          */
2903         if (msg->key == 0) {
2904                 mutex_enter(&rsm_lock);
2905                 segment_id = rsm_nextavail_segmentid;
2906                 if (segment_id != 0) {
2907                         rsm_nextavail_segmentid++;
2908                         mutex_exit(&rsm_lock);
2909                 } else {
2910                         mutex_exit(&rsm_lock);
2911                         DBG_PRINTF((category, RSM_ERR,
2912                             "rsm_publish done: no more keys avlbl\n"));
2913                         return (RSMERR_INSUFFICIENT_RESOURCES);
2914                 }
2915         } else  if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2916                 /* range reserved for internal use by base/ndi libraries */
2917                 segment_id = msg->key;
2918         else    if (msg->key <= RSM_DLPI_ID_END)
2919                 return (RSMERR_RESERVED_SEGID);
2920         else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2921                 segment_id = msg->key;
2922         else {
2923                 DBG_PRINTF((category, RSM_ERR,
2924                     "rsm_publish done: invalid key %u\n", msg->key));
2925                 return (RSMERR_RESERVED_SEGID);
2926         }
2927 
2928         /* Add key to exportlist; The segment lock is held on success */
2929         e = rsmexport_add(seg, segment_id);
2930         if (e) {
2931                 rsmacl_free(acl, acl_len);
2932                 DBG_PRINTF((category, RSM_ERR,
2933                     "rsm_publish done: export_add failed: %d\n", e));
2934                 return (e);
2935         }
2936 
2937         seg->s_segid = segment_id;
2938 
2939         if ((seg->s_state != RSM_STATE_BIND) &&
2940             (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2941                 /* state changed since then, free acl and return */
2942                 rsmseglock_release(seg);
2943                 rsmexport_rm(seg);
2944                 rsmacl_free(acl, acl_len);
2945                 DBG_PRINTF((category, RSM_ERR,
2946                     "rsm_publish done: segment in wrong state: %d\n",
2947                     seg->s_state));
2948                 return (RSMERR_BAD_SEG_HNDL);
2949         }
2950 
2951         /*
2952          * If this is for a local memory handle and permissions are zero,
2953          * then the surrogate segment is very large and we want to skip
2954          * allocation of DVMA space.
2955          *
2956          * Careful!  If the user didn't use an ACL list, acl will be a NULL
2957          * pointer.  Check that before dereferencing it.
2958          */
2959         if (acl != (rsmapi_access_entry_t *)NULL) {
2960                 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2961                         goto skipdriver;
2962         }
2963 
2964         /* create segment  */
2965         xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2966             sdev, 0, NULL, DDI_UMEM_SLEEP);
2967         ASSERT(xbuf != NULL);
2968 
2969         mem.ms_type = RSM_MEM_BUF;
2970         mem.ms_bp = xbuf;
2971 
2972         /* This call includes a bind operations */
2973 
2974         adapter = seg->s_adapter;
2975         /*
2976          * create a acl list with hwaddr for RSMPI publish
2977          */
2978         e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2979 
2980         if (e != RSM_SUCCESS) {
2981                 rsmseglock_release(seg);
2982                 rsmexport_rm(seg);
2983                 rsmacl_free(acl, acl_len);
2984                 freerbuf(xbuf);
2985                 DBG_PRINTF((category, RSM_ERR,
2986                     "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2987                 return (e);
2988         }
2989 
2990         if (seg->s_state == RSM_STATE_BIND) {
2991                 /* create segment  */
2992 
2993                 /* This call includes a bind operations */
2994 
2995                 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2996                         create_flags = RSM_ALLOW_UNBIND_REBIND;
2997                 }
2998 
2999                 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
3000                         callback_flag  = RSM_RESOURCE_DONTWAIT;
3001                 } else {
3002                         callback_flag  = RSM_RESOURCE_SLEEP;
3003                 }
3004 
3005                 e = adapter->rsmpi_ops->rsm_seg_create(
3006                     adapter->rsmpi_handle,
3007                     &seg->s_handle.out, seg->s_len,
3008                     create_flags, &mem,
3009                     callback_flag, NULL);
3010                 /*
3011                  * At present there is no dependency on the existence of xbuf.
3012                  * So we can free it here. If in the future this changes, it can
3013                  * be freed sometime during the segment destroy.
3014                  */
3015                 freerbuf(xbuf);
3016 
3017                 if (e != RSM_SUCCESS) {
3018                         rsmseglock_release(seg);
3019                         rsmexport_rm(seg);
3020                         rsmacl_free(acl, acl_len);
3021                         rsmpiacl_free(rsmpi_acl, acl_len);
3022                         DBG_PRINTF((category, RSM_ERR,
3023                             "rsm_publish done: export_create failed: %d\n", e));
3024                         /*
3025                          * The following assertion ensures that the two errors
3026                          * related to the length and its alignment do not occur
3027                          * since they have been checked during export_create
3028                          */
3029                         ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3030                             e != RSMERR_BAD_LENGTH);
3031                         if (e == RSMERR_NOT_MEM)
3032                                 e = RSMERR_INSUFFICIENT_MEM;
3033 
3034                         return (e);
3035                 }
3036                 /* export segment, this should create an IMMU mapping */
3037                 e = adapter->rsmpi_ops->rsm_publish(
3038                     seg->s_handle.out,
3039                     rsmpi_acl, acl_len,
3040                     seg->s_segid,
3041                     RSM_RESOURCE_DONTWAIT, NULL);
3042 
3043                 if (e != RSM_SUCCESS) {
3044                         adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3045                         rsmseglock_release(seg);
3046                         rsmexport_rm(seg);
3047                         rsmacl_free(acl, acl_len);
3048                         rsmpiacl_free(rsmpi_acl, acl_len);
3049                         DBG_PRINTF((category, RSM_ERR,
3050                             "rsm_publish done: export_publish failed: %d\n",
3051                             e));
3052                         return (e);
3053                 }
3054         }
3055 
3056         seg->s_acl_in = rsmpi_acl;
3057 
3058 skipdriver:
3059         /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3060         seg->s_acl_len       = acl_len;
3061         seg->s_acl   = acl;
3062 
3063         if (seg->s_state == RSM_STATE_BIND) {
3064                 seg->s_state = RSM_STATE_EXPORT;
3065         } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3066                 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3067                 cv_broadcast(&seg->s_cv);
3068         }
3069 
3070         rsmseglock_release(seg);
3071 
3072         /*
3073          * If the segment id was solicited, then return it in
3074          * the original incoming message.
3075          */
3076         if (msg->key == 0) {
3077                 msg->key = segment_id;
3078 #ifdef _MULTI_DATAMODEL
3079                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3080                         rsm_ioctlmsg32_t msg32;
3081 
3082                         msg32.key = msg->key;
3083                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3084                             "rsm_publish done\n"));
3085                         return (ddi_copyout((caddr_t)&msg32,
3086                             (caddr_t)dataptr, sizeof (msg32), mode));
3087                 }
3088 #endif
3089                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3090                     "rsm_publish done\n"));
3091                 return (ddi_copyout((caddr_t)msg,
3092                     (caddr_t)dataptr, sizeof (*msg), mode));
3093         }
3094 
3095         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3096         return (DDI_SUCCESS);
3097 }
3098 
3099 /*
3100  * This function modifies the access control list of an already published
3101  * segment.  There is no effect on import segments which are already
3102  * connected.
3103  */
3104 static int
3105 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3106 {
3107         rsmapi_access_entry_t   *new_acl, *old_acl, *tmp_acl;
3108         rsm_access_entry_t      *rsmpi_new_acl, *rsmpi_old_acl;
3109         int                     new_acl_len, old_acl_len, tmp_acl_len;
3110         int                     e, i;
3111         adapter_t               *adapter;
3112         int                     loopback_flag = 0;
3113         rsm_memseg_id_t         key;
3114         rsm_permission_t        permission;
3115         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3116 
3117         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3118 
3119         if ((seg->s_state != RSM_STATE_EXPORT) &&
3120             (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3121             (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3122                 return (RSMERR_SEG_NOT_PUBLISHED);
3123 
3124         if (seg->s_pid != ddi_get_pid() &&
3125             ddi_get_pid() != 0) {
3126                 DBG_PRINTF((category, RSM_ERR,
3127                     "rsm_republish: Not owner\n"));
3128                 return (RSMERR_NOT_CREATOR);
3129         }
3130 
3131         if (seg->s_adapter == &loopback_adapter)
3132                 loopback_flag = 1;
3133 
3134         /*
3135          * Build new list first
3136          */
3137         e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3138         if (e) {
3139                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3140                     "rsm_republish done: rsmacl_build failed %d", e));
3141                 return (e);
3142         }
3143 
3144         /* Lock segment */
3145         rsmseglock_acquire(seg);
3146         /*
3147          * a republish is in progress - REPUBLISH message is being
3148          * sent to the importers so wait for it to complete OR
3149          * wait till DR completes
3150          */
3151         while (((seg->s_state == RSM_STATE_EXPORT) &&
3152             (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3153             (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3154             (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3155                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3156                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3157                             "rsm_republish done: cv_wait  INTERRUPTED"));
3158                         rsmseglock_release(seg);
3159                         rsmacl_free(new_acl, new_acl_len);
3160                         return (RSMERR_INTERRUPTED);
3161                 }
3162         }
3163 
3164         /* recheck if state is valid */
3165         if (seg->s_state != RSM_STATE_EXPORT) {
3166                 rsmseglock_release(seg);
3167                 rsmacl_free(new_acl, new_acl_len);
3168                 return (RSMERR_SEG_NOT_PUBLISHED);
3169         }
3170 
3171         key = seg->s_key;
3172         old_acl = seg->s_acl;
3173         old_acl_len = seg->s_acl_len;
3174 
3175         seg->s_acl = new_acl;
3176         seg->s_acl_len = new_acl_len;
3177 
3178         /*
3179          * This call will only be meaningful if and when the interconnect
3180          * layer makes use of the access list
3181          */
3182         adapter = seg->s_adapter;
3183         /*
3184          * create a acl list with hwaddr for RSMPI publish
3185          */
3186         e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3187 
3188         if (e != RSM_SUCCESS) {
3189                 seg->s_acl = old_acl;
3190                 seg->s_acl_len = old_acl_len;
3191                 rsmseglock_release(seg);
3192                 rsmacl_free(new_acl, new_acl_len);
3193                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3194                     "rsm_republish done: rsmpiacl_create failed %d", e));
3195                 return (e);
3196         }
3197         rsmpi_old_acl = seg->s_acl_in;
3198         seg->s_acl_in = rsmpi_new_acl;
3199 
3200         e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3201             seg->s_acl_in, seg->s_acl_len,
3202             RSM_RESOURCE_DONTWAIT, NULL);
3203 
3204         if (e != RSM_SUCCESS) {
3205                 seg->s_acl = old_acl;
3206                 seg->s_acl_in = rsmpi_old_acl;
3207                 seg->s_acl_len = old_acl_len;
3208                 rsmseglock_release(seg);
3209                 rsmacl_free(new_acl, new_acl_len);
3210                 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3211 
3212                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3213                     "rsm_republish done: rsmpi republish failed %d\n", e));
3214                 return (e);
3215         }
3216 
3217         /* create a tmp copy of the new acl */
3218         tmp_acl_len = new_acl_len;
3219         if (tmp_acl_len > 0) {
3220                 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3221                 for (i = 0; i < tmp_acl_len; i++) {
3222                         tmp_acl[i].ae_node = new_acl[i].ae_node;
3223                         tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3224                 }
3225                 /*
3226                  * The default permission of a node which was in the old
3227                  * ACL but not in the new ACL is 0 ie no access.
3228                  */
3229                 permission = 0;
3230         } else {
3231                 /*
3232                  * NULL acl means all importers can connect and
3233                  * default permission will be owner creation umask
3234                  */
3235                 tmp_acl = NULL;
3236                 permission = seg->s_mode;
3237         }
3238 
3239         /* make other republishers to wait for republish to complete */
3240         seg->s_flags |= RSM_REPUBLISH_WAIT;
3241 
3242         rsmseglock_release(seg);
3243 
3244         /* send the new perms to the importing nodes */
3245         rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3246 
3247         rsmseglock_acquire(seg);
3248         seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3249         /* wake up any one waiting for republish to complete */
3250         cv_broadcast(&seg->s_cv);
3251         rsmseglock_release(seg);
3252 
3253         rsmacl_free(tmp_acl, tmp_acl_len);
3254         rsmacl_free(old_acl, old_acl_len);
3255         rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3256 
3257         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3258         return (DDI_SUCCESS);
3259 }
3260 
3261 static int
3262 rsm_unpublish(rsmseg_t *seg, int mode)
3263 {
3264         rsmapi_access_entry_t   *acl;
3265         rsm_access_entry_t      *rsmpi_acl;
3266         int                     acl_len;
3267         int                     e;
3268         adapter_t *adapter;
3269         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3270 
3271         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3272 
3273         if (seg->s_pid != ddi_get_pid() &&
3274             ddi_get_pid() != 0) {
3275                 DBG_PRINTF((category, RSM_ERR,
3276                     "rsm_unpublish: Not creator\n"));
3277                 return (RSMERR_NOT_CREATOR);
3278         }
3279 
3280         rsmseglock_acquire(seg);
3281         /*
3282          * wait for QUIESCING to complete here before rsmexport_rm
3283          * is called because the SUSPEND_COMPLETE mesg which changes
3284          * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3285          * signals the cv_wait needs to find it in the hashtable.
3286          */
3287         while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3288             ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3289                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3290                         rsmseglock_release(seg);
3291                         DBG_PRINTF((category, RSM_ERR,
3292                             "rsm_unpublish done: cv_wait INTR qscing"
3293                             "getv/putv in progress"));
3294                         return (RSMERR_INTERRUPTED);
3295                 }
3296         }
3297 
3298         /* verify segment state */
3299         if ((seg->s_state != RSM_STATE_EXPORT) &&
3300             (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3301                 rsmseglock_release(seg);
3302                 DBG_PRINTF((category, RSM_ERR,
3303                     "rsm_unpublish done: bad state %x\n", seg->s_state));
3304                 return (RSMERR_SEG_NOT_PUBLISHED);
3305         }
3306 
3307         rsmseglock_release(seg);
3308 
3309         rsmexport_rm(seg);
3310 
3311         rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3312 
3313         rsmseglock_acquire(seg);
3314         /*
3315          * wait for republish to complete
3316          */
3317         while ((seg->s_state == RSM_STATE_EXPORT) &&
3318             (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3319                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3320                         DBG_PRINTF((category, RSM_ERR,
3321                             "rsm_unpublish done: cv_wait INTR repubing"));
3322                         rsmseglock_release(seg);
3323                         return (RSMERR_INTERRUPTED);
3324                 }
3325         }
3326 
3327         if ((seg->s_state != RSM_STATE_EXPORT) &&
3328             (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3329                 DBG_PRINTF((category, RSM_ERR,
3330                     "rsm_unpublish done: invalid state"));
3331                 rsmseglock_release(seg);
3332                 return (RSMERR_SEG_NOT_PUBLISHED);
3333         }
3334 
3335         /*
3336          * check for putv/get surrogate segment which was not published
3337          * to the driver.
3338          *
3339          * Be certain to see if there is an ACL first!  If this segment was
3340          * not published with an ACL, acl will be a null pointer.  Check
3341          * that before dereferencing it.
3342          */
3343         acl = seg->s_acl;
3344         if (acl != (rsmapi_access_entry_t *)NULL) {
3345                 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3346                         goto bypass;
3347         }
3348 
3349         /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3350         if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3351                 goto bypass;
3352 
3353         adapter = seg->s_adapter;
3354         for (;;) {
3355                 if (seg->s_state != RSM_STATE_EXPORT) {
3356                         rsmseglock_release(seg);
3357                         DBG_PRINTF((category, RSM_ERR,
3358                             "rsm_unpublish done: bad state %x\n",
3359                             seg->s_state));
3360                         return (RSMERR_SEG_NOT_PUBLISHED);
3361                 }
3362 
3363                 /* unpublish from adapter */
3364                 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3365 
3366                 if (e == RSM_SUCCESS) {
3367                         break;
3368                 }
3369 
3370                 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3371                         /*
3372                          * wait for unpublish to succeed, it's busy.
3373                          */
3374                         seg->s_flags |= RSM_EXPORT_WAIT;
3375 
3376                         /* wait for a max of 1 ms - this is an empirical */
3377                         /* value that was found by some minimal testing  */
3378                         /* can be fine tuned when we have better numbers */
3379                         /* A long term fix would be to send cv_signal    */
3380                         /* from the intr callback routine                */
3381                         /* currently nobody signals this wait            */
3382                         (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3383                             drv_usectohz(1000), TR_CLOCK_TICK);
3384 
3385                         DBG_PRINTF((category, RSM_ERR,
3386                             "rsm_unpublish: SEG_IN_USE\n"));
3387 
3388                         seg->s_flags &= ~RSM_EXPORT_WAIT;
3389                 } else {
3390                         if (mode == 1) {
3391                                 DBG_PRINTF((category, RSM_ERR,
3392                                     "rsm:rsmpi unpublish err %x\n", e));
3393                                 seg->s_state = RSM_STATE_BIND;
3394                         }
3395                         rsmseglock_release(seg);
3396                         return (e);
3397                 }
3398         }
3399 
3400         /* Free segment */
3401         e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3402 
3403         if (e != RSM_SUCCESS) {
3404                 DBG_PRINTF((category, RSM_ERR,
3405                     "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3406                     seg->s_key, e));
3407         }
3408 
3409 bypass:
3410         acl = seg->s_acl;
3411         rsmpi_acl = seg->s_acl_in;
3412         acl_len = seg->s_acl_len;
3413 
3414         seg->s_acl = NULL;
3415         seg->s_acl_in = NULL;
3416         seg->s_acl_len = 0;
3417 
3418         if (seg->s_state == RSM_STATE_EXPORT) {
3419                 seg->s_state = RSM_STATE_BIND;
3420         } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3421                 seg->s_state = RSM_STATE_BIND_QUIESCED;
3422                 cv_broadcast(&seg->s_cv);
3423         }
3424 
3425         rsmseglock_release(seg);
3426 
3427         rsmacl_free(acl, acl_len);
3428         rsmpiacl_free(rsmpi_acl, acl_len);
3429 
3430         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3431 
3432         return (DDI_SUCCESS);
3433 }
3434 
3435 /*
3436  * Called from rsm_unpublish to force an unload and disconnection of all
3437  * importers of the unpublished segment.
3438  *
3439  * First build the list of segments requiring a force disconnect, then
3440  * send a request for each.
3441  */
3442 static void
3443 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3444     rsm_node_id_t ex_nodeid)
3445 {
3446         rsmipc_request_t        request;
3447         importing_token_t       *prev_token, *token, *tmp_token, *tokp;
3448         importing_token_t       *force_disconnect_list = NULL;
3449         int                     index;
3450 
3451         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3452             "rsm_send_importer_disconnects enter\n"));
3453 
3454         index = rsmhash(ex_segid);
3455 
3456         mutex_enter(&importer_list.lock);
3457 
3458         prev_token = NULL;
3459         token = importer_list.bucket[index];
3460 
3461         while (token != NULL) {
3462                 if (token->key == ex_segid) {
3463                         /*
3464                          * take it off the importer list and add it
3465                          * to the force disconnect list.
3466                          */
3467                         if (prev_token == NULL)
3468                                 importer_list.bucket[index] = token->next;
3469                         else
3470                                 prev_token->next = token->next;
3471                         tmp_token = token;
3472                         token = token->next;
3473                         if (force_disconnect_list == NULL) {
3474                                 force_disconnect_list = tmp_token;
3475                                 tmp_token->next = NULL;
3476                         } else {
3477                                 tokp = force_disconnect_list;
3478                                 /*
3479                                  * make sure that the tmp_token's node
3480                                  * is not already on the force disconnect
3481                                  * list.
3482                                  */
3483                                 while (tokp != NULL) {
3484                                         if (tokp->importing_node ==
3485                                             tmp_token->importing_node) {
3486                                                 break;
3487                                         }
3488                                         tokp = tokp->next;
3489                                 }
3490                                 if (tokp == NULL) {
3491                                         tmp_token->next =
3492                                             force_disconnect_list;
3493                                         force_disconnect_list = tmp_token;
3494                                 } else {
3495                                         kmem_free((void *)tmp_token,
3496                                             sizeof (*token));
3497                                 }
3498                         }
3499 
3500                 } else {
3501                         prev_token = token;
3502                         token = token->next;
3503                 }
3504         }
3505         mutex_exit(&importer_list.lock);
3506 
3507         token = force_disconnect_list;
3508         while (token != NULL) {
3509                 if (token->importing_node == my_nodeid) {
3510                         rsm_force_unload(ex_nodeid, ex_segid,
3511                             DISCONNECT);
3512                 } else {
3513                         request.rsmipc_hdr.rsmipc_type =
3514                             RSMIPC_MSG_DISCONNECT;
3515                         request.rsmipc_key = token->key;
3516                         for (;;) {
3517                                 if (rsmipc_send(token->importing_node,
3518                                     &request,
3519                                     RSM_NO_REPLY) == RSM_SUCCESS) {
3520                                         break;
3521                                 } else {
3522                                         delay(drv_usectohz(10000));
3523                                 }
3524                         }
3525                 }
3526                 tmp_token = token;
3527                 token = token->next;
3528                 kmem_free((void *)tmp_token, sizeof (*token));
3529         }
3530 
3531         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3532             "rsm_send_importer_disconnects done\n"));
3533 }
3534 
3535 /*
3536  * This function is used as a callback for unlocking the pages locked
3537  * down by a process which then does a fork or an exec.
3538  * It marks the export segments corresponding to umem cookie given by
3539  * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3540  * destroyed later when an rsm_close occurs).
3541  */
3542 static void
3543 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3544 {
3545         rsmresource_blk_t *blk;
3546         rsmresource_t *p;
3547         rsmseg_t *eseg = NULL;
3548         int i, j;
3549         int found = 0;
3550 
3551         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3552             "rsm_export_force_destroy enter\n"));
3553 
3554         /*
3555          * Walk the resource list and locate the export segment (either
3556          * in the BIND or the EXPORT state) which corresponds to the
3557          * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3558          * Change the state to ZOMBIE by calling rsmseg_close with the
3559          * force_flag argument (the second argument) set to 1. Also,
3560          * unpublish and unbind the segment, but don't free it. Free it
3561          * only on a rsm_close call for the segment.
3562          */
3563         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3564 
3565         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3566                 blk = rsm_resource.rsmrc_root[i];
3567                 if (blk == NULL) {
3568                         continue;
3569                 }
3570 
3571                 for (j = 0; j < RSMRC_BLKSZ; j++) {
3572                         p = blk->rsmrcblk_blks[j];
3573                         if ((p != NULL) && (p != RSMRC_RESERVED) &&
3574                             (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3575                                 eseg = (rsmseg_t *)p;
3576                                 if (eseg->s_cookie != ck)
3577                                         continue; /* continue searching */
3578                                 /*
3579                                  * Found the segment, set flag to indicate
3580                                  * force destroy processing is in progress
3581                                  */
3582                                 rsmseglock_acquire(eseg);
3583                                 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3584                                 rsmseglock_release(eseg);
3585                                 found = 1;
3586                                 break;
3587                         }
3588                 }
3589 
3590                 if (found)
3591                         break;
3592         }
3593 
3594         rw_exit(&rsm_resource.rsmrc_lock);
3595 
3596         if (found) {
3597                 ASSERT(eseg != NULL);
3598                 /* call rsmseg_close with force flag set to 1 */
3599                 rsmseg_close(eseg, 1);
3600                 /*
3601                  * force destroy processing done, clear flag and signal any
3602                  * thread waiting in rsmseg_close.
3603                  */
3604                 rsmseglock_acquire(eseg);
3605                 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3606                 cv_broadcast(&eseg->s_cv);
3607                 rsmseglock_release(eseg);
3608         }
3609 
3610         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3611             "rsm_export_force_destroy done\n"));
3612 }
3613 
3614 /* ******************************* Remote Calls *********************** */
3615 static void
3616 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3617 {
3618         rsmipc_reply_t reply;
3619         DBG_DEFINE(category,
3620             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3621 
3622         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3623             "rsm_intr_segconnect enter\n"));
3624 
3625         reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3626 
3627         reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3628         reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3629 
3630         (void) rsmipc_send(src, NULL, &reply);
3631 
3632         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3633             "rsm_intr_segconnect done\n"));
3634 }
3635 
3636 
3637 /*
3638  * When an exported segment is unpublished the exporter sends an ipc
3639  * message (RSMIPC_MSG_DISCONNECT) to all importers.  The recv ipc dispatcher
3640  * calls this function.  The import list is scanned; segments which match the
3641  * exported segment id are unloaded and disconnected.
3642  *
3643  * Will also be called from rsm_rebind with disconnect_flag FALSE.
3644  *
3645  */
3646 static void
3647 rsm_force_unload(rsm_node_id_t src_nodeid,
3648     rsm_memseg_id_t ex_segid,
3649     boolean_t disconnect_flag)
3650 
3651 {
3652         rsmresource_t   *p = NULL;
3653         rsmhash_table_t *rhash = &rsm_import_segs;
3654         uint_t          index;
3655         DBG_DEFINE(category,
3656             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3657 
3658         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3659 
3660         index = rsmhash(ex_segid);
3661 
3662         rw_enter(&rhash->rsmhash_rw, RW_READER);
3663 
3664         p = rsmhash_getbkt(rhash, index);
3665 
3666         for (; p; p = p->rsmrc_next) {
3667                 rsmseg_t *seg = (rsmseg_t *)p;
3668                 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3669                         /*
3670                          * In order to make rsmseg_unload and rsm_force_unload
3671                          * thread safe, acquire the segment lock here.
3672                          * rsmseg_unload is responsible for releasing the lock.
3673                          * rsmseg_unload releases the lock just before a call
3674                          * to rsmipc_send or in case of an early exit which
3675                          * occurs if the segment was in the state
3676                          * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3677                          */
3678                         rsmseglock_acquire(seg);
3679                         if (disconnect_flag)
3680                                 seg->s_flags |= RSM_FORCE_DISCONNECT;
3681                         rsmseg_unload(seg);
3682                 }
3683         }
3684         rw_exit(&rhash->rsmhash_rw);
3685 
3686         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3687 }
3688 
3689 static void
3690 rsm_intr_reply(rsmipc_msghdr_t *msg)
3691 {
3692         /*
3693          * Find slot for cookie in reply.
3694          * Match sequence with sequence in cookie
3695          * If no match; return
3696          * Try to grap lock of slot, if locked return
3697          * copy data into reply slot area
3698          * signal waiter
3699          */
3700         rsmipc_slot_t   *slot;
3701         rsmipc_cookie_t *cookie;
3702         void *data = (void *) msg;
3703         size_t size = sizeof (rsmipc_reply_t);
3704         DBG_DEFINE(category,
3705             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3706 
3707         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3708 
3709         cookie = &msg->rsmipc_cookie;
3710         if (cookie->ic.index >= RSMIPC_SZ) {
3711                 DBG_PRINTF((category, RSM_ERR,
3712                     "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3713                 return;
3714         }
3715 
3716         ASSERT(cookie->ic.index < RSMIPC_SZ);
3717         slot = &rsm_ipc.slots[cookie->ic.index];
3718         mutex_enter(&slot->rsmipc_lock);
3719         if (slot->rsmipc_cookie.value == cookie->value) {
3720                 /* found a match */
3721                 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3722                         bcopy(data, slot->rsmipc_data, size);
3723                         RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3724                         cv_signal(&slot->rsmipc_cv);
3725                 }
3726         } else {
3727                 DBG_PRINTF((category, RSM_DEBUG,
3728                     "rsm: rsm_intr_reply mismatched reply %d\n",
3729                     cookie->ic.index));
3730         }
3731         mutex_exit(&slot->rsmipc_lock);
3732         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3733 }
3734 
3735 /*
3736  * This function gets dispatched on the worker thread when we receive
3737  * the SQREADY message. This function sends the SQREADY_ACK message.
3738  */
3739 static void
3740 rsm_sqready_ack_deferred(void *arg)
3741 {
3742         path_t  *path = (path_t *)arg;
3743         DBG_DEFINE(category,
3744             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3745 
3746         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3747             "rsm_sqready_ack_deferred enter\n"));
3748 
3749         mutex_enter(&path->mutex);
3750 
3751         /*
3752          * If path is not active no point in sending the ACK
3753          * because the whole SQREADY protocol will again start
3754          * when the path becomes active.
3755          */
3756         if (path->state != RSMKA_PATH_ACTIVE) {
3757                 /*
3758                  * decrement the path refcnt incremented in rsm_proc_sqready
3759                  */
3760                 PATH_RELE_NOLOCK(path);
3761                 mutex_exit(&path->mutex);
3762                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3763                     "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3764                 return;
3765         }
3766 
3767         /* send an SQREADY_ACK message */
3768         (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3769 
3770         /* initialize credits to the max level */
3771         path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3772 
3773         /* wake up any send that is waiting for credits */
3774         cv_broadcast(&path->sendq_token.sendq_cv);
3775 
3776         /*
3777          * decrement the path refcnt since we incremented it in
3778          * rsm_proc_sqready
3779          */
3780         PATH_RELE_NOLOCK(path);
3781 
3782         mutex_exit(&path->mutex);
3783 
3784         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3785             "rsm_sqready_ack_deferred done\n"));
3786 }
3787 
3788 /*
3789  * Process the SQREADY message
3790  */
3791 static void
3792 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3793     rsm_intr_hand_arg_t arg)
3794 {
3795         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3796         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3797         path_t                  *path;
3798         DBG_DEFINE(category,
3799             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3800 
3801         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3802 
3803         /* look up the path - incr the path refcnt */
3804         path = rsm_find_path(hdlr_argp->adapter_name,
3805             hdlr_argp->adapter_instance, src_hwaddr);
3806 
3807         /*
3808          * No path exists or path is not active - drop the message
3809          */
3810         if (path == NULL) {
3811                 DBG_PRINTF((category, RSM_DEBUG,
3812                     "rsm_proc_sqready done: msg dropped no path\n"));
3813                 return;
3814         }
3815 
3816         mutex_exit(&path->mutex);
3817 
3818         /* drain any tasks from the previous incarnation */
3819         taskq_wait(path->recv_taskq);
3820 
3821         mutex_enter(&path->mutex);
3822         /*
3823          * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3824          * in the meanwhile we received an SQREADY message, blindly reset
3825          * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3826          * and forget about the SQREADY that we sent.
3827          */
3828         path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3829 
3830         if (path->state != RSMKA_PATH_ACTIVE) {
3831                 /* decr refcnt and drop the mutex */
3832                 PATH_RELE_NOLOCK(path);
3833                 mutex_exit(&path->mutex);
3834                 DBG_PRINTF((category, RSM_DEBUG,
3835                     "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3836                 return;
3837         }
3838 
3839         DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3840             " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3841 
3842         /*
3843          * The sender's local incarnation number is our remote incarnation
3844          * number save it in the path data structure
3845          */
3846         path->remote_incn = msg->rsmipc_local_incn;
3847         path->sendq_token.msgbuf_avail = 0;
3848         path->procmsg_cnt = 0;
3849 
3850         /*
3851          * path is active - dispatch task to send SQREADY_ACK - remember
3852          * RSMPI calls can't be done in interrupt context
3853          *
3854          * We can use the recv_taskq to send because the remote endpoint
3855          * cannot start sending messages till it receives SQREADY_ACK hence
3856          * at this point there are no tasks on recv_taskq.
3857          *
3858          * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3859          */
3860         (void) taskq_dispatch(path->recv_taskq,
3861             rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3862 
3863         mutex_exit(&path->mutex);
3864 
3865 
3866         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3867 }
3868 
3869 /*
3870  * Process the SQREADY_ACK message
3871  */
3872 static void
3873 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3874     rsm_intr_hand_arg_t arg)
3875 {
3876         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3877         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3878         path_t                  *path;
3879         DBG_DEFINE(category,
3880             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3881 
3882         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3883             "rsm_proc_sqready_ack enter\n"));
3884 
3885         /* look up the path - incr the path refcnt */
3886         path = rsm_find_path(hdlr_argp->adapter_name,
3887             hdlr_argp->adapter_instance, src_hwaddr);
3888 
3889         /*
3890          * drop the message if - no path exists or path is not active
3891          * or if its not waiting for SQREADY_ACK message
3892          */
3893         if (path == NULL) {
3894                 DBG_PRINTF((category, RSM_DEBUG,
3895                     "rsm_proc_sqready_ack done: msg dropped no path\n"));
3896                 return;
3897         }
3898 
3899         if ((path->state != RSMKA_PATH_ACTIVE) ||
3900             !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3901                 /* decrement the refcnt */
3902                 PATH_RELE_NOLOCK(path);
3903                 mutex_exit(&path->mutex);
3904                 DBG_PRINTF((category, RSM_DEBUG,
3905                     "rsm_proc_sqready_ack done: msg dropped\n"));
3906                 return;
3907         }
3908 
3909         /*
3910          * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3911          * sent, if not drop it.
3912          */
3913         if (path->local_incn != msghdr->rsmipc_incn) {
3914                 /* decrement the refcnt */
3915                 PATH_RELE_NOLOCK(path);
3916                 mutex_exit(&path->mutex);
3917                 DBG_PRINTF((category, RSM_DEBUG,
3918                     "rsm_proc_sqready_ack done: msg old incn %lld\n",
3919                     msghdr->rsmipc_incn));
3920                 return;
3921         }
3922 
3923         DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3924             " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3925 
3926         /*
3927          * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3928          */
3929         path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3930 
3931         /* save the remote sendq incn number */
3932         path->remote_incn = msg->rsmipc_local_incn;
3933 
3934         /* initialize credits to the max level */
3935         path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3936 
3937         /* wake up any send that is waiting for credits */
3938         cv_broadcast(&path->sendq_token.sendq_cv);
3939 
3940         /* decrement the refcnt */
3941         PATH_RELE_NOLOCK(path);
3942 
3943         mutex_exit(&path->mutex);
3944 
3945         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3946             "rsm_proc_sqready_ack done\n"));
3947 }
3948 
3949 /*
3950  * process the RSMIPC_MSG_CREDIT message
3951  */
3952 static void
3953 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3954     rsm_intr_hand_arg_t arg)
3955 {
3956         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3957         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3958         path_t                  *path;
3959         DBG_DEFINE(category,
3960             RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3961             RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3962 
3963         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3964 
3965         /* look up the path - incr the path refcnt */
3966         path = rsm_find_path(hdlr_argp->adapter_name,
3967             hdlr_argp->adapter_instance, src_hwaddr);
3968 
3969         if (path == NULL) {
3970                 DBG_PRINTF((category, RSM_DEBUG,
3971                     "rsm_add_credits enter: path not found\n"));
3972                 return;
3973         }
3974 
3975         /* the path is not active - discard credits */
3976         if (path->state != RSMKA_PATH_ACTIVE) {
3977                 PATH_RELE_NOLOCK(path);
3978                 mutex_exit(&path->mutex);
3979                 DBG_PRINTF((category, RSM_DEBUG,
3980                     "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3981                 return;
3982         }
3983 
3984         /*
3985          * Check if these credits are for current incarnation of the path.
3986          */
3987         if (path->local_incn != msghdr->rsmipc_incn) {
3988                 /* decrement the refcnt */
3989                 PATH_RELE_NOLOCK(path);
3990                 mutex_exit(&path->mutex);
3991                 DBG_PRINTF((category, RSM_DEBUG,
3992                     "rsm_add_credits enter: old incn %lld\n",
3993                     msghdr->rsmipc_incn));
3994                 return;
3995         }
3996 
3997         DBG_PRINTF((category, RSM_DEBUG,
3998             "rsm_add_credits:path=%lx new-creds=%d "
3999             "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
4000             path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
4001             src_hwaddr));
4002 
4003 
4004         /* add credits to the path's sendq */
4005         path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4006 
4007         ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4008 
4009         /* wake up any send that is waiting for credits */
4010         cv_broadcast(&path->sendq_token.sendq_cv);
4011 
4012         /* decrement the refcnt */
4013         PATH_RELE_NOLOCK(path);
4014 
4015         mutex_exit(&path->mutex);
4016 
4017         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4018 }
4019 
4020 static void
4021 rsm_intr_event(rsmipc_request_t *msg)
4022 {
4023         rsmseg_t        *seg;
4024         rsmresource_t   *p;
4025         rsm_node_id_t   src_node;
4026         DBG_DEFINE(category,
4027             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4028 
4029         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4030 
4031         src_node = msg->rsmipc_hdr.rsmipc_src;
4032 
4033         if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4034                 /* This is for an import segment */
4035                 uint_t hashval = rsmhash(msg->rsmipc_key);
4036 
4037                 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4038 
4039                 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4040 
4041                 for (; p; p = p->rsmrc_next) {
4042                         if ((p->rsmrc_key == msg->rsmipc_key) &&
4043                             (p->rsmrc_node == src_node)) {
4044                                 seg = (rsmseg_t *)p;
4045                                 rsmseglock_acquire(seg);
4046 
4047                                 atomic_inc_32(&seg->s_pollevent);
4048 
4049                                 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4050                                         pollwakeup(&seg->s_poll, POLLRDNORM);
4051 
4052                                 rsmseglock_release(seg);
4053                         }
4054                 }
4055 
4056                 rw_exit(&rsm_import_segs.rsmhash_rw);
4057         } else {
4058                 /* This is for an export segment */
4059                 seg = rsmexport_lookup(msg->rsmipc_key);
4060                 if (!seg) {
4061                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4062                             "rsm_intr_event done: exp seg not found\n"));
4063                         return;
4064                 }
4065 
4066                 ASSERT(rsmseglock_held(seg));
4067 
4068                 atomic_inc_32(&seg->s_pollevent);
4069 
4070                 /*
4071                  * We must hold the segment lock here, or else the segment
4072                  * can be freed while pollwakeup is using it. This implies
4073                  * that we MUST NOT grab the segment lock during rsm_chpoll,
4074                  * as outlined in the chpoll(2) man page.
4075                  */
4076                 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4077                         pollwakeup(&seg->s_poll, POLLRDNORM);
4078 
4079                 rsmseglock_release(seg);
4080         }
4081 
4082         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4083 }
4084 
4085 /*
4086  * The exporter did a republish and changed the ACL - this change is only
4087  * visible to new importers.
4088  */
4089 static void
4090 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4091     rsm_permission_t perm)
4092 {
4093 
4094         rsmresource_t   *p;
4095         rsmseg_t        *seg;
4096         uint_t          hashval = rsmhash(key);
4097         DBG_DEFINE(category,
4098             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4099 
4100         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4101 
4102         rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4103 
4104         p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4105 
4106         for (; p; p = p->rsmrc_next) {
4107                 /*
4108                  * find the importer and update the permission in the shared
4109                  * data structure. Any new importers will use the new perms
4110                  */
4111                 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4112                         seg = (rsmseg_t *)p;
4113 
4114                         rsmseglock_acquire(seg);
4115                         rsmsharelock_acquire(seg);
4116                         seg->s_share->rsmsi_mode = perm;
4117                         rsmsharelock_release(seg);
4118                         rsmseglock_release(seg);
4119 
4120                         break;
4121                 }
4122         }
4123 
4124         rw_exit(&rsm_import_segs.rsmhash_rw);
4125 
4126         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4127 }
4128 
4129 void
4130 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4131 {
4132         int             done = 1; /* indicate all SUSPENDS have been acked */
4133         list_element_t  *elem;
4134         DBG_DEFINE(category,
4135             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4136 
4137         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4138             "rsm_suspend_complete enter\n"));
4139 
4140         mutex_enter(&rsm_suspend_list.list_lock);
4141 
4142         if (rsm_suspend_list.list_head == NULL) {
4143                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4144                     "rsm_suspend_complete done: suspend_list is empty\n"));
4145                 mutex_exit(&rsm_suspend_list.list_lock);
4146                 return;
4147         }
4148 
4149         elem = rsm_suspend_list.list_head;
4150         while (elem != NULL) {
4151                 if (elem->nodeid == src_node) {
4152                         /* clear the pending flag for the node */
4153                         elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4154                         elem->flags |= flag;
4155                 }
4156 
4157                 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4158                         done = 0; /* still some nodes have not yet ACKED */
4159 
4160                 elem = elem->next;
4161         }
4162 
4163         mutex_exit(&rsm_suspend_list.list_lock);
4164 
4165         if (!done) {
4166                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4167                     "rsm_suspend_complete done: acks pending\n"));
4168                 return;
4169         }
4170         /*
4171          * Now that we are done with suspending all the remote importers
4172          * time to quiesce the local exporters
4173          */
4174         exporter_quiesce();
4175 
4176         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4177             "rsm_suspend_complete done\n"));
4178 }
4179 
4180 static void
4181 exporter_quiesce()
4182 {
4183         int             i, e;
4184         rsmresource_t   *current;
4185         rsmseg_t        *seg;
4186         adapter_t       *adapter;
4187         DBG_DEFINE(category,
4188             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4189 
4190         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4191         /*
4192          * The importers send a SUSPEND_COMPLETE to the exporter node
4193          *      Unpublish, unbind the export segment and
4194          *      move the segments to the EXPORT_QUIESCED state
4195          */
4196 
4197         rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4198 
4199         for (i = 0; i < rsm_hash_size; i++) {
4200                 current = rsm_export_segs.bucket[i];
4201                 while (current != NULL) {
4202                         seg = (rsmseg_t *)current;
4203                         rsmseglock_acquire(seg);
4204                         if (current->rsmrc_state ==
4205                             RSM_STATE_EXPORT_QUIESCING) {
4206                                 adapter = seg->s_adapter;
4207                                 /*
4208                                  * some local memory handles are not published
4209                                  * check if it was published
4210                                  */
4211                                 if ((seg->s_acl == NULL) ||
4212                                     (seg->s_acl[0].ae_node != my_nodeid) ||
4213                                     (seg->s_acl[0].ae_permission != 0)) {
4214 
4215                                         e = adapter->rsmpi_ops->rsm_unpublish(
4216                                             seg->s_handle.out);
4217                                         DBG_PRINTF((category, RSM_DEBUG,
4218                                             "exporter_quiesce:unpub %d\n", e));
4219 
4220                                         e = adapter->rsmpi_ops->rsm_seg_destroy(
4221                                             seg->s_handle.out);
4222 
4223                                         DBG_PRINTF((category, RSM_DEBUG,
4224                                             "exporter_quiesce:destroy %d\n",
4225                                             e));
4226                                 }
4227 
4228                                 (void) rsm_unbind_pages(seg);
4229                                 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4230                                 cv_broadcast(&seg->s_cv);
4231                         }
4232                         rsmseglock_release(seg);
4233                         current = current->rsmrc_next;
4234                 }
4235         }
4236         rw_exit(&rsm_export_segs.rsmhash_rw);
4237 
4238         /*
4239          * All the local segments we are done with the pre-del processing
4240          * - time to move to PREDEL_COMPLETED.
4241          */
4242 
4243         mutex_enter(&rsm_drv_data.drv_lock);
4244 
4245         ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4246 
4247         rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4248 
4249         cv_broadcast(&rsm_drv_data.drv_cv);
4250 
4251         mutex_exit(&rsm_drv_data.drv_lock);
4252 
4253         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4254 }
4255 
4256 static void
4257 importer_suspend(rsm_node_id_t src_node)
4258 {
4259         int             i;
4260         int             susp_flg; /* true means already suspended */
4261         int             num_importers;
4262         rsmresource_t   *p = NULL, *curp;
4263         rsmhash_table_t *rhash = &rsm_import_segs;
4264         rsmseg_t        *seg;
4265         rsmipc_request_t request;
4266         DBG_DEFINE(category,
4267             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4268 
4269         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4270 
4271         rw_enter(&rhash->rsmhash_rw, RW_READER);
4272         for (i = 0; i < rsm_hash_size; i++) {
4273                 p = rhash->bucket[i];
4274 
4275                 /*
4276                  * Suspend all importers with same <node, key> pair.
4277                  * After the last one of the shared importers has been
4278                  * suspended - suspend the shared mappings/connection.
4279                  */
4280                 for (; p; p = p->rsmrc_next) {
4281                         rsmseg_t *first = (rsmseg_t *)p;
4282                         if ((first->s_node != src_node) ||
4283                             (first->s_state == RSM_STATE_DISCONNECT))
4284                                 continue; /* go to next entry */
4285                         /*
4286                          * search the rest of the bucket for
4287                          * other siblings (imprtrs with the same key)
4288                          * of "first" and suspend them.
4289                          * All importers with same key fall in
4290                          * the same bucket.
4291                          */
4292                         num_importers = 0;
4293                         for (curp = p; curp; curp = curp->rsmrc_next) {
4294                                 seg = (rsmseg_t *)curp;
4295 
4296                                 rsmseglock_acquire(seg);
4297 
4298                                 if ((seg->s_node != first->s_node) ||
4299                                     (seg->s_key != first->s_key) ||
4300                                     (seg->s_state == RSM_STATE_DISCONNECT)) {
4301                                         /*
4302                                          * either not a peer segment or its a
4303                                          * disconnected segment - skip it
4304                                          */
4305                                         rsmseglock_release(seg);
4306                                         continue;
4307                                 }
4308 
4309                                 rsmseg_suspend(seg, &susp_flg);
4310 
4311                                 if (susp_flg) { /* seg already suspended */
4312                                         rsmseglock_release(seg);
4313                                         break; /* the inner for loop */
4314                                 }
4315 
4316                                 num_importers++;
4317                                 rsmsharelock_acquire(seg);
4318                                 /*
4319                                  * we've processed all importers that are
4320                                  * siblings of "first"
4321                                  */
4322                                 if (num_importers ==
4323                                     seg->s_share->rsmsi_refcnt) {
4324                                         rsmsharelock_release(seg);
4325                                         rsmseglock_release(seg);
4326                                         break;
4327                                 }
4328                                 rsmsharelock_release(seg);
4329                                 rsmseglock_release(seg);
4330                         }
4331 
4332                         /*
4333                          * All the importers with the same key and
4334                          * nodeid as "first" have been suspended.
4335                          * Now suspend the shared connect/mapping.
4336                          * This is done only once.
4337                          */
4338                         if (!susp_flg) {
4339                                 rsmsegshare_suspend(seg);
4340                         }
4341                 }
4342         }
4343 
4344         rw_exit(&rhash->rsmhash_rw);
4345 
4346         /* send an ACK for SUSPEND message */
4347         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4348         (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4349 
4350 
4351         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4352 
4353 }
4354 
4355 static void
4356 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4357 {
4358         int             recheck_state;
4359         rsmcookie_t     *hdl;
4360         DBG_DEFINE(category,
4361             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4362 
4363         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4364             "rsmseg_suspend enter: key=%u\n", seg->s_key));
4365 
4366         *susp_flg = 0;
4367 
4368         ASSERT(rsmseglock_held(seg));
4369         /* wait if putv/getv is in progress */
4370         while (seg->s_rdmacnt > 0)
4371                 cv_wait(&seg->s_cv, &seg->s_lock);
4372 
4373         do {
4374                 recheck_state = 0;
4375 
4376                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4377                     "rsmseg_suspend:segment %x state=%d\n",
4378                     seg->s_key, seg->s_state));
4379 
4380                 switch (seg->s_state) {
4381                 case RSM_STATE_NEW:
4382                         /* not a valid state */
4383                         break;
4384                 case RSM_STATE_CONNECTING:
4385                         seg->s_state = RSM_STATE_ABORT_CONNECT;
4386                         break;
4387                 case RSM_STATE_ABORT_CONNECT:
4388                         break;
4389                 case RSM_STATE_CONNECT:
4390                         seg->s_handle.in = NULL;
4391                         seg->s_state = RSM_STATE_CONN_QUIESCE;
4392                         break;
4393                 case RSM_STATE_MAPPING:
4394                         /* wait until segment leaves the mapping state */
4395                         while (seg->s_state == RSM_STATE_MAPPING)
4396                                 cv_wait(&seg->s_cv, &seg->s_lock);
4397                         recheck_state = 1;
4398                         break;
4399                 case RSM_STATE_ACTIVE:
4400                         /* unload the mappings */
4401                         if (seg->s_ckl != NULL) {
4402                                 hdl = seg->s_ckl;
4403                                 for (; hdl != NULL; hdl = hdl->c_next) {
4404                                         (void) devmap_unload(hdl->c_dhp,
4405                                             hdl->c_off, hdl->c_len);
4406                                 }
4407                         }
4408                         seg->s_mapinfo = NULL;
4409                         seg->s_state = RSM_STATE_MAP_QUIESCE;
4410                         break;
4411                 case RSM_STATE_CONN_QUIESCE:
4412                         /* FALLTHRU */
4413                 case RSM_STATE_MAP_QUIESCE:
4414                         /* rsmseg_suspend already done for seg */
4415                         *susp_flg = 1;
4416                         break;
4417                 case RSM_STATE_DISCONNECT:
4418                         break;
4419                 default:
4420                         ASSERT(0); /* invalid state */
4421                 }
4422         } while (recheck_state);
4423 
4424         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4425 }
4426 
4427 static void
4428 rsmsegshare_suspend(rsmseg_t *seg)
4429 {
4430         int                     e;
4431         adapter_t               *adapter;
4432         rsm_import_share_t      *sharedp;
4433         DBG_DEFINE(category,
4434             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4435 
4436         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4437             "rsmsegshare_suspend enter\n"));
4438 
4439         rsmseglock_acquire(seg);
4440         rsmsharelock_acquire(seg);
4441 
4442         sharedp = seg->s_share;
4443         adapter = seg->s_adapter;
4444         switch (sharedp->rsmsi_state) {
4445         case RSMSI_STATE_NEW:
4446                 break;
4447         case RSMSI_STATE_CONNECTING:
4448                 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4449                 break;
4450         case RSMSI_STATE_ABORT_CONNECT:
4451                 break;
4452         case RSMSI_STATE_CONNECTED:
4453                 /* do the rsmpi disconnect */
4454                 if (sharedp->rsmsi_node != my_nodeid) {
4455                         e = adapter->rsmpi_ops->
4456                             rsm_disconnect(sharedp->rsmsi_handle);
4457 
4458                         DBG_PRINTF((category, RSM_DEBUG,
4459                             "rsm:rsmpi disconnect seg=%x:err=%d\n",
4460                             sharedp->rsmsi_segid, e));
4461                 }
4462 
4463                 sharedp->rsmsi_handle = NULL;
4464 
4465                 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4466                 break;
4467         case RSMSI_STATE_CONN_QUIESCE:
4468                 break;
4469         case RSMSI_STATE_MAPPED:
4470                 /* do the rsmpi unmap and disconnect */
4471                 if (sharedp->rsmsi_node != my_nodeid) {
4472                         e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4473 
4474                         DBG_PRINTF((category, RSM_DEBUG,
4475                             "rsmshare_suspend: rsmpi unmap %d\n", e));
4476 
4477                         e = adapter->rsmpi_ops->
4478                             rsm_disconnect(sharedp->rsmsi_handle);
4479                         DBG_PRINTF((category, RSM_DEBUG,
4480                             "rsm:rsmpi disconnect seg=%x:err=%d\n",
4481                             sharedp->rsmsi_segid, e));
4482                 }
4483 
4484                 sharedp->rsmsi_handle = NULL;
4485 
4486                 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4487                 break;
4488         case RSMSI_STATE_MAP_QUIESCE:
4489                 break;
4490         case RSMSI_STATE_DISCONNECTED:
4491                 break;
4492         default:
4493                 ASSERT(0); /* invalid state */
4494         }
4495 
4496         rsmsharelock_release(seg);
4497         rsmseglock_release(seg);
4498 
4499         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4500             "rsmsegshare_suspend done\n"));
4501 }
4502 
4503 /*
4504  * This should get called on receiving a RESUME message or from
4505  * the pathmanger if the node undergoing DR dies.
4506  */
4507 static void
4508 importer_resume(rsm_node_id_t src_node)
4509 {
4510         int             i;
4511         rsmresource_t   *p = NULL;
4512         rsmhash_table_t *rhash = &rsm_import_segs;
4513         void            *cookie;
4514         DBG_DEFINE(category,
4515             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4516 
4517         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4518 
4519         rw_enter(&rhash->rsmhash_rw, RW_READER);
4520 
4521         for (i = 0; i < rsm_hash_size; i++) {
4522                 p = rhash->bucket[i];
4523 
4524                 for (; p; p = p->rsmrc_next) {
4525                         rsmseg_t *seg = (rsmseg_t *)p;
4526 
4527                         rsmseglock_acquire(seg);
4528 
4529                         /* process only importers of node undergoing DR */
4530                         if (seg->s_node != src_node) {
4531                                 rsmseglock_release(seg);
4532                                 continue;
4533                         }
4534 
4535                         if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4536                                 rsmipc_request_t        request;
4537                                 /*
4538                                  * rsmpi map/connect failed
4539                                  * inform the exporter so that it can
4540                                  * remove the importer.
4541                                  */
4542                                 request.rsmipc_hdr.rsmipc_type =
4543                                     RSMIPC_MSG_NOTIMPORTING;
4544                                 request.rsmipc_key = seg->s_segid;
4545                                 request.rsmipc_segment_cookie = cookie;
4546                                 rsmseglock_release(seg);
4547                                 (void) rsmipc_send(seg->s_node, &request,
4548                                     RSM_NO_REPLY);
4549                         } else {
4550                                 rsmseglock_release(seg);
4551                         }
4552                 }
4553         }
4554 
4555         rw_exit(&rhash->rsmhash_rw);
4556 
4557         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4558 }
4559 
4560 static int
4561 rsmseg_resume(rsmseg_t *seg, void **cookie)
4562 {
4563         int                     e;
4564         int                     retc;
4565         off_t                   dev_offset;
4566         size_t                  maplen;
4567         uint_t                  maxprot;
4568         rsm_mapinfo_t           *p;
4569         rsmcookie_t             *hdl;
4570         rsm_import_share_t      *sharedp;
4571         DBG_DEFINE(category,
4572             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4573 
4574         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4575             "rsmseg_resume enter: key=%u\n", seg->s_key));
4576 
4577         *cookie = NULL;
4578 
4579         ASSERT(rsmseglock_held(seg));
4580 
4581         if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4582             (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4583                 return (RSM_SUCCESS);
4584         }
4585 
4586         sharedp = seg->s_share;
4587 
4588         rsmsharelock_acquire(seg);
4589 
4590         /* resume the shared connection and/or mapping */
4591         retc = rsmsegshare_resume(seg);
4592 
4593         if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4594                 /* shared state can either be connected or mapped */
4595                 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4596                     (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4597                         ASSERT(retc == RSM_SUCCESS);
4598                         seg->s_handle.in = sharedp->rsmsi_handle;
4599                         rsmsharelock_release(seg);
4600                         seg->s_state = RSM_STATE_CONNECT;
4601 
4602                 } else { /* error in rsmpi connect during resume */
4603                         seg->s_handle.in = NULL;
4604                         seg->s_state = RSM_STATE_DISCONNECT;
4605 
4606                         sharedp->rsmsi_refcnt--;
4607                         cookie = (void *)sharedp->rsmsi_cookie;
4608 
4609                         if (sharedp->rsmsi_refcnt == 0) {
4610                                 ASSERT(sharedp->rsmsi_mapcnt == 0);
4611                                 rsmsharelock_release(seg);
4612 
4613                                 /* clean up the shared data structure */
4614                                 mutex_destroy(&sharedp->rsmsi_lock);
4615                                 cv_destroy(&sharedp->rsmsi_cv);
4616                                 kmem_free((void *)(sharedp),
4617                                     sizeof (rsm_import_share_t));
4618 
4619                         } else {
4620                                 rsmsharelock_release(seg);
4621                         }
4622                         /*
4623                          * The following needs to be done after any
4624                          * rsmsharelock calls which use seg->s_share.
4625                          */
4626                         seg->s_share = NULL;
4627                 }
4628 
4629                 /* signal any waiting segment */
4630                 cv_broadcast(&seg->s_cv);
4631 
4632                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4633                     "rsmseg_resume done:state=%d\n", seg->s_state));
4634                 return (retc);
4635         }
4636 
4637         ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4638 
4639         /* Setup protections for remap */
4640         maxprot = PROT_USER;
4641         if (seg->s_mode & RSM_PERM_READ) {
4642                 maxprot |= PROT_READ;
4643         }
4644         if (seg->s_mode & RSM_PERM_WRITE) {
4645                 maxprot |= PROT_WRITE;
4646         }
4647 
4648         if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4649                 /* error in rsmpi connect or map during resume */
4650 
4651                 /* remap to trash page */
4652                 ASSERT(seg->s_ckl != NULL);
4653 
4654                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4655                         e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4656                             remap_cookie, hdl->c_off, hdl->c_len,
4657                             maxprot, 0, NULL);
4658 
4659                         DBG_PRINTF((category, RSM_ERR,
4660                             "rsmseg_resume:remap=%d\n", e));
4661                 }
4662 
4663                 seg->s_handle.in = NULL;
4664                 seg->s_state = RSM_STATE_DISCONNECT;
4665 
4666                 sharedp->rsmsi_refcnt--;
4667 
4668                 sharedp->rsmsi_mapcnt--;
4669                 seg->s_mapinfo = NULL;
4670 
4671                 if (sharedp->rsmsi_refcnt == 0) {
4672                         ASSERT(sharedp->rsmsi_mapcnt == 0);
4673                         rsmsharelock_release(seg);
4674 
4675                         /* clean up the shared data structure */
4676                         mutex_destroy(&sharedp->rsmsi_lock);
4677                         cv_destroy(&sharedp->rsmsi_cv);
4678                         kmem_free((void *)(sharedp),
4679                             sizeof (rsm_import_share_t));
4680 
4681                 } else {
4682                         rsmsharelock_release(seg);
4683                 }
4684                 /*
4685                  * The following needs to be done after any
4686                  * rsmsharelock calls which use seg->s_share.
4687                  */
4688                 seg->s_share = NULL;
4689 
4690                 /* signal any waiting segment */
4691                 cv_broadcast(&seg->s_cv);
4692 
4693                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4694                     "rsmseg_resume done:seg=%x,err=%d\n",
4695                     seg->s_key, retc));
4696                 return (retc);
4697 
4698         }
4699 
4700         seg->s_handle.in = sharedp->rsmsi_handle;
4701 
4702         if (seg->s_node == my_nodeid) { /* loopback */
4703                 ASSERT(seg->s_mapinfo == NULL);
4704 
4705                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4706                         e = devmap_umem_remap(hdl->c_dhp,
4707                             rsm_dip, seg->s_cookie,
4708                             hdl->c_off, hdl->c_len,
4709                             maxprot, 0, NULL);
4710 
4711                         DBG_PRINTF((category, RSM_ERR,
4712                             "rsmseg_resume:remap=%d\n", e));
4713                 }
4714         } else { /* remote exporter */
4715                 /* remap to the new rsmpi maps */
4716                 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4717 
4718                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4719                         p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4720                             &dev_offset, &maplen);
4721                         e = devmap_devmem_remap(hdl->c_dhp,
4722                             p->dip, p->dev_register, dev_offset,
4723                             maplen, maxprot, 0, NULL);
4724 
4725                         DBG_PRINTF((category, RSM_ERR,
4726                             "rsmseg_resume:remap=%d\n", e));
4727                 }
4728         }
4729 
4730         rsmsharelock_release(seg);
4731 
4732         seg->s_state = RSM_STATE_ACTIVE;
4733         cv_broadcast(&seg->s_cv);
4734 
4735         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4736 
4737         return (retc);
4738 }
4739 
4740 static int
4741 rsmsegshare_resume(rsmseg_t *seg)
4742 {
4743         int                     e = RSM_SUCCESS;
4744         adapter_t               *adapter;
4745         rsm_import_share_t      *sharedp;
4746         DBG_DEFINE(category,
4747             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4748 
4749         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4750 
4751         ASSERT(rsmseglock_held(seg));
4752         ASSERT(rsmsharelock_held(seg));
4753 
4754         sharedp = seg->s_share;
4755 
4756         /*
4757          * If we are not in a xxxx_QUIESCE state that means shared
4758          * connect/mapping processing has been already been done
4759          * so return success.
4760          */
4761         if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4762             (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4763                 return (RSM_SUCCESS);
4764         }
4765 
4766         adapter = seg->s_adapter;
4767 
4768         if (sharedp->rsmsi_node != my_nodeid) {
4769                 rsm_addr_t      hwaddr;
4770                 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4771 
4772                 e = adapter->rsmpi_ops->rsm_connect(
4773                     adapter->rsmpi_handle, hwaddr,
4774                     sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4775 
4776                 DBG_PRINTF((category, RSM_DEBUG,
4777                     "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4778                     sharedp->rsmsi_segid, e));
4779 
4780                 if (e != RSM_SUCCESS) {
4781                         /* when do we send the NOT_IMPORTING message */
4782                         sharedp->rsmsi_handle = NULL;
4783                         sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4784                         /* signal any waiting segment */
4785                         cv_broadcast(&sharedp->rsmsi_cv);
4786                         return (e);
4787                 }
4788         }
4789 
4790         if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4791                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4792                 /* signal any waiting segment */
4793                 cv_broadcast(&sharedp->rsmsi_cv);
4794                 return (e);
4795         }
4796 
4797         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4798 
4799         /* do the rsmpi map of the whole segment here */
4800         if (sharedp->rsmsi_node != my_nodeid) {
4801                 size_t mapped_len;
4802                 rsm_mapinfo_t *p;
4803 
4804                 /*
4805                  * We need to do rsmpi maps with <off, lens> identical to
4806                  * the old mapinfo list because the segment mapping handles
4807                  * dhp and such need the fragmentation of rsmpi maps to be
4808                  * identical to what it was during the mmap of the segment
4809                  */
4810                 p = sharedp->rsmsi_mapinfo;
4811 
4812                 while (p != NULL) {
4813                         mapped_len = 0;
4814 
4815                         e = adapter->rsmpi_ops->rsm_map(
4816                             sharedp->rsmsi_handle, p->start_offset,
4817                             p->individual_len, &mapped_len,
4818                             &p->dip, &p->dev_register, &p->dev_offset,
4819                             NULL, NULL);
4820 
4821                         if (e != 0) {
4822                                 DBG_PRINTF((category, RSM_ERR,
4823                                     "rsmsegshare_resume: rsmpi map err=%d\n",
4824                                     e));
4825                                 break;
4826                         }
4827 
4828                         if (mapped_len != p->individual_len) {
4829                                 DBG_PRINTF((category, RSM_ERR,
4830                                     "rsmsegshare_resume: rsmpi maplen"
4831                                     "< reqlen=%lx\n", mapped_len));
4832                                 e = RSMERR_BAD_LENGTH;
4833                                 break;
4834                         }
4835 
4836                         p = p->next;
4837 
4838                 }
4839 
4840 
4841                 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4842                         int     err;
4843                         /* Check if this is the first rsm_map */
4844                         if (p != sharedp->rsmsi_mapinfo) {
4845                                 /*
4846                                  * A single rsm_unmap undoes multiple rsm_maps.
4847                                  */
4848                                 (void) seg->s_adapter->rsmpi_ops->
4849                                     rsm_unmap(sharedp->rsmsi_handle);
4850                         }
4851 
4852                         rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4853                         sharedp->rsmsi_mapinfo = NULL;
4854 
4855                         err = adapter->rsmpi_ops->
4856                             rsm_disconnect(sharedp->rsmsi_handle);
4857 
4858                         DBG_PRINTF((category, RSM_DEBUG,
4859                             "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4860                             sharedp->rsmsi_segid, err));
4861 
4862                         sharedp->rsmsi_handle = NULL;
4863                         sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4864 
4865                         /* signal the waiting segments */
4866                         cv_broadcast(&sharedp->rsmsi_cv);
4867                         DBG_PRINTF((category, RSM_DEBUG,
4868                             "rsmsegshare_resume done: rsmpi map err\n"));
4869                         return (e);
4870                 }
4871         }
4872 
4873         sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4874 
4875         /* signal any waiting segment */
4876         cv_broadcast(&sharedp->rsmsi_cv);
4877 
4878         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4879 
4880         return (e);
4881 }
4882 
4883 /*
4884  * this is the routine that gets called by recv_taskq which is the
4885  * thread that processes messages that are flow-controlled.
4886  */
4887 static void
4888 rsm_intr_proc_deferred(void *arg)
4889 {
4890         path_t                  *path = (path_t *)arg;
4891         rsmipc_request_t        *msg;
4892         rsmipc_msghdr_t         *msghdr;
4893         rsm_node_id_t           src_node;
4894         msgbuf_elem_t           *head;
4895         int                     e;
4896         DBG_DEFINE(category,
4897             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4898 
4899         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4900             "rsm_intr_proc_deferred enter\n"));
4901 
4902         mutex_enter(&path->mutex);
4903 
4904         /* use the head of the msgbuf_queue */
4905         head = rsmka_gethead_msgbuf(path);
4906 
4907         mutex_exit(&path->mutex);
4908 
4909         msg = (rsmipc_request_t *)&(head->msg);
4910         msghdr = (rsmipc_msghdr_t *)msg;
4911 
4912         src_node = msghdr->rsmipc_src;
4913 
4914         /*
4915          * messages that need to send a reply should check the message version
4916          * before processing the message. And all messages that need to
4917          * send a reply should be processed here by the worker thread.
4918          */
4919         switch (msghdr->rsmipc_type) {
4920         case RSMIPC_MSG_SEGCONNECT:
4921                 if (msghdr->rsmipc_version != RSM_VERSION) {
4922                         rsmipc_reply_t reply;
4923                         reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4924                         reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4925                         reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4926                         (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4927                 } else {
4928                         rsm_intr_segconnect(src_node, msg);
4929                 }
4930                 break;
4931         case RSMIPC_MSG_DISCONNECT:
4932                 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4933                 break;
4934         case RSMIPC_MSG_SUSPEND:
4935                 importer_suspend(src_node);
4936                 break;
4937         case RSMIPC_MSG_SUSPEND_DONE:
4938                 rsm_suspend_complete(src_node, 0);
4939                 break;
4940         case RSMIPC_MSG_RESUME:
4941                 importer_resume(src_node);
4942                 break;
4943         default:
4944                 ASSERT(0);
4945         }
4946 
4947         mutex_enter(&path->mutex);
4948 
4949         rsmka_dequeue_msgbuf(path);
4950 
4951         /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4952         if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4953                 path->procmsg_cnt++;
4954 
4955         ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4956 
4957         /* No need to send credits if path is going down */
4958         if ((path->state == RSMKA_PATH_ACTIVE) &&
4959             (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4960                 /*
4961                  * send credits and reset procmsg_cnt if success otherwise
4962                  * credits will be sent after processing the next message
4963                  */
4964                 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4965                 if (e == 0)
4966                         path->procmsg_cnt = 0;
4967                 else
4968                         DBG_PRINTF((category, RSM_ERR,
4969                             "rsm_intr_proc_deferred:send credits err=%d\n", e));
4970         }
4971 
4972         /*
4973          * decrement the path refcnt since we incremented it in
4974          * rsm_intr_callback_dispatch
4975          */
4976         PATH_RELE_NOLOCK(path);
4977 
4978         mutex_exit(&path->mutex);
4979 
4980         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4981             "rsm_intr_proc_deferred done\n"));
4982 }
4983 
4984 /*
4985  * Flow-controlled messages are enqueued and dispatched onto a taskq here
4986  */
4987 static void
4988 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4989     rsm_intr_hand_arg_t arg)
4990 {
4991         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
4992         path_t                  *path;
4993         rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4994         DBG_DEFINE(category,
4995             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4996 
4997         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4998             "rsm_intr_callback_dispatch enter\n"));
4999         ASSERT(data && hdlr_argp);
5000 
5001         /* look up the path - incr the path refcnt */
5002         path = rsm_find_path(hdlr_argp->adapter_name,
5003             hdlr_argp->adapter_instance, src_hwaddr);
5004 
5005         /* the path has been removed - drop this message */
5006         if (path == NULL) {
5007                 DBG_PRINTF((category, RSM_DEBUG,
5008                     "rsm_intr_callback_dispatch done: msg dropped\n"));
5009                 return;
5010         }
5011         /* the path is not active - don't accept new messages */
5012         if (path->state != RSMKA_PATH_ACTIVE) {
5013                 PATH_RELE_NOLOCK(path);
5014                 mutex_exit(&path->mutex);
5015                 DBG_PRINTF((category, RSM_DEBUG,
5016                     "rsm_intr_callback_dispatch done: msg dropped"
5017                     " path=%lx !ACTIVE\n", path));
5018                 return;
5019         }
5020 
5021         /*
5022          * Check if this message was sent to an older incarnation
5023          * of the path/sendq.
5024          */
5025         if (path->local_incn != msghdr->rsmipc_incn) {
5026                 /* decrement the refcnt */
5027                 PATH_RELE_NOLOCK(path);
5028                 mutex_exit(&path->mutex);
5029                 DBG_PRINTF((category, RSM_DEBUG,
5030                     "rsm_intr_callback_dispatch done: old incn %lld\n",
5031                     msghdr->rsmipc_incn));
5032                 return;
5033         }
5034 
5035         /* copy and enqueue msg on the path's msgbuf queue */
5036         rsmka_enqueue_msgbuf(path, data);
5037 
5038         /*
5039          * schedule task to process messages - ignore retval from
5040          * task_dispatch because we sender cannot send more than
5041          * what receiver can handle.
5042          */
5043         (void) taskq_dispatch(path->recv_taskq,
5044             rsm_intr_proc_deferred, path, KM_NOSLEEP);
5045 
5046         mutex_exit(&path->mutex);
5047 
5048         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5049             "rsm_intr_callback_dispatch done\n"));
5050 }
5051 
5052 /*
5053  * This procedure is called from rsm_srv_func when a remote node creates a
5054  * a send queue.  This event is used as a hint that an  earlier failed
5055  * attempt to create a send queue to that remote node may now succeed and
5056  * should be retried.  Indication of an earlier failed attempt is provided
5057  * by the RSMKA_SQCREATE_PENDING flag.
5058  */
5059 static void
5060 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5061 {
5062         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
5063         path_t                  *path;
5064         DBG_DEFINE(category,
5065             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5066 
5067         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5068             "rsm_sqcreateop_callback enter\n"));
5069 
5070         /* look up the path - incr the path refcnt */
5071         path = rsm_find_path(hdlr_argp->adapter_name,
5072             hdlr_argp->adapter_instance, src_hwaddr);
5073 
5074         if (path == NULL) {
5075                 DBG_PRINTF((category, RSM_DEBUG,
5076                     "rsm_sqcreateop_callback done: no path\n"));
5077                 return;
5078         }
5079 
5080         if ((path->state == RSMKA_PATH_UP) &&
5081             (path->flags & RSMKA_SQCREATE_PENDING)) {
5082                 /*
5083                  * previous attempt to create sendq had failed, retry
5084                  * it and move to RSMKA_PATH_ACTIVE state if successful.
5085                  * the refcnt will be decremented in the do_deferred_work
5086                  */
5087                 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5088         } else {
5089                 /* decrement the refcnt */
5090                 PATH_RELE_NOLOCK(path);
5091         }
5092         mutex_exit(&path->mutex);
5093 
5094         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5095             "rsm_sqcreateop_callback done\n"));
5096 }
5097 
5098 static void
5099 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5100 {
5101         rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5102         rsmipc_request_t *msg = (rsmipc_request_t *)data;
5103         rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5104         rsm_node_id_t src_node;
5105         DBG_DEFINE(category,
5106             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5107 
5108         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5109             "src=%d, type=%d\n", msghdr->rsmipc_src,
5110             msghdr->rsmipc_type));
5111 
5112         /*
5113          * Check for the version number in the msg header. If it is not
5114          * RSM_VERSION, drop the message. In the future, we need to manage
5115          * incompatible version numbers in some way
5116          */
5117         if (msghdr->rsmipc_version != RSM_VERSION) {
5118                 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5119                 /*
5120                  * Drop requests that don't have a reply right here
5121                  * Request with reply will send a BAD_VERSION reply
5122                  * when they get processed by the worker thread.
5123                  */
5124                 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5125                         return;
5126                 }
5127 
5128         }
5129 
5130         src_node = msghdr->rsmipc_src;
5131 
5132         switch (msghdr->rsmipc_type) {
5133         case RSMIPC_MSG_SEGCONNECT:
5134         case RSMIPC_MSG_DISCONNECT:
5135         case RSMIPC_MSG_SUSPEND:
5136         case RSMIPC_MSG_SUSPEND_DONE:
5137         case RSMIPC_MSG_RESUME:
5138                 /*
5139                  * These message types are handled by a worker thread using
5140                  * the flow-control algorithm.
5141                  * Any message processing that does one or more of the
5142                  * following should be handled in a worker thread.
5143                  *      - allocates resources and might sleep
5144                  *      - makes RSMPI calls down to the interconnect driver
5145                  *      this by defn include requests with reply.
5146                  *      - takes a long duration of time
5147                  */
5148                 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5149                 break;
5150         case RSMIPC_MSG_NOTIMPORTING:
5151                 importer_list_rm(src_node, msg->rsmipc_key,
5152                     msg->rsmipc_segment_cookie);
5153                 break;
5154         case RSMIPC_MSG_SQREADY:
5155                 rsm_proc_sqready(data, src_hwaddr, arg);
5156                 break;
5157         case RSMIPC_MSG_SQREADY_ACK:
5158                 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5159                 break;
5160         case RSMIPC_MSG_CREDIT:
5161                 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5162                 break;
5163         case RSMIPC_MSG_REPLY:
5164                 rsm_intr_reply(msghdr);
5165                 break;
5166         case RSMIPC_MSG_BELL:
5167                 rsm_intr_event(msg);
5168                 break;
5169         case RSMIPC_MSG_IMPORTING:
5170                 importer_list_add(src_node, msg->rsmipc_key,
5171                     msg->rsmipc_adapter_hwaddr,
5172                     msg->rsmipc_segment_cookie);
5173                 break;
5174         case RSMIPC_MSG_REPUBLISH:
5175                 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5176                 break;
5177         default:
5178                 DBG_PRINTF((category, RSM_DEBUG,
5179                     "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5180                     (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5181         }
5182 
5183         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5184 
5185 }
5186 
5187 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5188     rsm_intr_q_op_t opcode, rsm_addr_t src,
5189     void *data, size_t size, rsm_intr_hand_arg_t arg)
5190 {
5191         DBG_DEFINE(category,
5192             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5193 
5194         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5195 
5196         switch (opcode) {
5197         case RSM_INTR_Q_OP_CREATE:
5198                 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5199                 rsm_sqcreateop_callback(src, arg);
5200                 break;
5201         case RSM_INTR_Q_OP_DESTROY:
5202                 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5203                 break;
5204         case RSM_INTR_Q_OP_RECEIVE:
5205                 rsm_intr_callback(data, src, arg);
5206                 break;
5207         default:
5208                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5209                     "rsm_srv_func: unknown opcode = %x\n", opcode));
5210         }
5211 
5212         chd = chd;
5213         size = size;
5214 
5215         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5216 
5217         return (RSM_INTR_HAND_CLAIMED);
5218 }
5219 
5220 /* *************************** IPC slots ************************* */
5221 static rsmipc_slot_t *
5222 rsmipc_alloc()
5223 {
5224         int i;
5225         rsmipc_slot_t *slot;
5226         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5227 
5228         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5229 
5230         /* try to find a free slot, if not wait */
5231         mutex_enter(&rsm_ipc.lock);
5232 
5233         while (rsm_ipc.count == 0) {
5234                 rsm_ipc.wanted = 1;
5235                 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5236         }
5237 
5238         /* An empty slot is available, find it */
5239         slot = &rsm_ipc.slots[0];
5240         for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5241                 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5242                         RSMIPC_CLEAR(slot, RSMIPC_FREE);
5243                         break;
5244                 }
5245         }
5246 
5247         ASSERT(i < RSMIPC_SZ);
5248         rsm_ipc.count--;        /* one less is available */
5249         rsm_ipc.sequence++; /* new sequence */
5250 
5251         slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5252         slot->rsmipc_cookie.ic.index = (uint_t)i;
5253 
5254         mutex_exit(&rsm_ipc.lock);
5255 
5256         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5257 
5258         return (slot);
5259 }
5260 
5261 static void
5262 rsmipc_free(rsmipc_slot_t *slot)
5263 {
5264         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5265 
5266         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5267 
5268         ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5269         ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5270 
5271         mutex_enter(&rsm_ipc.lock);
5272 
5273         RSMIPC_SET(slot, RSMIPC_FREE);
5274 
5275         slot->rsmipc_cookie.ic.sequence = 0;
5276 
5277         mutex_exit(&slot->rsmipc_lock);
5278         rsm_ipc.count++;
5279         ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5280         if (rsm_ipc.wanted) {
5281                 rsm_ipc.wanted = 0;
5282                 cv_broadcast(&rsm_ipc.cv);
5283         }
5284 
5285         mutex_exit(&rsm_ipc.lock);
5286 
5287         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5288 }
5289 
5290 static int
5291 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5292 {
5293         int             e = 0;
5294         int             credit_check = 0;
5295         int             retry_cnt = 0;
5296         int             min_retry_cnt = 10;
5297         rsm_send_t      is;
5298         rsmipc_slot_t   *rslot;
5299         adapter_t       *adapter;
5300         path_t          *path;
5301         sendq_token_t   *sendq_token;
5302         sendq_token_t   *used_sendq_token = NULL;
5303         rsm_send_q_handle_t     ipc_handle;
5304         DBG_DEFINE(category,
5305             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5306 
5307         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5308             dest));
5309 
5310         /*
5311          * Check if this is a local case
5312          */
5313         if (dest == my_nodeid) {
5314                 switch (req->rsmipc_hdr.rsmipc_type) {
5315                 case RSMIPC_MSG_SEGCONNECT:
5316                         reply->rsmipc_status = (short)rsmsegacl_validate(
5317                             req, dest, reply);
5318                         break;
5319                 case RSMIPC_MSG_BELL:
5320                         req->rsmipc_hdr.rsmipc_src = dest;
5321                         rsm_intr_event(req);
5322                         break;
5323                 case RSMIPC_MSG_IMPORTING:
5324                         importer_list_add(dest, req->rsmipc_key,
5325                             req->rsmipc_adapter_hwaddr,
5326                             req->rsmipc_segment_cookie);
5327                         break;
5328                 case RSMIPC_MSG_NOTIMPORTING:
5329                         importer_list_rm(dest, req->rsmipc_key,
5330                             req->rsmipc_segment_cookie);
5331                         break;
5332                 case RSMIPC_MSG_REPUBLISH:
5333                         importer_update(dest, req->rsmipc_key,
5334                             req->rsmipc_perm);
5335                         break;
5336                 case RSMIPC_MSG_SUSPEND:
5337                         importer_suspend(dest);
5338                         break;
5339                 case RSMIPC_MSG_SUSPEND_DONE:
5340                         rsm_suspend_complete(dest, 0);
5341                         break;
5342                 case RSMIPC_MSG_RESUME:
5343                         importer_resume(dest);
5344                         break;
5345                 default:
5346                         ASSERT(0);
5347                 }
5348                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5349                     "rsmipc_send done\n"));
5350                 return (0);
5351         }
5352 
5353         if (dest >= MAX_NODES) {
5354                 DBG_PRINTF((category, RSM_ERR,
5355                     "rsm: rsmipc_send bad node number %x\n", dest));
5356                 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5357         }
5358 
5359         /*
5360          * Oh boy! we are going remote.
5361          */
5362 
5363         /*
5364          * identify if we need to have credits to send this message
5365          * - only selected requests are flow controlled
5366          */
5367         if (req != NULL) {
5368                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5369                     "rsmipc_send:request type=%d\n",
5370                     req->rsmipc_hdr.rsmipc_type));
5371 
5372                 switch (req->rsmipc_hdr.rsmipc_type) {
5373                 case RSMIPC_MSG_SEGCONNECT:
5374                 case RSMIPC_MSG_DISCONNECT:
5375                 case RSMIPC_MSG_IMPORTING:
5376                 case RSMIPC_MSG_SUSPEND:
5377                 case RSMIPC_MSG_SUSPEND_DONE:
5378                 case RSMIPC_MSG_RESUME:
5379                         credit_check = 1;
5380                         break;
5381                 default:
5382                         credit_check = 0;
5383                 }
5384         }
5385 
5386 again:
5387         if (retry_cnt++ == min_retry_cnt) {
5388                 /* backoff before further retries for 10ms */
5389                 delay(drv_usectohz(10000));
5390                 retry_cnt = 0; /* reset retry_cnt */
5391         }
5392         sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5393         if (sendq_token == NULL) {
5394                 DBG_PRINTF((category, RSM_ERR,
5395                     "rsm: rsmipc_send no device to reach node %d\n", dest));
5396                 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5397         }
5398 
5399         if ((sendq_token == used_sendq_token) &&
5400             ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5401             (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5402                 rele_sendq_token(sendq_token);
5403                 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5404                 return (RSMERR_CONN_ABORTED);
5405         } else
5406                 used_sendq_token = sendq_token;
5407 
5408 /* lint -save -e413 */
5409         path = SQ_TOKEN_TO_PATH(sendq_token);
5410         adapter = path->local_adapter;
5411 /* lint -restore */
5412         ipc_handle = sendq_token->rsmpi_sendq_handle;
5413 
5414         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5415             "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5416 
5417         if (reply == NULL) {
5418                 /* Send request without ack */
5419                 /*
5420                  * Set the rsmipc_version number in the msghdr for KA
5421                  * communication versioning
5422                  */
5423                 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5424                 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5425                 /*
5426                  * remote endpoints incn should match the value in our
5427                  * path's remote_incn field. No need to grab any lock
5428                  * since we have refcnted the path in rsmka_get_sendq_token
5429                  */
5430                 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5431 
5432                 is.is_data = (void *)req;
5433                 is.is_size = sizeof (*req);
5434                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5435                 is.is_wait = 0;
5436 
5437                 if (credit_check) {
5438                         mutex_enter(&path->mutex);
5439                         /*
5440                          * wait till we recv credits or path goes down. If path
5441                          * goes down rsm_send will fail and we handle the error
5442                          * then
5443                          */
5444                         while ((sendq_token->msgbuf_avail == 0) &&
5445                             (path->state == RSMKA_PATH_ACTIVE)) {
5446                                 e = cv_wait_sig(&sendq_token->sendq_cv,
5447                                     &path->mutex);
5448                                 if (e == 0) {
5449                                         mutex_exit(&path->mutex);
5450                                         no_reply_cnt++;
5451                                         rele_sendq_token(sendq_token);
5452                                         DBG_PRINTF((category, RSM_DEBUG,
5453                                             "rsmipc_send done: "
5454                                             "cv_wait INTERRUPTED"));
5455                                         return (RSMERR_INTERRUPTED);
5456                                 }
5457                         }
5458 
5459                         /*
5460                          * path is not active retry on another path.
5461                          */
5462                         if (path->state != RSMKA_PATH_ACTIVE) {
5463                                 mutex_exit(&path->mutex);
5464                                 rele_sendq_token(sendq_token);
5465                                 e = RSMERR_CONN_ABORTED;
5466                                 DBG_PRINTF((category, RSM_ERR,
5467                                     "rsm: rsmipc_send: path !ACTIVE"));
5468                                 goto again;
5469                         }
5470 
5471                         ASSERT(sendq_token->msgbuf_avail > 0);
5472 
5473                         /*
5474                          * reserve a msgbuf
5475                          */
5476                         sendq_token->msgbuf_avail--;
5477 
5478                         mutex_exit(&path->mutex);
5479 
5480                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5481                             NULL);
5482 
5483                         if (e != RSM_SUCCESS) {
5484                                 mutex_enter(&path->mutex);
5485                                 /*
5486                                  * release the reserved msgbuf since
5487                                  * the send failed
5488                                  */
5489                                 sendq_token->msgbuf_avail++;
5490                                 cv_broadcast(&sendq_token->sendq_cv);
5491                                 mutex_exit(&path->mutex);
5492                         }
5493                 } else
5494                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5495                             NULL);
5496 
5497                 no_reply_cnt++;
5498                 rele_sendq_token(sendq_token);
5499                 if (e != RSM_SUCCESS) {
5500                         DBG_PRINTF((category, RSM_ERR,
5501                             "rsm: rsmipc_send no reply send"
5502                             " err = %d no reply count = %d\n",
5503                             e, no_reply_cnt));
5504                         ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5505                             e != RSMERR_BAD_BARRIER_HNDL);
5506                         atomic_inc_64(&rsm_ipcsend_errcnt);
5507                         goto again;
5508                 } else {
5509                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5510                             "rsmipc_send done\n"));
5511                         return (e);
5512                 }
5513 
5514         }
5515 
5516         if (req == NULL) {
5517                 /* Send reply - No flow control is done for reply */
5518                 /*
5519                  * Set the version in the msg header for KA communication
5520                  * versioning
5521                  */
5522                 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5523                 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5524                 /* incn number is not used for reply msgs currently */
5525                 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5526 
5527                 is.is_data = (void *)reply;
5528                 is.is_size = sizeof (*reply);
5529                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5530                 is.is_wait = 0;
5531                 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5532                 rele_sendq_token(sendq_token);
5533                 if (e != RSM_SUCCESS) {
5534                         DBG_PRINTF((category, RSM_ERR,
5535                             "rsm: rsmipc_send reply send"
5536                             " err = %d\n", e));
5537                         atomic_inc_64(&rsm_ipcsend_errcnt);
5538                         goto again;
5539                 } else {
5540                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5541                             "rsmipc_send done\n"));
5542                         return (e);
5543                 }
5544         }
5545 
5546         /* Reply needed */
5547         rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5548 
5549         mutex_enter(&rslot->rsmipc_lock);
5550 
5551         rslot->rsmipc_data = (void *)reply;
5552         RSMIPC_SET(rslot, RSMIPC_PENDING);
5553 
5554         while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5555                 /*
5556                  * Set the rsmipc_version number in the msghdr for KA
5557                  * communication versioning
5558                  */
5559                 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5560                 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5561                 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5562                 /*
5563                  * remote endpoints incn should match the value in our
5564                  * path's remote_incn field. No need to grab any lock
5565                  * since we have refcnted the path in rsmka_get_sendq_token
5566                  */
5567                 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5568 
5569                 is.is_data = (void *)req;
5570                 is.is_size = sizeof (*req);
5571                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5572                 is.is_wait = 0;
5573                 if (credit_check) {
5574 
5575                         mutex_enter(&path->mutex);
5576                         /*
5577                          * wait till we recv credits or path goes down. If path
5578                          * goes down rsm_send will fail and we handle the error
5579                          * then.
5580                          */
5581                         while ((sendq_token->msgbuf_avail == 0) &&
5582                             (path->state == RSMKA_PATH_ACTIVE)) {
5583                                 e = cv_wait_sig(&sendq_token->sendq_cv,
5584                                     &path->mutex);
5585                                 if (e == 0) {
5586                                         mutex_exit(&path->mutex);
5587                                         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5588                                         rsmipc_free(rslot);
5589                                         rele_sendq_token(sendq_token);
5590                                         DBG_PRINTF((category, RSM_DEBUG,
5591                                             "rsmipc_send done: "
5592                                             "cv_wait INTERRUPTED"));
5593                                         return (RSMERR_INTERRUPTED);
5594                                 }
5595                         }
5596 
5597                         /*
5598                          * path is not active retry on another path.
5599                          */
5600                         if (path->state != RSMKA_PATH_ACTIVE) {
5601                                 mutex_exit(&path->mutex);
5602                                 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5603                                 rsmipc_free(rslot);
5604                                 rele_sendq_token(sendq_token);
5605                                 e = RSMERR_CONN_ABORTED;
5606                                 DBG_PRINTF((category, RSM_ERR,
5607                                     "rsm: rsmipc_send: path !ACTIVE"));
5608                                 goto again;
5609                         }
5610 
5611                         ASSERT(sendq_token->msgbuf_avail > 0);
5612 
5613                         /*
5614                          * reserve a msgbuf
5615                          */
5616                         sendq_token->msgbuf_avail--;
5617 
5618                         mutex_exit(&path->mutex);
5619 
5620                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5621                             NULL);
5622 
5623                         if (e != RSM_SUCCESS) {
5624                                 mutex_enter(&path->mutex);
5625                                 /*
5626                                  * release the reserved msgbuf since
5627                                  * the send failed
5628                                  */
5629                                 sendq_token->msgbuf_avail++;
5630                                 cv_broadcast(&sendq_token->sendq_cv);
5631                                 mutex_exit(&path->mutex);
5632                         }
5633                 } else
5634                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5635                             NULL);
5636 
5637                 if (e != RSM_SUCCESS) {
5638                         DBG_PRINTF((category, RSM_ERR,
5639                             "rsm: rsmipc_send rsmpi send err = %d\n", e));
5640                         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5641                         rsmipc_free(rslot);
5642                         rele_sendq_token(sendq_token);
5643                         atomic_inc_64(&rsm_ipcsend_errcnt);
5644                         goto again;
5645                 }
5646 
5647                 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5648                 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5649                     drv_usectohz(5000000), TR_CLOCK_TICK);
5650                 if (e < 0) {
5651                         /* timed out - retry */
5652                         e = RSMERR_TIMEOUT;
5653                 } else if (e == 0) {
5654                         /* signalled - return error */
5655                         e = RSMERR_INTERRUPTED;
5656                         break;
5657                 } else {
5658                         e = RSM_SUCCESS;
5659                 }
5660         }
5661 
5662         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5663         rsmipc_free(rslot);
5664         rele_sendq_token(sendq_token);
5665 
5666         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5667         return (e);
5668 }
5669 
5670 static int
5671 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,  void *cookie)
5672 {
5673         rsmipc_request_t request;
5674 
5675         /*
5676          *  inform the exporter to delete this importer
5677          */
5678         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5679         request.rsmipc_key = segid;
5680         request.rsmipc_segment_cookie = cookie;
5681         return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5682 }
5683 
5684 static void
5685 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5686     int acl_len, rsm_permission_t default_permission)
5687 {
5688         int                     i;
5689         importing_token_t       *token;
5690         rsmipc_request_t        request;
5691         republish_token_t       *republish_list = NULL;
5692         republish_token_t       *rp;
5693         rsm_permission_t        permission;
5694         int                     index;
5695 
5696         /*
5697          * send the new access mode to all the nodes that have imported
5698          * this segment.
5699          * If the new acl does not have a node that was present in
5700          * the old acl a access permission of 0 is sent.
5701          */
5702 
5703         index = rsmhash(segid);
5704 
5705         /*
5706          * create a list of node/permissions to send the republish message
5707          */
5708         mutex_enter(&importer_list.lock);
5709 
5710         token = importer_list.bucket[index];
5711         while (token != NULL) {
5712                 if (segid == token->key) {
5713                         permission = default_permission;
5714 
5715                         for (i = 0; i < acl_len; i++) {
5716                                 if (token->importing_node == acl[i].ae_node) {
5717                                         permission = acl[i].ae_permission;
5718                                         break;
5719                                 }
5720                         }
5721                         rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5722 
5723                         rp->key = segid;
5724                         rp->importing_node = token->importing_node;
5725                         rp->permission = permission;
5726                         rp->next = republish_list;
5727                         republish_list = rp;
5728                 }
5729                 token = token->next;
5730         }
5731 
5732         mutex_exit(&importer_list.lock);
5733 
5734         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5735         request.rsmipc_key = segid;
5736 
5737         while (republish_list != NULL) {
5738                 request.rsmipc_perm = republish_list->permission;
5739                 (void) rsmipc_send(republish_list->importing_node,
5740                     &request, RSM_NO_REPLY);
5741                 rp = republish_list;
5742                 republish_list = republish_list->next;
5743                 kmem_free(rp, sizeof (republish_token_t));
5744         }
5745 }
5746 
5747 static void
5748 rsm_send_suspend()
5749 {
5750         int                     i, e;
5751         rsmipc_request_t        request;
5752         list_element_t          *tokp;
5753         list_element_t          *head = NULL;
5754         importing_token_t       *token;
5755         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5756             "rsm_send_suspend enter\n"));
5757 
5758         /*
5759          * create a list of node to send the suspend message
5760          *
5761          * Currently the whole importer list is scanned and we obtain
5762          * all the nodes - this basically gets all nodes that at least
5763          * import one segment from the local node.
5764          *
5765          * no need to grab the rsm_suspend_list lock here since we are
5766          * single threaded when suspend is called.
5767          */
5768 
5769         mutex_enter(&importer_list.lock);
5770         for (i = 0; i < rsm_hash_size; i++) {
5771 
5772                 token = importer_list.bucket[i];
5773 
5774                 while (token != NULL) {
5775 
5776                         tokp = head;
5777 
5778                         /*
5779                          * make sure that the token's node
5780                          * is not already on the suspend list
5781                          */
5782                         while (tokp != NULL) {
5783                                 if (tokp->nodeid == token->importing_node) {
5784                                         break;
5785                                 }
5786                                 tokp = tokp->next;
5787                         }
5788 
5789                         if (tokp == NULL) { /* not in suspend list */
5790                                 tokp = kmem_zalloc(sizeof (list_element_t),
5791                                     KM_SLEEP);
5792                                 tokp->nodeid = token->importing_node;
5793                                 tokp->next = head;
5794                                 head = tokp;
5795                         }
5796 
5797                         token = token->next;
5798                 }
5799         }
5800         mutex_exit(&importer_list.lock);
5801 
5802         if (head == NULL) { /* no importers so go ahead and quiesce segments */
5803                 exporter_quiesce();
5804                 return;
5805         }
5806 
5807         mutex_enter(&rsm_suspend_list.list_lock);
5808         ASSERT(rsm_suspend_list.list_head == NULL);
5809         /*
5810          * update the suspend list righaway so that if a node dies the
5811          * pathmanager can set the NODE dead flag
5812          */
5813         rsm_suspend_list.list_head = head;
5814         mutex_exit(&rsm_suspend_list.list_lock);
5815 
5816         tokp = head;
5817 
5818         while (tokp != NULL) {
5819                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5820                 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5821                 /*
5822                  * Error in rsmipc_send currently happens due to inaccessibility
5823                  * of the remote node.
5824                  */
5825                 if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5826                         tokp->flags |= RSM_SUSPEND_ACKPENDING;
5827                 }
5828 
5829                 tokp = tokp->next;
5830         }
5831 
5832         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5833             "rsm_send_suspend done\n"));
5834 
5835 }
5836 
5837 static void
5838 rsm_send_resume()
5839 {
5840         rsmipc_request_t        request;
5841         list_element_t          *elem, *head;
5842 
5843         /*
5844          * save the suspend list so that we know where to send
5845          * the resume messages and make the suspend list head
5846          * NULL.
5847          */
5848         mutex_enter(&rsm_suspend_list.list_lock);
5849         head = rsm_suspend_list.list_head;
5850         rsm_suspend_list.list_head = NULL;
5851         mutex_exit(&rsm_suspend_list.list_lock);
5852 
5853         while (head != NULL) {
5854                 elem = head;
5855                 head = head->next;
5856 
5857                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5858 
5859                 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5860 
5861                 kmem_free((void *)elem, sizeof (list_element_t));
5862 
5863         }
5864 
5865 }
5866 
5867 /*
5868  * This function takes path and sends a message using the sendq
5869  * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5870  * and RSMIPC_MSG_CREDIT are sent using this function.
5871  */
5872 int
5873 rsmipc_send_controlmsg(path_t *path, int msgtype)
5874 {
5875         int                     e;
5876         int                     retry_cnt = 0;
5877         int                     min_retry_cnt = 10;
5878         adapter_t               *adapter;
5879         rsm_send_t              is;
5880         rsm_send_q_handle_t     ipc_handle;
5881         rsmipc_controlmsg_t     msg;
5882         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5883 
5884         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5885             "rsmipc_send_controlmsg enter\n"));
5886 
5887         ASSERT(MUTEX_HELD(&path->mutex));
5888 
5889         adapter = path->local_adapter;
5890 
5891         DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5892             "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5893             my_nodeid, adapter->hwaddr, path->remote_node,
5894             path->remote_hwaddr, path->procmsg_cnt));
5895 
5896         if (path->state != RSMKA_PATH_ACTIVE) {
5897                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5898                     "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5899                 return (1);
5900         }
5901 
5902         ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5903 
5904         msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5905         msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5906         msg.rsmipc_hdr.rsmipc_type = msgtype;
5907         msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5908 
5909         if (msgtype == RSMIPC_MSG_CREDIT)
5910                 msg.rsmipc_credits = path->procmsg_cnt;
5911 
5912         msg.rsmipc_local_incn = path->local_incn;
5913 
5914         msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5915         /* incr the sendq, path refcnt */
5916         PATH_HOLD_NOLOCK(path);
5917         SENDQ_TOKEN_HOLD(path);
5918 
5919         do {
5920                 /* drop the path lock before doing the rsm_send */
5921                 mutex_exit(&path->mutex);
5922 
5923                 is.is_data = (void *)&msg;
5924                 is.is_size = sizeof (msg);
5925                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5926                 is.is_wait = 0;
5927 
5928                 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5929 
5930                 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5931                     e != RSMERR_BAD_BARRIER_HNDL);
5932 
5933                 mutex_enter(&path->mutex);
5934 
5935                 if (e == RSM_SUCCESS) {
5936                         break;
5937                 }
5938                 /* error counter for statistics */
5939                 atomic_inc_64(&rsm_ctrlmsg_errcnt);
5940 
5941                 DBG_PRINTF((category, RSM_ERR,
5942                     "rsmipc_send_controlmsg:rsm_send error=%d", e));
5943 
5944                 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5945                         (void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5946                             &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5947                         retry_cnt = 0;
5948                 }
5949         } while (path->state == RSMKA_PATH_ACTIVE);
5950 
5951         /* decrement the sendq,path refcnt that we incr before rsm_send */
5952         SENDQ_TOKEN_RELE(path);
5953         PATH_RELE_NOLOCK(path);
5954 
5955         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5956             "rsmipc_send_controlmsg done=%d", e));
5957         return (e);
5958 }
5959 
5960 /*
5961  * Called from rsm_force_unload and path_importer_disconnect. The memory
5962  * mapping for the imported segment is removed and the segment is
5963  * disconnected at the interconnect layer if disconnect_flag is TRUE.
5964  * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5965  * and FALSE from rsm_rebind.
5966  *
5967  * When subsequent accesses cause page faulting, the dummy page is mapped
5968  * to resolve the fault, and the mapping generation number is incremented
5969  * so that the application can be notified on a close barrier operation.
5970  *
5971  * It is important to note that the caller of rsmseg_unload is responsible for
5972  * acquiring the segment lock before making a call to rsmseg_unload. This is
5973  * required to make the caller and rsmseg_unload thread safe. The segment lock
5974  * will be released by the rsmseg_unload function.
5975  */
5976 void
5977 rsmseg_unload(rsmseg_t *im_seg)
5978 {
5979         rsmcookie_t             *hdl;
5980         void                    *shared_cookie;
5981         rsmipc_request_t        request;
5982         uint_t                  maxprot;
5983 
5984         DBG_DEFINE(category,
5985             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5986 
5987         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5988 
5989         ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5990 
5991         /* wait until segment leaves the mapping state */
5992         while (im_seg->s_state == RSM_STATE_MAPPING)
5993                 cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5994         /*
5995          * An unload is only necessary if the segment is connected. However,
5996          * if the segment was on the import list in state RSM_STATE_CONNECTING
5997          * then a connection was in progress. Change to RSM_STATE_NEW
5998          * here to cause an early exit from the connection process.
5999          */
6000         if (im_seg->s_state == RSM_STATE_NEW) {
6001                 rsmseglock_release(im_seg);
6002                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6003                     "rsmseg_unload done: RSM_STATE_NEW\n"));
6004                 return;
6005         } else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6006                 im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6007                 rsmsharelock_acquire(im_seg);
6008                 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6009                 rsmsharelock_release(im_seg);
6010                 rsmseglock_release(im_seg);
6011                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6012                     "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6013                 return;
6014         }
6015 
6016         if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6017                 if (im_seg->s_ckl != NULL) {
6018                         int e;
6019                         /* Setup protections for remap */
6020                         maxprot = PROT_USER;
6021                         if (im_seg->s_mode & RSM_PERM_READ) {
6022                                 maxprot |= PROT_READ;
6023                         }
6024                         if (im_seg->s_mode & RSM_PERM_WRITE) {
6025                                 maxprot |= PROT_WRITE;
6026                         }
6027                         hdl = im_seg->s_ckl;
6028                         for (; hdl != NULL; hdl = hdl->c_next) {
6029                                 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6030                                     remap_cookie,
6031                                     hdl->c_off, hdl->c_len,
6032                                     maxprot, 0, NULL);
6033 
6034                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6035                                     "remap returns %d\n", e));
6036                         }
6037                 }
6038 
6039                 (void) rsm_closeconnection(im_seg, &shared_cookie);
6040 
6041                 if (shared_cookie != NULL) {
6042                         /*
6043                          * inform the exporting node so this import
6044                          * can be deleted from the list of importers.
6045                          */
6046                         request.rsmipc_hdr.rsmipc_type =
6047                             RSMIPC_MSG_NOTIMPORTING;
6048                         request.rsmipc_key = im_seg->s_segid;
6049                         request.rsmipc_segment_cookie = shared_cookie;
6050                         rsmseglock_release(im_seg);
6051                         (void) rsmipc_send(im_seg->s_node, &request,
6052                             RSM_NO_REPLY);
6053                 } else {
6054                         rsmseglock_release(im_seg);
6055                 }
6056         }
6057         else
6058                 rsmseglock_release(im_seg);
6059 
6060         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6061 
6062 }
6063 
6064 /* ****************************** Importer Calls ************************ */
6065 
6066 static int
6067 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6068 {
6069         int shifts = 0;
6070 
6071         if (crgetuid(cr) != owner) {
6072                 shifts += 3;
6073                 if (!groupmember(group, cr))
6074                         shifts += 3;
6075         }
6076 
6077         mode &= ~(perm << shifts);
6078 
6079         if (mode == 0)
6080                 return (0);
6081 
6082         return (secpolicy_rsm_access(cr, owner, mode));
6083 }
6084 
6085 
6086 static int
6087 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6088     intptr_t dataptr, int mode)
6089 {
6090         int e;
6091         int                     recheck_state = 0;
6092         void                    *shared_cookie;
6093         rsmipc_request_t        request;
6094         rsmipc_reply_t          reply;
6095         rsm_permission_t        access;
6096         adapter_t               *adapter;
6097         rsm_addr_t              addr = 0;
6098         rsm_import_share_t      *sharedp;
6099         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6100 
6101         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6102 
6103         adapter = rsm_getadapter(msg, mode);
6104         if (adapter == NULL) {
6105                 DBG_PRINTF((category, RSM_ERR,
6106                     "rsm_connect done:ENODEV adapter=NULL\n"));
6107                 return (RSMERR_CTLR_NOT_PRESENT);
6108         }
6109 
6110         if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6111                 rsmka_release_adapter(adapter);
6112                 DBG_PRINTF((category, RSM_ERR,
6113                     "rsm_connect done:ENODEV loopback\n"));
6114                 return (RSMERR_CTLR_NOT_PRESENT);
6115         }
6116 
6117 
6118         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6119         ASSERT(seg->s_state == RSM_STATE_NEW);
6120 
6121         /*
6122          * Translate perm to access
6123          */
6124         if (msg->perm & ~RSM_PERM_RDWR) {
6125                 rsmka_release_adapter(adapter);
6126                 DBG_PRINTF((category, RSM_ERR,
6127                     "rsm_connect done:EINVAL invalid perms\n"));
6128                 return (RSMERR_BAD_PERMS);
6129         }
6130         access = 0;
6131         if (msg->perm & RSM_PERM_READ)
6132                 access |= RSM_ACCESS_READ;
6133         if (msg->perm & RSM_PERM_WRITE)
6134                 access |= RSM_ACCESS_WRITE;
6135 
6136         seg->s_node = msg->nodeid;
6137 
6138         /*
6139          * Adding to the import list locks the segment; release the segment
6140          * lock so we can get the reply for the send.
6141          */
6142         e = rsmimport_add(seg, msg->key);
6143         if (e) {
6144                 rsmka_release_adapter(adapter);
6145                 DBG_PRINTF((category, RSM_ERR,
6146                     "rsm_connect done:rsmimport_add failed %d\n", e));
6147                 return (e);
6148         }
6149         seg->s_state = RSM_STATE_CONNECTING;
6150 
6151         /*
6152          * Set the s_adapter field here so as to have a valid comparison of
6153          * the adapter and the s_adapter value during rsmshare_get. For
6154          * any error, set s_adapter to NULL before doing a release_adapter
6155          */
6156         seg->s_adapter = adapter;
6157 
6158         rsmseglock_release(seg);
6159 
6160         /*
6161          * get the pointer to the shared data structure; the
6162          * shared data is locked and refcount has been incremented
6163          */
6164         sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6165 
6166         ASSERT(rsmsharelock_held(seg));
6167 
6168         do {
6169                 /* flag indicates whether we need to recheck the state */
6170                 recheck_state = 0;
6171                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6172                     "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6173                 switch (sharedp->rsmsi_state) {
6174                 case RSMSI_STATE_NEW:
6175                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6176                         break;
6177                 case RSMSI_STATE_CONNECTING:
6178                         /* FALLTHRU */
6179                 case RSMSI_STATE_CONN_QUIESCE:
6180                         /* FALLTHRU */
6181                 case RSMSI_STATE_MAP_QUIESCE:
6182                         /* wait for the state to change */
6183                         while ((sharedp->rsmsi_state ==
6184                             RSMSI_STATE_CONNECTING) ||
6185                             (sharedp->rsmsi_state ==
6186                             RSMSI_STATE_CONN_QUIESCE) ||
6187                             (sharedp->rsmsi_state ==
6188                             RSMSI_STATE_MAP_QUIESCE)) {
6189                                 if (cv_wait_sig(&sharedp->rsmsi_cv,
6190                                     &sharedp->rsmsi_lock) == 0) {
6191                                         /* signalled - clean up and return */
6192                                         rsmsharelock_release(seg);
6193                                         rsmimport_rm(seg);
6194                                         seg->s_adapter = NULL;
6195                                         rsmka_release_adapter(adapter);
6196                                         seg->s_state = RSM_STATE_NEW;
6197                                         DBG_PRINTF((category, RSM_ERR,
6198                                             "rsm_connect done: INTERRUPTED\n"));
6199                                         return (RSMERR_INTERRUPTED);
6200                                 }
6201                         }
6202                         /*
6203                          * the state changed, loop back and check what it is
6204                          */
6205                         recheck_state = 1;
6206                         break;
6207                 case RSMSI_STATE_ABORT_CONNECT:
6208                         /* exit the loop and clean up further down */
6209                         break;
6210                 case RSMSI_STATE_CONNECTED:
6211                         /* already connected, good - fall through */
6212                 case RSMSI_STATE_MAPPED:
6213                         /* already mapped, wow - fall through */
6214                         /* access validation etc is done further down */
6215                         break;
6216                 case RSMSI_STATE_DISCONNECTED:
6217                         /* disconnected - so reconnect now */
6218                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6219                         break;
6220                 default:
6221                         ASSERT(0); /* Invalid State */
6222                 }
6223         } while (recheck_state);
6224 
6225         if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6226                 /* we are the first to connect */
6227                 rsmsharelock_release(seg);
6228 
6229                 if (msg->nodeid != my_nodeid) {
6230                         addr = get_remote_hwaddr(adapter, msg->nodeid);
6231 
6232                         if ((int64_t)addr < 0) {
6233                                 rsmsharelock_acquire(seg);
6234                                 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6235                                     RSMSI_STATE_NEW);
6236                                 rsmsharelock_release(seg);
6237                                 rsmimport_rm(seg);
6238                                 seg->s_adapter = NULL;
6239                                 rsmka_release_adapter(adapter);
6240                                 seg->s_state = RSM_STATE_NEW;
6241                                 DBG_PRINTF((category, RSM_ERR,
6242                                     "rsm_connect done: hwaddr<0\n"));
6243                                 return (RSMERR_INTERNAL_ERROR);
6244                         }
6245                 } else {
6246                         addr = adapter->hwaddr;
6247                 }
6248 
6249                 /*
6250                  * send request to node [src, dest, key, msgid] and get back
6251                  * [status, msgid, cookie]
6252                  */
6253                 request.rsmipc_key = msg->key;
6254                 /*
6255                  * we need the s_mode of the exporter so pass
6256                  * RSM_ACCESS_TRUSTED
6257                  */
6258                 request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6259                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6260                 request.rsmipc_adapter_hwaddr = addr;
6261                 request.rsmipc_segment_cookie = sharedp;
6262 
6263                 e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6264                 if (e) {
6265                         rsmsharelock_acquire(seg);
6266                         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6267                             RSMSI_STATE_NEW);
6268                         rsmsharelock_release(seg);
6269                         rsmimport_rm(seg);
6270                         seg->s_adapter = NULL;
6271                         rsmka_release_adapter(adapter);
6272                         seg->s_state = RSM_STATE_NEW;
6273                         DBG_PRINTF((category, RSM_ERR,
6274                             "rsm_connect done:rsmipc_send failed %d\n", e));
6275                         return (e);
6276                 }
6277 
6278                 if (reply.rsmipc_status != RSM_SUCCESS) {
6279                         rsmsharelock_acquire(seg);
6280                         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6281                             RSMSI_STATE_NEW);
6282                         rsmsharelock_release(seg);
6283                         rsmimport_rm(seg);
6284                         seg->s_adapter = NULL;
6285                         rsmka_release_adapter(adapter);
6286                         seg->s_state = RSM_STATE_NEW;
6287                         DBG_PRINTF((category, RSM_ERR,
6288                             "rsm_connect done:rsmipc_send reply err %d\n",
6289                             reply.rsmipc_status));
6290                         return (reply.rsmipc_status);
6291                 }
6292 
6293                 rsmsharelock_acquire(seg);
6294                 /* store the information recvd into the shared data struct */
6295                 sharedp->rsmsi_mode = reply.rsmipc_mode;
6296                 sharedp->rsmsi_uid = reply.rsmipc_uid;
6297                 sharedp->rsmsi_gid = reply.rsmipc_gid;
6298                 sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6299                 sharedp->rsmsi_cookie = sharedp;
6300         }
6301 
6302         rsmsharelock_release(seg);
6303 
6304         /*
6305          * Get the segment lock and check for a force disconnect
6306          * from the export side which would have changed the state
6307          * back to RSM_STATE_NEW. Once the segment lock is acquired a
6308          * force disconnect will be held off until the connection
6309          * has completed.
6310          */
6311         rsmseglock_acquire(seg);
6312         rsmsharelock_acquire(seg);
6313         ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6314             seg->s_state == RSM_STATE_ABORT_CONNECT);
6315 
6316         shared_cookie = sharedp->rsmsi_cookie;
6317 
6318         if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6319             (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6320                 seg->s_state = RSM_STATE_NEW;
6321                 seg->s_adapter = NULL;
6322                 rsmsharelock_release(seg);
6323                 rsmseglock_release(seg);
6324                 rsmimport_rm(seg);
6325                 rsmka_release_adapter(adapter);
6326 
6327                 rsmsharelock_acquire(seg);
6328                 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6329                         /*
6330                          * set a flag indicating abort handling has been
6331                          * done
6332                          */
6333                         sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6334                         rsmsharelock_release(seg);
6335                         /* send a message to exporter - only once */
6336                         (void) rsm_send_notimporting(msg->nodeid,
6337                             msg->key, shared_cookie);
6338                         rsmsharelock_acquire(seg);
6339                         /*
6340                          * wake up any waiting importers and inform that
6341                          * connection has been aborted
6342                          */
6343                         cv_broadcast(&sharedp->rsmsi_cv);
6344                 }
6345                 rsmsharelock_release(seg);
6346 
6347                 DBG_PRINTF((category, RSM_ERR,
6348                     "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6349                 return (RSMERR_INTERRUPTED);
6350         }
6351 
6352 
6353         /*
6354          * We need to verify that this process has access
6355          */
6356         e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6357             access & sharedp->rsmsi_mode,
6358             (int)(msg->perm & RSM_PERM_RDWR), cred);
6359         if (e) {
6360                 rsmsharelock_release(seg);
6361                 seg->s_state = RSM_STATE_NEW;
6362                 seg->s_adapter = NULL;
6363                 rsmseglock_release(seg);
6364                 rsmimport_rm(seg);
6365                 rsmka_release_adapter(adapter);
6366                 /*
6367                  * No need to lock segment it has been removed
6368                  * from the hash table
6369                  */
6370                 rsmsharelock_acquire(seg);
6371                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6372                         rsmsharelock_release(seg);
6373                         /* this is the first importer */
6374 
6375                         (void) rsm_send_notimporting(msg->nodeid, msg->key,
6376                             shared_cookie);
6377                         rsmsharelock_acquire(seg);
6378                         sharedp->rsmsi_state = RSMSI_STATE_NEW;
6379                         cv_broadcast(&sharedp->rsmsi_cv);
6380                 }
6381                 rsmsharelock_release(seg);
6382 
6383                 DBG_PRINTF((category, RSM_ERR,
6384                     "rsm_connect done: ipcaccess failed\n"));
6385                 return (RSMERR_PERM_DENIED);
6386         }
6387 
6388         /* update state and cookie */
6389         seg->s_segid = sharedp->rsmsi_segid;
6390         seg->s_len = sharedp->rsmsi_seglen;
6391         seg->s_mode = access & sharedp->rsmsi_mode;
6392         seg->s_pid = ddi_get_pid();
6393         seg->s_mapinfo = NULL;
6394 
6395         if (seg->s_node != my_nodeid) {
6396                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6397                         e = adapter->rsmpi_ops->rsm_connect(
6398                             adapter->rsmpi_handle,
6399                             addr, seg->s_segid, &sharedp->rsmsi_handle);
6400 
6401                         if (e != RSM_SUCCESS) {
6402                                 seg->s_state = RSM_STATE_NEW;
6403                                 seg->s_adapter = NULL;
6404                                 rsmsharelock_release(seg);
6405                                 rsmseglock_release(seg);
6406                                 rsmimport_rm(seg);
6407                                 rsmka_release_adapter(adapter);
6408                                 /*
6409                                  *  inform the exporter to delete this importer
6410                                  */
6411                                 (void) rsm_send_notimporting(msg->nodeid,
6412                                     msg->key, shared_cookie);
6413 
6414                                 /*
6415                                  * Now inform any waiting importers to
6416                                  * retry connect. This needs to be done
6417                                  * after sending notimporting so that
6418                                  * the notimporting is sent before a waiting
6419                                  * importer sends a segconnect while retrying
6420                                  *
6421                                  * No need to lock segment it has been removed
6422                                  * from the hash table
6423                                  */
6424 
6425                                 rsmsharelock_acquire(seg);
6426                                 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6427                                 cv_broadcast(&sharedp->rsmsi_cv);
6428                                 rsmsharelock_release(seg);
6429 
6430                                 DBG_PRINTF((category, RSM_ERR,
6431                                     "rsm_connect error %d\n", e));
6432                                 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6433                                         return (
6434                                             RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6435                                 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6436                                     (e == RSMERR_UNKNOWN_RSM_ADDR))
6437                                         return (RSMERR_REMOTE_NODE_UNREACHABLE);
6438                                 else
6439                                         return (e);
6440                         }
6441 
6442                 }
6443                 seg->s_handle.in = sharedp->rsmsi_handle;
6444 
6445         }
6446 
6447         seg->s_state = RSM_STATE_CONNECT;
6448 
6449 
6450         seg->s_flags &= ~RSM_IMPORT_DUMMY;       /* clear dummy flag */
6451         if (bar_va) {
6452                 /* increment generation number on barrier page */
6453                 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6454                 /* return user off into barrier page where status will be */
6455                 msg->off = (int)seg->s_hdr.rsmrc_num;
6456                 msg->gnum = bar_va[msg->off];     /* gnum race */
6457         } else {
6458                 msg->off = 0;
6459                 msg->gnum = 0;       /* gnum race */
6460         }
6461 
6462         msg->len = (int)sharedp->rsmsi_seglen;
6463         msg->rnum = seg->s_minor;
6464         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6465         rsmsharelock_release(seg);
6466         rsmseglock_release(seg);
6467 
6468         /* Return back to user the segment size & perm in case it's needed */
6469 
6470 #ifdef _MULTI_DATAMODEL
6471         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6472                 rsm_ioctlmsg32_t msg32;
6473 
6474                 if (msg->len > UINT_MAX)
6475                         msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6476                 else
6477                         msg32.len = msg->len;
6478                 msg32.off = msg->off;
6479                 msg32.perm = msg->perm;
6480                 msg32.gnum = msg->gnum;
6481                 msg32.rnum = msg->rnum;
6482 
6483                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6484                     "rsm_connect done\n"));
6485 
6486                 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6487                     sizeof (msg32), mode))
6488                         return (RSMERR_BAD_ADDR);
6489                 else
6490                         return (RSM_SUCCESS);
6491         }
6492 #endif
6493         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6494 
6495         if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6496             mode))
6497                 return (RSMERR_BAD_ADDR);
6498         else
6499                 return (RSM_SUCCESS);
6500 }
6501 
6502 static int
6503 rsm_unmap(rsmseg_t *seg)
6504 {
6505         int                     err;
6506         adapter_t               *adapter;
6507         rsm_import_share_t      *sharedp;
6508         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6509 
6510         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6511             "rsm_unmap enter %u\n", seg->s_segid));
6512 
6513         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6514 
6515         /* assert seg is locked */
6516         ASSERT(rsmseglock_held(seg));
6517         ASSERT(seg->s_state != RSM_STATE_MAPPING);
6518 
6519         if ((seg->s_state != RSM_STATE_ACTIVE) &&
6520             (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6521                 /* segment unmap has already been done */
6522                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6523                 return (RSM_SUCCESS);
6524         }
6525 
6526         sharedp = seg->s_share;
6527 
6528         rsmsharelock_acquire(seg);
6529 
6530         /*
6531          *      - shared data struct is in MAPPED or MAP_QUIESCE state
6532          */
6533 
6534         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6535             sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6536 
6537         /*
6538          * Unmap pages - previously rsm_memseg_import_unmap was called only if
6539          * the segment cookie list was NULL; but it is always NULL when
6540          * called from rsmmap_unmap and won't be NULL when called for
6541          * a force disconnect - so the check for NULL cookie list was removed
6542          */
6543 
6544         ASSERT(sharedp->rsmsi_mapcnt > 0);
6545 
6546         sharedp->rsmsi_mapcnt--;
6547 
6548         if (sharedp->rsmsi_mapcnt == 0) {
6549                 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6550                         /* unmap the shared RSMPI mapping */
6551                         adapter = seg->s_adapter;
6552                         if (seg->s_node != my_nodeid) {
6553                                 ASSERT(sharedp->rsmsi_handle != NULL);
6554                                 err = adapter->rsmpi_ops->
6555                                     rsm_unmap(sharedp->rsmsi_handle);
6556                                 DBG_PRINTF((category, RSM_DEBUG,
6557                                     "rsm_unmap: rsmpi unmap %d\n", err));
6558                                 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6559                                 sharedp->rsmsi_mapinfo = NULL;
6560                         }
6561                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6562                 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6563                         sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6564                 }
6565         }
6566 
6567         rsmsharelock_release(seg);
6568 
6569         /*
6570          * The s_cookie field is used to store the cookie returned from the
6571          * ddi_umem_lock when binding the pages for an export segment. This
6572          * is the primary use of the s_cookie field and does not normally
6573          * pertain to any importing segment except in the loopback case.
6574          * For the loopback case, the import segment and export segment are
6575          * on the same node, the s_cookie field of the segment structure for
6576          * the importer is initialized to the s_cookie field in the exported
6577          * segment during the map operation and is used during the call to
6578          * devmap_umem_setup for the import mapping.
6579          * Thus, during unmap, we simply need to set s_cookie to NULL to
6580          * indicate that the mapping no longer exists.
6581          */
6582         seg->s_cookie = NULL;
6583 
6584         seg->s_mapinfo = NULL;
6585 
6586         if (seg->s_state == RSM_STATE_ACTIVE)
6587                 seg->s_state = RSM_STATE_CONNECT;
6588         else
6589                 seg->s_state = RSM_STATE_CONN_QUIESCE;
6590 
6591         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6592 
6593         return (RSM_SUCCESS);
6594 }
6595 
6596 /*
6597  * cookie returned here if not null indicates that it is
6598  * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6599  * message.
6600  */
6601 static int
6602 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6603 {
6604         int                     e;
6605         adapter_t               *adapter;
6606         rsm_import_share_t      *sharedp;
6607         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6608 
6609         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6610             "rsm_closeconnection enter\n"));
6611 
6612         *cookie = (void *)NULL;
6613 
6614         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6615 
6616         /* assert seg is locked */
6617         ASSERT(rsmseglock_held(seg));
6618 
6619         if (seg->s_state == RSM_STATE_DISCONNECT) {
6620                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6621                     "rsm_closeconnection done: already disconnected\n"));
6622                 return (RSM_SUCCESS);
6623         }
6624 
6625         /* wait for all putv/getv ops to get done */
6626         while (seg->s_rdmacnt > 0) {
6627                 cv_wait(&seg->s_cv, &seg->s_lock);
6628         }
6629 
6630         (void) rsm_unmap(seg);
6631 
6632         ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6633             seg->s_state == RSM_STATE_CONN_QUIESCE);
6634 
6635         adapter = seg->s_adapter;
6636         sharedp = seg->s_share;
6637 
6638         ASSERT(sharedp != NULL);
6639 
6640         rsmsharelock_acquire(seg);
6641 
6642         /*
6643          * Disconnect on adapter
6644          *
6645          * The current algorithm is stateless, I don't have to contact
6646          * server when I go away. It only gives me permissions. Of course,
6647          * the adapters will talk to terminate the connect.
6648          *
6649          * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6650          */
6651         if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6652             (sharedp->rsmsi_node != my_nodeid)) {
6653 
6654                 if (sharedp->rsmsi_refcnt == 1) {
6655                         /* this is the last importer */
6656                         ASSERT(sharedp->rsmsi_mapcnt == 0);
6657 
6658                         e = adapter->rsmpi_ops->
6659                             rsm_disconnect(sharedp->rsmsi_handle);
6660                         if (e != RSM_SUCCESS) {
6661                                 DBG_PRINTF((category, RSM_DEBUG,
6662                                     "rsm:disconnect failed seg=%x:err=%d\n",
6663                                     seg->s_key, e));
6664                         }
6665                 }
6666         }
6667 
6668         seg->s_handle.in = NULL;
6669 
6670         sharedp->rsmsi_refcnt--;
6671 
6672         if (sharedp->rsmsi_refcnt == 0) {
6673                 *cookie = (void *)sharedp->rsmsi_cookie;
6674                 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6675                 sharedp->rsmsi_handle = NULL;
6676                 rsmsharelock_release(seg);
6677 
6678                 /* clean up the shared data structure */
6679                 mutex_destroy(&sharedp->rsmsi_lock);
6680                 cv_destroy(&sharedp->rsmsi_cv);
6681                 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6682 
6683         } else {
6684                 rsmsharelock_release(seg);
6685         }
6686 
6687         /* increment generation number on barrier page */
6688         if (bar_va) {
6689                 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6690         }
6691 
6692         /*
6693          * The following needs to be done after any
6694          * rsmsharelock calls which use seg->s_share.
6695          */
6696         seg->s_share = NULL;
6697 
6698         seg->s_state = RSM_STATE_DISCONNECT;
6699         /* signal anyone waiting in the CONN_QUIESCE state */
6700         cv_broadcast(&seg->s_cv);
6701 
6702         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6703             "rsm_closeconnection done\n"));
6704 
6705         return (RSM_SUCCESS);
6706 }
6707 
6708 int
6709 rsm_disconnect(rsmseg_t *seg)
6710 {
6711         rsmipc_request_t        request;
6712         void                    *shared_cookie;
6713         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6714 
6715         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6716 
6717         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6718 
6719         /* assert seg isn't locked */
6720         ASSERT(!rsmseglock_held(seg));
6721 
6722 
6723         /* Remove segment from imported list */
6724         rsmimport_rm(seg);
6725 
6726         /* acquire the segment */
6727         rsmseglock_acquire(seg);
6728 
6729         /* wait until segment leaves the mapping state */
6730         while (seg->s_state == RSM_STATE_MAPPING)
6731                 cv_wait(&seg->s_cv, &seg->s_lock);
6732 
6733         if (seg->s_state == RSM_STATE_DISCONNECT) {
6734                 seg->s_state = RSM_STATE_NEW;
6735                 rsmseglock_release(seg);
6736                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6737                     "rsm_disconnect done: already disconnected\n"));
6738                 return (RSM_SUCCESS);
6739         }
6740 
6741         (void) rsm_closeconnection(seg, &shared_cookie);
6742 
6743         /* update state */
6744         seg->s_state = RSM_STATE_NEW;
6745 
6746         if (shared_cookie != NULL) {
6747                 /*
6748                  *  This is the last importer so inform the exporting node
6749                  *  so this import can be deleted from the list of importers.
6750                  */
6751                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6752                 request.rsmipc_key = seg->s_segid;
6753                 request.rsmipc_segment_cookie = shared_cookie;
6754                 rsmseglock_release(seg);
6755                 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6756         } else {
6757                 rsmseglock_release(seg);
6758         }
6759 
6760         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6761 
6762         return (DDI_SUCCESS);
6763 }
6764 
6765 /*ARGSUSED*/
6766 static int
6767 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6768     struct pollhead **phpp)
6769 {
6770         minor_t         rnum;
6771         rsmresource_t   *res;
6772         rsmseg_t        *seg;
6773         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6774 
6775         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6776 
6777         /* find minor, no lock */
6778         rnum = getminor(dev);
6779         res = rsmresource_lookup(rnum, RSM_NOLOCK);
6780 
6781         /* poll is supported only for export/import segments */
6782         if ((res == NULL) || (res == RSMRC_RESERVED) ||
6783             (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6784                 return (ENXIO);
6785         }
6786 
6787         *reventsp = 0;
6788 
6789         /*
6790          * An exported segment must be in state RSM_STATE_EXPORT; an
6791          * imported segment must be in state RSM_STATE_ACTIVE.
6792          */
6793         seg = (rsmseg_t *)res;
6794 
6795         if (seg->s_pollevent) {
6796                 *reventsp = POLLRDNORM;
6797         } else if (!anyyet) {
6798                 /* cannot take segment lock here */
6799                 *phpp = &seg->s_poll;
6800                 seg->s_pollflag |= RSM_SEGMENT_POLL;
6801         }
6802         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6803         return (0);
6804 }
6805 
6806 
6807 
6808 /* ************************* IOCTL Commands ********************* */
6809 
6810 static rsmseg_t *
6811 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6812     rsm_resource_type_t type)
6813 {
6814         /* get segment from resource handle */
6815         rsmseg_t *seg;
6816         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6817 
6818         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6819 
6820 
6821         if (res != RSMRC_RESERVED) {
6822                 seg = (rsmseg_t *)res;
6823         } else {
6824                 /* Allocate segment now and bind it */
6825                 seg = rsmseg_alloc(rnum, credp);
6826 
6827                 /*
6828                  * if DR pre-processing is going on or DR is in progress
6829                  * then the new export segments should be in the NEW_QSCD state
6830                  */
6831                 if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6832                         mutex_enter(&rsm_drv_data.drv_lock);
6833                         if ((rsm_drv_data.drv_state ==
6834                             RSM_DRV_PREDEL_STARTED) ||
6835                             (rsm_drv_data.drv_state ==
6836                             RSM_DRV_PREDEL_COMPLETED) ||
6837                             (rsm_drv_data.drv_state ==
6838                             RSM_DRV_DR_IN_PROGRESS)) {
6839                                 seg->s_state = RSM_STATE_NEW_QUIESCED;
6840                         }
6841                         mutex_exit(&rsm_drv_data.drv_lock);
6842                 }
6843 
6844                 rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6845         }
6846 
6847         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6848 
6849         return (seg);
6850 }
6851 
6852 static int
6853 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6854     int mode, cred_t *credp)
6855 {
6856         int error;
6857         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6858 
6859         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6860 
6861         arg = arg;
6862         credp = credp;
6863 
6864         ASSERT(seg != NULL);
6865 
6866         switch (cmd) {
6867         case RSM_IOCTL_BIND:
6868                 error = rsm_bind(seg, msg, arg, mode);
6869                 break;
6870         case RSM_IOCTL_REBIND:
6871                 error = rsm_rebind(seg, msg);
6872                 break;
6873         case RSM_IOCTL_UNBIND:
6874                 error = ENOTSUP;
6875                 break;
6876         case RSM_IOCTL_PUBLISH:
6877                 error = rsm_publish(seg, msg, arg, mode);
6878                 break;
6879         case RSM_IOCTL_REPUBLISH:
6880                 error = rsm_republish(seg, msg, mode);
6881                 break;
6882         case RSM_IOCTL_UNPUBLISH:
6883                 error = rsm_unpublish(seg, 1);
6884                 break;
6885         default:
6886                 error = EINVAL;
6887                 break;
6888         }
6889 
6890         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6891             error));
6892 
6893         return (error);
6894 }
6895 static int
6896 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6897     int mode, cred_t *credp)
6898 {
6899         int error;
6900         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6901 
6902         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6903 
6904         ASSERT(seg);
6905 
6906         switch (cmd) {
6907         case RSM_IOCTL_CONNECT:
6908                 error = rsm_connect(seg, msg, credp, arg, mode);
6909                 break;
6910         default:
6911                 error = EINVAL;
6912                 break;
6913         }
6914 
6915         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6916             error));
6917         return (error);
6918 }
6919 
6920 static int
6921 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6922     int mode)
6923 {
6924         int e;
6925         adapter_t *adapter;
6926         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6927 
6928         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6929 
6930 
6931         if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6932                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6933                     "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6934                 return (RSMERR_CONN_ABORTED);
6935         } else if (seg->s_node == my_nodeid) {
6936                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6937                     "rsmbar_ioctl done: loopback\n"));
6938                 return (RSM_SUCCESS);
6939         }
6940 
6941         adapter = seg->s_adapter;
6942 
6943         switch (cmd) {
6944         case RSM_IOCTL_BAR_CHECK:
6945                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6946                     "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6947                 return (bar_va ? RSM_SUCCESS : EINVAL);
6948         case RSM_IOCTL_BAR_OPEN:
6949                 e = adapter->rsmpi_ops->
6950                     rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6951                 break;
6952         case RSM_IOCTL_BAR_ORDER:
6953                 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6954                 break;
6955         case RSM_IOCTL_BAR_CLOSE:
6956                 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6957                 break;
6958         default:
6959                 e = EINVAL;
6960                 break;
6961         }
6962 
6963         if (e == RSM_SUCCESS) {
6964 #ifdef _MULTI_DATAMODEL
6965                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6966                         rsm_ioctlmsg32_t msg32;
6967                         int i;
6968 
6969                         for (i = 0; i < 4; i++) {
6970                                 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6971                         }
6972 
6973                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6974                             "rsmbar_ioctl done\n"));
6975                         if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6976                             sizeof (msg32), mode))
6977                                 return (RSMERR_BAD_ADDR);
6978                         else
6979                                 return (RSM_SUCCESS);
6980                 }
6981 #endif
6982                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6983                     "rsmbar_ioctl done\n"));
6984                 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6985                     sizeof (*msg), mode))
6986                         return (RSMERR_BAD_ADDR);
6987                 else
6988                         return (RSM_SUCCESS);
6989         }
6990 
6991         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6992             "rsmbar_ioctl done: error=%d\n", e));
6993 
6994         return (e);
6995 }
6996 
6997 /*
6998  * Ring the doorbell of the export segment to which this segment is
6999  * connected.
7000  */
7001 static int
7002 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7003 {
7004         int e = 0;
7005         rsmipc_request_t request;
7006 
7007         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7008 
7009         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7010 
7011         request.rsmipc_key = seg->s_segid;
7012         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7013         request.rsmipc_segment_cookie = NULL;
7014         e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7015 
7016         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7017             "exportbell_ioctl done: %d\n", e));
7018 
7019         return (e);
7020 }
7021 
7022 /*
7023  * Ring the doorbells of all segments importing this segment
7024  */
7025 static int
7026 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7027 {
7028         importing_token_t       *token = NULL;
7029         rsmipc_request_t        request;
7030         int                     index;
7031 
7032         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7033 
7034         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7035 
7036         ASSERT(seg->s_state != RSM_STATE_NEW &&
7037             seg->s_state != RSM_STATE_NEW_QUIESCED);
7038 
7039         request.rsmipc_key = seg->s_segid;
7040         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7041 
7042         index = rsmhash(seg->s_segid);
7043 
7044         token = importer_list.bucket[index];
7045 
7046         while (token != NULL) {
7047                 if (seg->s_key == token->key) {
7048                         request.rsmipc_segment_cookie =
7049                             token->import_segment_cookie;
7050                         (void) rsmipc_send(token->importing_node,
7051                             &request, RSM_NO_REPLY);
7052                 }
7053                 token = token->next;
7054         }
7055 
7056         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7057             "importbell_ioctl done\n"));
7058         return (RSM_SUCCESS);
7059 }
7060 
7061 static int
7062 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7063     rsm_poll_event_t **eventspp, int mode)
7064 {
7065         rsm_poll_event_t        *evlist = NULL;
7066         size_t                  evlistsz;
7067         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7068 
7069 #ifdef _MULTI_DATAMODEL
7070         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7071                 int i;
7072                 rsm_consume_event_msg32_t cemsg32 = {0};
7073                 rsm_poll_event32_t      event32[RSM_MAX_POLLFDS];
7074                 rsm_poll_event32_t      *evlist32;
7075                 size_t                  evlistsz32;
7076 
7077                 /* copyin the ioctl message */
7078                 if (ddi_copyin(arg, (caddr_t)&cemsg32,
7079                     sizeof (rsm_consume_event_msg32_t), mode)) {
7080                         DBG_PRINTF((category, RSM_ERR,
7081                             "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7082                         return (RSMERR_BAD_ADDR);
7083                 }
7084                 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7085                 msgp->numents = (int)cemsg32.numents;
7086 
7087                 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7088                 /*
7089                  * If numents is large alloc events list on heap otherwise
7090                  * use the address of array that was passed in.
7091                  */
7092                 if (msgp->numents > RSM_MAX_POLLFDS) {
7093                         if (msgp->numents > max_segs) { /* validate numents */
7094                                 DBG_PRINTF((category, RSM_ERR,
7095                                     "consumeevent_copyin: "
7096                                     "RSMERR_BAD_ARGS_ERRORS\n"));
7097                                 return (RSMERR_BAD_ARGS_ERRORS);
7098                         }
7099                         evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7100                 } else {
7101                         evlist32 = event32;
7102                 }
7103 
7104                 /* copyin the seglist into the rsm_poll_event32_t array */
7105                 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7106                     evlistsz32, mode)) {
7107                         if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7108                                 kmem_free(evlist32, evlistsz32);
7109                         }
7110                         DBG_PRINTF((category, RSM_ERR,
7111                             "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7112                         return (RSMERR_BAD_ADDR);
7113                 }
7114 
7115                 /* evlist and evlistsz are based on rsm_poll_event_t type */
7116                 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7117 
7118                 if (msgp->numents > RSM_MAX_POLLFDS) {
7119                         evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7120                         *eventspp = evlist;
7121                 } else {
7122                         evlist = *eventspp;
7123                 }
7124                 /*
7125                  * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7126                  * array
7127                  */
7128                 for (i = 0; i < msgp->numents; i++) {
7129                         evlist[i].rnum = evlist32[i].rnum;
7130                         evlist[i].fdsidx = evlist32[i].fdsidx;
7131                         evlist[i].revent = evlist32[i].revent;
7132                 }
7133                 /* free the temp 32-bit event list */
7134                 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7135                         kmem_free(evlist32, evlistsz32);
7136                 }
7137 
7138                 return (RSM_SUCCESS);
7139         }
7140 #endif
7141         /* copyin the ioctl message */
7142         if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7143             mode)) {
7144                 DBG_PRINTF((category, RSM_ERR,
7145                     "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7146                 return (RSMERR_BAD_ADDR);
7147         }
7148         /*
7149          * If numents is large alloc events list on heap otherwise
7150          * use the address of array that was passed in.
7151          */
7152         if (msgp->numents > RSM_MAX_POLLFDS) {
7153                 if (msgp->numents > max_segs) { /* validate numents */
7154                         DBG_PRINTF((category, RSM_ERR,
7155                             "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7156                         return (RSMERR_BAD_ARGS_ERRORS);
7157                 }
7158                 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7159                 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7160                 *eventspp  = evlist;
7161         }
7162 
7163         /* copyin the seglist */
7164         if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7165             sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7166                 if (evlist) {
7167                         kmem_free(evlist, evlistsz);
7168                         *eventspp = NULL;
7169                 }
7170                 DBG_PRINTF((category, RSM_ERR,
7171                     "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7172                 return (RSMERR_BAD_ADDR);
7173         }
7174 
7175         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7176             "consumeevent_copyin done\n"));
7177         return (RSM_SUCCESS);
7178 }
7179 
7180 static int
7181 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7182     rsm_poll_event_t *eventsp, int mode)
7183 {
7184         size_t                  evlistsz;
7185         int                     err = RSM_SUCCESS;
7186         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7187 
7188         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7189             "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7190             msgp->numents, eventsp));
7191 
7192 #ifdef _MULTI_DATAMODEL
7193         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7194                 int i;
7195                 rsm_poll_event32_t      event32[RSM_MAX_POLLFDS];
7196                 rsm_poll_event32_t      *evlist32;
7197                 size_t                  evlistsz32;
7198 
7199                 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7200                 if (msgp->numents > RSM_MAX_POLLFDS) {
7201                         evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7202                 } else {
7203                         evlist32 = event32;
7204                 }
7205 
7206                 /*
7207                  * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7208                  * array
7209                  */
7210                 for (i = 0; i < msgp->numents; i++) {
7211                         evlist32[i].rnum = eventsp[i].rnum;
7212                         evlist32[i].fdsidx = eventsp[i].fdsidx;
7213                         evlist32[i].revent = eventsp[i].revent;
7214                 }
7215 
7216                 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7217                     evlistsz32, mode)) {
7218                         err = RSMERR_BAD_ADDR;
7219                 }
7220 
7221                 if (msgp->numents > RSM_MAX_POLLFDS) {
7222                         if (evlist32) { /* free the temp 32-bit event list */
7223                                 kmem_free(evlist32, evlistsz32);
7224                         }
7225                         /*
7226                          * eventsp and evlistsz are based on rsm_poll_event_t
7227                          * type
7228                          */
7229                         evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7230                         /* event list on the heap and needs to be freed here */
7231                         if (eventsp) {
7232                                 kmem_free(eventsp, evlistsz);
7233                         }
7234                 }
7235 
7236                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7237                     "consumeevent_copyout done: err=%d\n", err));
7238                 return (err);
7239         }
7240 #endif
7241         evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7242 
7243         if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7244             mode)) {
7245                 err = RSMERR_BAD_ADDR;
7246         }
7247 
7248         if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7249                 /* event list on the heap and needs to be freed here */
7250                 kmem_free(eventsp, evlistsz);
7251         }
7252 
7253         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7254             "consumeevent_copyout done: err=%d\n", err));
7255         return (err);
7256 }
7257 
7258 static int
7259 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7260 {
7261         int     rc;
7262         int     i;
7263         minor_t rnum;
7264         rsm_consume_event_msg_t msg = {0};
7265         rsmseg_t                *seg;
7266         rsm_poll_event_t        *event_list;
7267         rsm_poll_event_t        events[RSM_MAX_POLLFDS];
7268         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7269 
7270         event_list = events;
7271 
7272         if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7273             RSM_SUCCESS) {
7274                 return (rc);
7275         }
7276 
7277         for (i = 0; i < msg.numents; i++) {
7278                 rnum = event_list[i].rnum;
7279                 event_list[i].revent = 0;
7280                 /* get the segment structure */
7281                 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7282                 if (seg) {
7283                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7284                             "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7285                             seg));
7286                         if (seg->s_pollevent) {
7287                                 /* consume the event */
7288                                 atomic_dec_32(&seg->s_pollevent);
7289                                 event_list[i].revent = POLLRDNORM;
7290                         }
7291                         rsmseglock_release(seg);
7292                 }
7293         }
7294 
7295         if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7296             RSM_SUCCESS) {
7297                 return (rc);
7298         }
7299 
7300         return (RSM_SUCCESS);
7301 }
7302 
7303 static int
7304 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7305 {
7306         int size;
7307         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7308 
7309         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7310 
7311 #ifdef _MULTI_DATAMODEL
7312         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7313                 rsmka_iovec32_t *iovec32, *iovec32_base;
7314                 int i;
7315 
7316                 size = count * sizeof (rsmka_iovec32_t);
7317                 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7318                 if (ddi_copyin((caddr_t)user_vec,
7319                     (caddr_t)iovec32, size, mode)) {
7320                         kmem_free(iovec32, size);
7321                         DBG_PRINTF((category, RSM_DEBUG,
7322                             "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7323                         return (RSMERR_BAD_ADDR);
7324                 }
7325 
7326                 for (i = 0; i < count; i++, iovec++, iovec32++) {
7327                         iovec->io_type = (int)iovec32->io_type;
7328                         if (iovec->io_type == RSM_HANDLE_TYPE)
7329                                 iovec->local.segid = (rsm_memseg_id_t)
7330                                     iovec32->local;
7331                         else
7332                                 iovec->local.vaddr =
7333                                     (caddr_t)(uintptr_t)iovec32->local;
7334                         iovec->local_offset = (size_t)iovec32->local_offset;
7335                         iovec->remote_offset = (size_t)iovec32->remote_offset;
7336                         iovec->transfer_len = (size_t)iovec32->transfer_len;
7337 
7338                 }
7339                 kmem_free(iovec32_base, size);
7340                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7341                     "iovec_copyin done\n"));
7342                 return (DDI_SUCCESS);
7343         }
7344 #endif
7345 
7346         size = count * sizeof (rsmka_iovec_t);
7347         if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7348                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7349                     "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7350                 return (RSMERR_BAD_ADDR);
7351         }
7352 
7353         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7354 
7355         return (DDI_SUCCESS);
7356 }
7357 
7358 
7359 static int
7360 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7361 {
7362         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7363 
7364         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7365 
7366 #ifdef _MULTI_DATAMODEL
7367         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7368                 rsmka_scat_gath32_t sg_io32;
7369 
7370                 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7371                     mode)) {
7372                         DBG_PRINTF((category, RSM_DEBUG,
7373                             "sgio_copyin done: returning EFAULT\n"));
7374                         return (RSMERR_BAD_ADDR);
7375                 }
7376                 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7377                 sg_io->io_request_count =  (size_t)sg_io32.io_request_count;
7378                 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7379                 sg_io->flags = (size_t)sg_io32.flags;
7380                 sg_io->remote_handle = (rsm_memseg_import_handle_t)
7381                     (uintptr_t)sg_io32.remote_handle;
7382                 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7383                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7384                     "sgio_copyin done\n"));
7385                 return (DDI_SUCCESS);
7386         }
7387 #endif
7388         if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7389             mode)) {
7390                 DBG_PRINTF((category, RSM_DEBUG,
7391                     "sgio_copyin done: returning EFAULT\n"));
7392                 return (RSMERR_BAD_ADDR);
7393         }
7394         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7395         return (DDI_SUCCESS);
7396 }
7397 
7398 static int
7399 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7400 {
7401         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7402 
7403         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7404             "sgio_resid_copyout enter\n"));
7405 
7406 #ifdef _MULTI_DATAMODEL
7407         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7408                 rsmka_scat_gath32_t sg_io32;
7409 
7410                 sg_io32.io_residual_count = sg_io->io_residual_count;
7411                 sg_io32.flags = sg_io->flags;
7412 
7413                 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7414                     (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7415                     sizeof (uint32_t), mode)) {
7416 
7417                         DBG_PRINTF((category, RSM_ERR,
7418                             "sgio_resid_copyout error: rescnt\n"));
7419                         return (RSMERR_BAD_ADDR);
7420                 }
7421 
7422                 if (ddi_copyout((caddr_t)&sg_io32.flags,
7423                     (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7424                     sizeof (uint32_t), mode)) {
7425 
7426                         DBG_PRINTF((category, RSM_ERR,
7427                             "sgio_resid_copyout error: flags\n"));
7428                         return (RSMERR_BAD_ADDR);
7429                 }
7430                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7431                     "sgio_resid_copyout done\n"));
7432                 return (DDI_SUCCESS);
7433         }
7434 #endif
7435         if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7436             (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7437             sizeof (ulong_t), mode)) {
7438 
7439                 DBG_PRINTF((category, RSM_ERR,
7440                     "sgio_resid_copyout error:rescnt\n"));
7441                 return (RSMERR_BAD_ADDR);
7442         }
7443 
7444         if (ddi_copyout((caddr_t)&sg_io->flags,
7445             (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7446             sizeof (uint_t), mode)) {
7447 
7448                 DBG_PRINTF((category, RSM_ERR,
7449                     "sgio_resid_copyout error:flags\n"));
7450                 return (RSMERR_BAD_ADDR);
7451         }
7452 
7453         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7454         return (DDI_SUCCESS);
7455 }
7456 
7457 
7458 static int
7459 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7460 {
7461         rsmka_scat_gath_t       sg_io;
7462         rsmka_iovec_t           ka_iovec_arr[RSM_MAX_IOVLEN];
7463         rsmka_iovec_t           *ka_iovec;
7464         rsmka_iovec_t           *ka_iovec_start;
7465         rsmpi_scat_gath_t       rsmpi_sg_io;
7466         rsmpi_iovec_t           iovec_arr[RSM_MAX_IOVLEN];
7467         rsmpi_iovec_t           *iovec;
7468         rsmpi_iovec_t           *iovec_start = NULL;
7469         rsmapi_access_entry_t   *acl;
7470         rsmresource_t           *res;
7471         minor_t                 rnum;
7472         rsmseg_t                *im_seg, *ex_seg;
7473         int                     e;
7474         int                     error = 0;
7475         uint_t                  i;
7476         uint_t                  iov_proc = 0; /* num of iovecs processed */
7477         size_t                  size = 0;
7478         size_t                  ka_size;
7479 
7480         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7481 
7482         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7483 
7484         credp = credp;
7485 
7486         /*
7487          * Copyin the scatter/gather structure  and build new structure
7488          * for rsmpi.
7489          */
7490         e = sgio_copyin(arg, &sg_io, mode);
7491         if (e != DDI_SUCCESS) {
7492                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7493                     "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7494                 return (e);
7495         }
7496 
7497         if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7498                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7499                     "rsm_iovec_ioctl done: request_count(%d) too large\n",
7500                     sg_io.io_request_count));
7501                 return (RSMERR_BAD_SGIO);
7502         }
7503 
7504         rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7505         rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7506         rsmpi_sg_io.io_segflg = 0;
7507 
7508         /* Allocate memory and copyin io vector array  */
7509         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7510                 ka_size =  sg_io.io_request_count * sizeof (rsmka_iovec_t);
7511                 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7512         } else {
7513                 ka_iovec_start = ka_iovec = ka_iovec_arr;
7514         }
7515         e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7516             sg_io.io_request_count, mode);
7517         if (e != DDI_SUCCESS) {
7518                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7519                         kmem_free(ka_iovec, ka_size);
7520                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7521                     "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7522                 return (e);
7523         }
7524 
7525         /* get the import segment descriptor */
7526         rnum = getminor(dev);
7527         res = rsmresource_lookup(rnum, RSM_LOCK);
7528 
7529         /*
7530          * The following sequence of locking may (or MAY NOT) cause a
7531          * deadlock but this is currently not addressed here since the
7532          * implementation will be changed to incorporate the use of
7533          * reference counting for both the import and the export segments.
7534          */
7535 
7536         /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7537 
7538         im_seg = (rsmseg_t *)res;
7539 
7540         if (im_seg == NULL) {
7541                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7542                         kmem_free(ka_iovec, ka_size);
7543                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7544                     "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7545                 return (EINVAL);
7546         }
7547         /* putv/getv supported is supported only on import segments */
7548         if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7549                 rsmseglock_release(im_seg);
7550                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7551                         kmem_free(ka_iovec, ka_size);
7552                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7553                     "rsm_iovec_ioctl done: not an import segment\n"));
7554                 return (EINVAL);
7555         }
7556 
7557         /*
7558          * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7559          * as well as wait for a local DR to complete.
7560          */
7561         while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7562             (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7563             (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7564                 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7565                         DBG_PRINTF((category, RSM_DEBUG,
7566                             "rsm_iovec_ioctl done: cv_wait INTR"));
7567                         rsmseglock_release(im_seg);
7568                         return (RSMERR_INTERRUPTED);
7569                 }
7570         }
7571 
7572         if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7573             (im_seg->s_state != RSM_STATE_ACTIVE)) {
7574 
7575                 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7576                     im_seg->s_state == RSM_STATE_NEW);
7577 
7578                 DBG_PRINTF((category, RSM_DEBUG,
7579                     "rsm_iovec_ioctl done: im_seg not conn/map"));
7580                 rsmseglock_release(im_seg);
7581                 e = RSMERR_BAD_SGIO;
7582                 goto out;
7583         }
7584 
7585         im_seg->s_rdmacnt++;
7586         rsmseglock_release(im_seg);
7587 
7588         /*
7589          * Allocate and set up the io vector for rsmpi
7590          */
7591         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7592                 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7593                 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7594         } else {
7595                 iovec_start = iovec = iovec_arr;
7596         }
7597 
7598         rsmpi_sg_io.iovec = iovec;
7599         for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7600                 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7601                         ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7602 
7603                         if (ex_seg == NULL) {
7604                                 e = RSMERR_BAD_SGIO;
7605                                 break;
7606                         }
7607                         ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7608 
7609                         acl = ex_seg->s_acl;
7610                         if (acl[0].ae_permission == 0) {
7611                                 struct buf *xbuf;
7612                                 dev_t sdev = 0;
7613 
7614                                 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7615                                     0, ex_seg->s_len, B_WRITE,
7616                                     sdev, 0, NULL, DDI_UMEM_SLEEP);
7617 
7618                                 ASSERT(xbuf != NULL);
7619 
7620                                 iovec->local_mem.ms_type = RSM_MEM_BUF;
7621                                 iovec->local_mem.ms_memory.bp = xbuf;
7622                         } else {
7623                                 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7624                                 iovec->local_mem.ms_memory.handle =
7625                                     ex_seg->s_handle.out;
7626                         }
7627                         ex_seg->s_rdmacnt++; /* refcnt the handle */
7628                         rsmseglock_release(ex_seg);
7629                 } else {
7630                         iovec->local_mem.ms_type = RSM_MEM_VADDR;
7631                         iovec->local_mem.ms_memory.vr.vaddr =
7632                             ka_iovec->local.vaddr;
7633                 }
7634 
7635                 iovec->local_offset = ka_iovec->local_offset;
7636                 iovec->remote_handle = im_seg->s_handle.in;
7637                 iovec->remote_offset = ka_iovec->remote_offset;
7638                 iovec->transfer_length = ka_iovec->transfer_len;
7639                 iovec++;
7640                 ka_iovec++;
7641         }
7642 
7643         if (iov_proc <  sg_io.io_request_count) {
7644                 /* error while processing handle */
7645                 rsmseglock_acquire(im_seg);
7646                 im_seg->s_rdmacnt--;   /* decrement the refcnt for importseg */
7647                 if (im_seg->s_rdmacnt == 0) {
7648                         cv_broadcast(&im_seg->s_cv);
7649                 }
7650                 rsmseglock_release(im_seg);
7651                 goto out;
7652         }
7653 
7654         /* call rsmpi */
7655         if (cmd == RSM_IOCTL_PUTV)
7656                 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7657                     im_seg->s_adapter->rsmpi_handle,
7658                     &rsmpi_sg_io);
7659         else if (cmd == RSM_IOCTL_GETV)
7660                 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7661                     im_seg->s_adapter->rsmpi_handle,
7662                     &rsmpi_sg_io);
7663         else {
7664                 e = EINVAL;
7665                 DBG_PRINTF((category, RSM_DEBUG,
7666                     "iovec_ioctl: bad command = %x\n", cmd));
7667         }
7668 
7669 
7670         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7671             "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7672 
7673         sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7674 
7675         /*
7676          * Check for implicit signal post flag and do the signal
7677          * post if needed
7678          */
7679         if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7680             e == RSM_SUCCESS) {
7681                 rsmipc_request_t request;
7682 
7683                 request.rsmipc_key = im_seg->s_segid;
7684                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7685                 request.rsmipc_segment_cookie = NULL;
7686                 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7687                 /*
7688                  * Reset the implicit signal post flag to 0 to indicate
7689                  * that the signal post has been done and need not be
7690                  * done in the RSMAPI library
7691                  */
7692                 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7693         }
7694 
7695         rsmseglock_acquire(im_seg);
7696         im_seg->s_rdmacnt--;
7697         if (im_seg->s_rdmacnt == 0) {
7698                 cv_broadcast(&im_seg->s_cv);
7699         }
7700         rsmseglock_release(im_seg);
7701         error = sgio_resid_copyout(arg, &sg_io, mode);
7702 out:
7703         iovec = iovec_start;
7704         ka_iovec = ka_iovec_start;
7705         for (i = 0; i < iov_proc; i++) {
7706                 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7707                         ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7708 
7709                         ASSERT(ex_seg != NULL);
7710                         ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7711 
7712                         ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7713                         if (ex_seg->s_rdmacnt == 0) {
7714                                 cv_broadcast(&ex_seg->s_cv);
7715                         }
7716                         rsmseglock_release(ex_seg);
7717                 }
7718 
7719                 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7720 
7721                 /*
7722                  * At present there is no dependency on the existence of xbufs
7723                  * created by ddi_umem_iosetup for each of the iovecs. So we
7724                  * can these xbufs here.
7725                  */
7726                 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7727                         freerbuf(iovec->local_mem.ms_memory.bp);
7728                 }
7729 
7730                 iovec++;
7731                 ka_iovec++;
7732         }
7733 
7734         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7735                 if (iovec_start)
7736                         kmem_free(iovec_start, size);
7737                 kmem_free(ka_iovec_start, ka_size);
7738         }
7739 
7740         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7741             "rsm_iovec_ioctl done %d\n", e));
7742         /* if RSMPI call fails return that else return copyout's retval */
7743         return ((e != RSM_SUCCESS) ? e : error);
7744 
7745 }
7746 
7747 
7748 static int
7749 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7750 {
7751         adapter_t       *adapter;
7752         rsm_addr_t      addr;
7753         rsm_node_id_t   node;
7754         int             rval = DDI_SUCCESS;
7755         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7756 
7757         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7758 
7759         adapter =  rsm_getadapter(msg, mode);
7760         if (adapter == NULL) {
7761                 DBG_PRINTF((category, RSM_DEBUG,
7762                     "rsmaddr_ioctl done: adapter not found\n"));
7763                 return (RSMERR_CTLR_NOT_PRESENT);
7764         }
7765 
7766         switch (cmd) {
7767         case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7768                 /* returns the hwaddr in msg->hwaddr */
7769                 if (msg->nodeid == my_nodeid) {
7770                         msg->hwaddr = adapter->hwaddr;
7771                 } else {
7772                         addr = get_remote_hwaddr(adapter, msg->nodeid);
7773                         if ((int64_t)addr < 0) {
7774                                 rval = RSMERR_INTERNAL_ERROR;
7775                         } else {
7776                                 msg->hwaddr = addr;
7777                         }
7778                 }
7779                 break;
7780         case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7781                 /* returns the nodeid in msg->nodeid */
7782                 if (msg->hwaddr == adapter->hwaddr) {
7783                         msg->nodeid = my_nodeid;
7784                 } else {
7785                         node = get_remote_nodeid(adapter, msg->hwaddr);
7786                         if ((int)node < 0) {
7787                                 rval = RSMERR_INTERNAL_ERROR;
7788                         } else {
7789                                 msg->nodeid = (rsm_node_id_t)node;
7790                         }
7791                 }
7792                 break;
7793         default:
7794                 rval = EINVAL;
7795                 break;
7796         }
7797 
7798         rsmka_release_adapter(adapter);
7799         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7800             "rsmaddr_ioctl done: %d\n", rval));
7801         return (rval);
7802 }
7803 
7804 static int
7805 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7806 {
7807         DBG_DEFINE(category,
7808             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7809 
7810         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7811 
7812 #ifdef _MULTI_DATAMODEL
7813 
7814         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7815                 rsm_ioctlmsg32_t msg32;
7816                 int i;
7817 
7818                 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7819                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7820                             "rsm_ddi_copyin done: EFAULT\n"));
7821                         return (RSMERR_BAD_ADDR);
7822                 }
7823                 msg->len = msg32.len;
7824                 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7825                 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7826                 msg->key = msg32.key;
7827                 msg->acl_len = msg32.acl_len;
7828                 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7829                 msg->cnum = msg32.cnum;
7830                 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7831                 msg->cname_len = msg32.cname_len;
7832                 msg->nodeid = msg32.nodeid;
7833                 msg->hwaddr = msg32.hwaddr;
7834                 msg->perm = msg32.perm;
7835                 for (i = 0; i < 4; i++) {
7836                         msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7837                 }
7838                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7839                     "rsm_ddi_copyin done\n"));
7840                 return (RSM_SUCCESS);
7841         }
7842 #endif
7843         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7844         if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7845                 return (RSMERR_BAD_ADDR);
7846         else
7847                 return (RSM_SUCCESS);
7848 }
7849 
7850 static int
7851 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7852 {
7853         rsmka_int_controller_attr_t     rsm_cattr;
7854         DBG_DEFINE(category,
7855             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7856 
7857         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7858             "rsmattr_ddi_copyout enter\n"));
7859         /*
7860          * need to copy appropriate data from rsm_controller_attr_t
7861          * to rsmka_int_controller_attr_t
7862          */
7863 #ifdef  _MULTI_DATAMODEL
7864         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7865                 rsmka_int_controller_attr32_t rsm_cattr32;
7866 
7867                 rsm_cattr32.attr_direct_access_sizes =
7868                     adapter->rsm_attr.attr_direct_access_sizes;
7869                 rsm_cattr32.attr_atomic_sizes =
7870                     adapter->rsm_attr.attr_atomic_sizes;
7871                 rsm_cattr32.attr_page_size =
7872                     adapter->rsm_attr.attr_page_size;
7873                 if (adapter->rsm_attr.attr_max_export_segment_size >
7874                     UINT_MAX)
7875                         rsm_cattr32.attr_max_export_segment_size =
7876                             RSM_MAXSZ_PAGE_ALIGNED;
7877                 else
7878                         rsm_cattr32.attr_max_export_segment_size =
7879                             adapter->rsm_attr.attr_max_export_segment_size;
7880                 if (adapter->rsm_attr.attr_tot_export_segment_size >
7881                     UINT_MAX)
7882                         rsm_cattr32.attr_tot_export_segment_size =
7883                             RSM_MAXSZ_PAGE_ALIGNED;
7884                 else
7885                         rsm_cattr32.attr_tot_export_segment_size =
7886                             adapter->rsm_attr.attr_tot_export_segment_size;
7887                 if (adapter->rsm_attr.attr_max_export_segments >
7888                     UINT_MAX)
7889                         rsm_cattr32.attr_max_export_segments =
7890                             UINT_MAX;
7891                 else
7892                         rsm_cattr32.attr_max_export_segments =
7893                             adapter->rsm_attr.attr_max_export_segments;
7894                 if (adapter->rsm_attr.attr_max_import_map_size >
7895                     UINT_MAX)
7896                         rsm_cattr32.attr_max_import_map_size =
7897                             RSM_MAXSZ_PAGE_ALIGNED;
7898                 else
7899                         rsm_cattr32.attr_max_import_map_size =
7900                             adapter->rsm_attr.attr_max_import_map_size;
7901                 if (adapter->rsm_attr.attr_tot_import_map_size >
7902                     UINT_MAX)
7903                         rsm_cattr32.attr_tot_import_map_size =
7904                             RSM_MAXSZ_PAGE_ALIGNED;
7905                 else
7906                         rsm_cattr32.attr_tot_import_map_size =
7907                             adapter->rsm_attr.attr_tot_import_map_size;
7908                 if (adapter->rsm_attr.attr_max_import_segments >
7909                     UINT_MAX)
7910                         rsm_cattr32.attr_max_import_segments =
7911                             UINT_MAX;
7912                 else
7913                         rsm_cattr32.attr_max_import_segments =
7914                             adapter->rsm_attr.attr_max_import_segments;
7915                 rsm_cattr32.attr_controller_addr =
7916                     adapter->rsm_attr.attr_controller_addr;
7917 
7918                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7919                     "rsmattr_ddi_copyout done\n"));
7920                 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7921                     sizeof (rsmka_int_controller_attr32_t), mode)) {
7922                         return (RSMERR_BAD_ADDR);
7923                 }
7924                 else
7925                         return (RSM_SUCCESS);
7926         }
7927 #endif
7928         rsm_cattr.attr_direct_access_sizes =
7929             adapter->rsm_attr.attr_direct_access_sizes;
7930         rsm_cattr.attr_atomic_sizes =
7931             adapter->rsm_attr.attr_atomic_sizes;
7932         rsm_cattr.attr_page_size =
7933             adapter->rsm_attr.attr_page_size;
7934         rsm_cattr.attr_max_export_segment_size =
7935             adapter->rsm_attr.attr_max_export_segment_size;
7936         rsm_cattr.attr_tot_export_segment_size =
7937             adapter->rsm_attr.attr_tot_export_segment_size;
7938         rsm_cattr.attr_max_export_segments =
7939             adapter->rsm_attr.attr_max_export_segments;
7940         rsm_cattr.attr_max_import_map_size =
7941             adapter->rsm_attr.attr_max_import_map_size;
7942         rsm_cattr.attr_tot_import_map_size =
7943             adapter->rsm_attr.attr_tot_import_map_size;
7944         rsm_cattr.attr_max_import_segments =
7945             adapter->rsm_attr.attr_max_import_segments;
7946         rsm_cattr.attr_controller_addr =
7947             adapter->rsm_attr.attr_controller_addr;
7948         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7949             "rsmattr_ddi_copyout done\n"));
7950         if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7951             sizeof (rsmka_int_controller_attr_t), mode)) {
7952                 return (RSMERR_BAD_ADDR);
7953         }
7954         else
7955                 return (RSM_SUCCESS);
7956 }
7957 
7958 /*ARGSUSED*/
7959 static int
7960 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7961     int *rvalp)
7962 {
7963         rsmseg_t *seg;
7964         rsmresource_t   *res;
7965         minor_t         rnum;
7966         rsm_ioctlmsg_t msg = {0};
7967         int error;
7968         adapter_t *adapter;
7969         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7970 
7971         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7972 
7973         if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7974                 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7975                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7976                     "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7977                 return (error);
7978         }
7979 
7980         /* topology cmd does not use the arg common to other cmds */
7981         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7982                 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7983                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7984                     "rsm_ioctl done: %d\n", error));
7985                 return (error);
7986         }
7987 
7988         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7989                 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7990                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7991                     "rsm_ioctl done: %d\n", error));
7992                 return (error);
7993         }
7994 
7995         /*
7996          * try to load arguments
7997          */
7998         if (cmd != RSM_IOCTL_RING_BELL &&
7999             rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
8000                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8001                     "rsm_ioctl done: EFAULT\n"));
8002                 return (RSMERR_BAD_ADDR);
8003         }
8004 
8005         if (cmd == RSM_IOCTL_ATTR) {
8006                 adapter =  rsm_getadapter(&msg, mode);
8007                 if (adapter == NULL) {
8008                         DBG_PRINTF((category, RSM_DEBUG,
8009                             "rsm_ioctl done: ENODEV\n"));
8010                         return (RSMERR_CTLR_NOT_PRESENT);
8011                 }
8012                 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8013                 rsmka_release_adapter(adapter);
8014                 DBG_PRINTF((category, RSM_DEBUG,
8015                     "rsm_ioctl:after copyout %d\n", error));
8016                 return (error);
8017         }
8018 
8019         if (cmd == RSM_IOCTL_BAR_INFO) {
8020                 /* Return library off,len of barrier page */
8021                 msg.off = barrier_offset;
8022                 msg.len = (int)barrier_size;
8023 #ifdef _MULTI_DATAMODEL
8024                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8025                         rsm_ioctlmsg32_t msg32;
8026 
8027                         if (msg.len > UINT_MAX)
8028                                 msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8029                         else
8030                                 msg32.len = (int32_t)msg.len;
8031                         msg32.off = (int32_t)msg.off;
8032                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8033                             "rsm_ioctl done\n"));
8034                         if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8035                             sizeof (msg32), mode))
8036                                 return (RSMERR_BAD_ADDR);
8037                         else
8038                                 return (RSM_SUCCESS);
8039                 }
8040 #endif
8041                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8042                     "rsm_ioctl done\n"));
8043                 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8044                     sizeof (msg), mode))
8045                         return (RSMERR_BAD_ADDR);
8046                 else
8047                         return (RSM_SUCCESS);
8048         }
8049 
8050         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8051                 /* map the nodeid or hwaddr */
8052                 error = rsmaddr_ioctl(cmd, &msg, mode);
8053                 if (error == RSM_SUCCESS) {
8054 #ifdef _MULTI_DATAMODEL
8055                         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8056                                 rsm_ioctlmsg32_t msg32;
8057 
8058                                 msg32.hwaddr = (uint64_t)msg.hwaddr;
8059                                 msg32.nodeid = (uint32_t)msg.nodeid;
8060 
8061                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8062                                     "rsm_ioctl done\n"));
8063                                 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8064                                     sizeof (msg32), mode))
8065                                         return (RSMERR_BAD_ADDR);
8066                                 else
8067                                         return (RSM_SUCCESS);
8068                         }
8069 #endif
8070                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8071                             "rsm_ioctl done\n"));
8072                         if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8073                             sizeof (msg), mode))
8074                                 return (RSMERR_BAD_ADDR);
8075                         else
8076                                 return (RSM_SUCCESS);
8077                 }
8078                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8079                     "rsm_ioctl done: %d\n", error));
8080                 return (error);
8081         }
8082 
8083         /* Find resource and look it in read mode */
8084         rnum = getminor(dev);
8085         res = rsmresource_lookup(rnum, RSM_NOLOCK);
8086         ASSERT(res != NULL);
8087 
8088         /*
8089          * Find command group
8090          */
8091         switch (RSM_IOCTL_CMDGRP(cmd)) {
8092         case RSM_IOCTL_EXPORT_SEG:
8093                 /*
8094                  * Export list is searched during publish, loopback and
8095                  * remote lookup call.
8096                  */
8097                 seg = rsmresource_seg(res, rnum, credp,
8098                     RSM_RESOURCE_EXPORT_SEGMENT);
8099                 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8100                         error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8101                             credp);
8102                 } else { /* export ioctl on an import/barrier resource */
8103                         error = RSMERR_BAD_SEG_HNDL;
8104                 }
8105                 break;
8106         case RSM_IOCTL_IMPORT_SEG:
8107                 /* Import list is searched during remote unmap call. */
8108                 seg = rsmresource_seg(res, rnum, credp,
8109                     RSM_RESOURCE_IMPORT_SEGMENT);
8110                 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8111                         error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8112                             credp);
8113                 } else  { /* import ioctl on an export/barrier resource */
8114                         error = RSMERR_BAD_SEG_HNDL;
8115                 }
8116                 break;
8117         case RSM_IOCTL_BAR:
8118                 if (res != RSMRC_RESERVED &&
8119                     res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8120                         error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8121                             mode);
8122                 } else { /* invalid res value */
8123                         error = RSMERR_BAD_SEG_HNDL;
8124                 }
8125                 break;
8126         case RSM_IOCTL_BELL:
8127                 if (res != RSMRC_RESERVED) {
8128                         if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8129                                 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8130                         else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8131                                 error = importbell_ioctl((rsmseg_t *)res, cmd);
8132                         else /* RSM_RESOURCE_BAR */
8133                                 error = RSMERR_BAD_SEG_HNDL;
8134                 } else { /* invalid res value */
8135                         error = RSMERR_BAD_SEG_HNDL;
8136                 }
8137                 break;
8138         default:
8139                 error = EINVAL;
8140         }
8141 
8142         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8143             error));
8144         return (error);
8145 }
8146 
8147 
8148 /* **************************** Segment Mapping Operations ********* */
8149 static rsm_mapinfo_t *
8150 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8151     size_t *map_len)
8152 {
8153         rsm_mapinfo_t   *p;
8154         /*
8155          * Find the correct mapinfo structure to use during the mapping
8156          * from the seg->s_mapinfo list.
8157          * The seg->s_mapinfo list contains in reverse order the mappings
8158          * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8159          * access the correct entry within this list for the mapping
8160          * requested.
8161          *
8162          * The algorithm for selecting a list entry is as follows:
8163          *
8164          * When start_offset of an entry <= off we have found the entry
8165          * we were looking for. Adjust the dev_offset and map_len (needs
8166          * to be PAGESIZE aligned).
8167          */
8168         p = seg->s_mapinfo;
8169         for (; p; p = p->next) {
8170                 if (p->start_offset <= off) {
8171                         *dev_offset = p->dev_offset + off - p->start_offset;
8172                         *map_len = (len > p->individual_len) ?
8173                             p->individual_len : ptob(btopr(len));
8174                         return (p);
8175                 }
8176                 p = p->next;
8177         }
8178 
8179         return (NULL);
8180 }
8181 
8182 static void
8183 rsm_free_mapinfo(rsm_mapinfo_t  *mapinfo)
8184 {
8185         rsm_mapinfo_t *p;
8186 
8187         while (mapinfo != NULL) {
8188                 p = mapinfo;
8189                 mapinfo = mapinfo->next;
8190                 kmem_free(p, sizeof (*p));
8191         }
8192 }
8193 
8194 static int
8195 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8196     size_t len, void **pvtp)
8197 {
8198         rsmcookie_t     *p;
8199         rsmresource_t   *res;
8200         rsmseg_t        *seg;
8201         minor_t rnum;
8202         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8203 
8204         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8205 
8206         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8207             "rsmmap_map: dhp = %x\n", dhp));
8208 
8209         flags = flags;
8210 
8211         rnum = getminor(dev);
8212         res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8213         ASSERT(res != NULL);
8214 
8215         seg = (rsmseg_t *)res;
8216 
8217         rsmseglock_acquire(seg);
8218 
8219         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8220 
8221         /*
8222          * Allocate structure and add cookie to segment list
8223          */
8224         p = kmem_alloc(sizeof (*p), KM_SLEEP);
8225 
8226         p->c_dhp = dhp;
8227         p->c_off = off;
8228         p->c_len = len;
8229         p->c_next = seg->s_ckl;
8230         seg->s_ckl = p;
8231 
8232         *pvtp = (void *)seg;
8233 
8234         rsmseglock_release(seg);
8235 
8236         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8237         return (DDI_SUCCESS);
8238 }
8239 
8240 /*
8241  * Page fault handling is done here. The prerequisite mapping setup
8242  * has been done in rsm_devmap with calls to ddi_devmem_setup or
8243  * ddi_umem_setup
8244  */
8245 static int
8246 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8247     uint_t type, uint_t rw)
8248 {
8249         int e;
8250         rsmseg_t *seg = (rsmseg_t *)pvt;
8251         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8252 
8253         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8254 
8255         rsmseglock_acquire(seg);
8256 
8257         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8258 
8259         while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8260                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8261                         DBG_PRINTF((category, RSM_DEBUG,
8262                             "rsmmap_access done: cv_wait INTR"));
8263                         rsmseglock_release(seg);
8264                         return (RSMERR_INTERRUPTED);
8265                 }
8266         }
8267 
8268         ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8269             seg->s_state == RSM_STATE_ACTIVE);
8270 
8271         if (seg->s_state == RSM_STATE_DISCONNECT)
8272                 seg->s_flags |= RSM_IMPORT_DUMMY;
8273 
8274         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8275             "rsmmap_access: dhp = %x\n", dhp));
8276 
8277         rsmseglock_release(seg);
8278 
8279         if (e = devmap_load(dhp, offset, len, type, rw)) {
8280                 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8281         }
8282 
8283 
8284         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8285 
8286         return (e);
8287 }
8288 
8289 static int
8290 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8291         void **newpvt)
8292 {
8293         rsmseg_t        *seg = (rsmseg_t *)oldpvt;
8294         rsmcookie_t     *p, *old;
8295         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8296 
8297         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8298 
8299         /*
8300          * Same as map, create an entry to hold cookie and add it to
8301          * connect segment list. The oldpvt is a pointer to segment.
8302          * Return segment pointer in newpvt.
8303          */
8304         rsmseglock_acquire(seg);
8305 
8306         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8307 
8308         /*
8309          * Find old cookie
8310          */
8311         for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8312                 if (old->c_dhp == dhp) {
8313                         break;
8314                 }
8315         }
8316         if (old == NULL) {
8317                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8318                     "rsmmap_dup done: EINVAL\n"));
8319                 rsmseglock_release(seg);
8320                 return (EINVAL);
8321         }
8322 
8323         p = kmem_alloc(sizeof (*p), KM_SLEEP);
8324 
8325         p->c_dhp = new_dhp;
8326         p->c_off = old->c_off;
8327         p->c_len = old->c_len;
8328         p->c_next = seg->s_ckl;
8329         seg->s_ckl = p;
8330 
8331         *newpvt = (void *)seg;
8332 
8333         rsmseglock_release(seg);
8334 
8335         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8336 
8337         return (DDI_SUCCESS);
8338 }
8339 
8340 static void
8341 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8342         devmap_cookie_t new_dhp1, void **pvtp1,
8343         devmap_cookie_t new_dhp2, void **pvtp2)
8344 {
8345         /*
8346          * Remove pvtp structure from segment list.
8347          */
8348         rsmseg_t        *seg = (rsmseg_t *)pvtp;
8349         int freeflag;
8350 
8351         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8352 
8353         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8354 
8355         off = off; len = len;
8356         pvtp1 = pvtp1; pvtp2 = pvtp2;
8357 
8358         rsmseglock_acquire(seg);
8359 
8360         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8361 
8362         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8363             "rsmmap_unmap: dhp = %x\n", dhp));
8364         /*
8365          * We can go ahead and remove the dhps even if we are in
8366          * the MAPPING state because the dhps being removed here
8367          * belong to a different mmap and we are holding the segment
8368          * lock.
8369          */
8370         if (new_dhp1 == NULL && new_dhp2 == NULL) {
8371                 /* find and remove dhp handle */
8372                 rsmcookie_t *tmp, **back = &seg->s_ckl;
8373 
8374                 while (*back != NULL) {
8375                         tmp = *back;
8376                         if (tmp->c_dhp == dhp) {
8377                                 *back = tmp->c_next;
8378                                 kmem_free(tmp, sizeof (*tmp));
8379                                 break;
8380                         }
8381                         back = &tmp->c_next;
8382                 }
8383         } else {
8384                 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8385                     "rsmmap_unmap:parital unmap"
8386                     "new_dhp1 %lx, new_dhp2 %lx\n",
8387                     (size_t)new_dhp1, (size_t)new_dhp2));
8388         }
8389 
8390         /*
8391          * rsmmap_unmap is called for each mapping cookie on the list.
8392          * When the list becomes empty and we are not in the MAPPING
8393          * state then unmap in the rsmpi driver.
8394          */
8395         if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8396                 (void) rsm_unmap(seg);
8397 
8398         if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8399                 freeflag = 1;
8400         } else {
8401                 freeflag = 0;
8402         }
8403 
8404         rsmseglock_release(seg);
8405 
8406         if (freeflag) {
8407                 /* Free the segment structure */
8408                 rsmseg_free(seg);
8409         }
8410         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8411 
8412 }
8413 
/*
 * Devmap callback table of the RSM kernel agent. rsm_devmap passes a
 * pointer to this table to devmap_devmem_setup and devmap_umem_setup for
 * import segment mappings, so that the rsmmap_* routines are called back
 * for those mappings.
 */
static struct devmap_callback_ctl rsmmap_ops = {
	DEVMAP_OPS_REV,	/* devmap_ops version number */
	rsmmap_map,	/* devmap_ops map routine */
	rsmmap_access,	/* devmap_ops access routine */
	rsmmap_dup,	/* devmap_ops dup routine */
	rsmmap_unmap,	/* devmap_ops unmap routine */
};
8421 
8422 static int
8423 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8424     size_t *maplen, uint_t model /*ARGSUSED*/)
8425 {
8426         struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8427         int             err;
8428         uint_t          maxprot;
8429         minor_t         rnum;
8430         rsmseg_t        *seg;
8431         off_t           dev_offset;
8432         size_t          cur_len;
8433         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8434 
8435         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8436 
8437         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8438             "rsm_devmap: off = %lx, len = %lx\n", off, len));
8439         rnum = getminor(dev);
8440         seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8441         ASSERT(seg != NULL);
8442 
8443         if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8444                 if ((off == barrier_offset) &&
8445                     (len == barrier_size)) {
8446 
8447                         ASSERT(bar_va != NULL && bar_cookie != NULL);
8448 
8449                         /*
8450                          * The offset argument in devmap_umem_setup represents
8451                          * the offset within the kernel memory defined by the
8452                          * cookie. We use this offset as barrier_offset.
8453                          */
8454                         err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8455                             barrier_offset, len, PROT_USER|PROT_READ,
8456                             DEVMAP_DEFAULTS, 0);
8457 
8458                         if (err != 0) {
8459                                 DBG_PRINTF((category, RSM_ERR,
8460                                     "rsm_devmap done: %d\n", err));
8461                                 return (RSMERR_MAP_FAILED);
8462                         }
8463                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8464                             "rsm_devmap done: %d\n", err));
8465 
8466                         *maplen = barrier_size;
8467 
8468                         return (err);
8469                 } else {
8470                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8471                             "rsm_devmap done: %d\n", err));
8472                         return (RSMERR_MAP_FAILED);
8473                 }
8474         }
8475 
8476         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8477         ASSERT(seg->s_state == RSM_STATE_MAPPING);
8478 
8479         /*
8480          * Make sure we still have permission for the map operation.
8481          */
8482         maxprot = PROT_USER;
8483         if (seg->s_mode & RSM_PERM_READ) {
8484                 maxprot |= PROT_READ;
8485         }
8486 
8487         if (seg->s_mode & RSM_PERM_WRITE) {
8488                 maxprot |= PROT_WRITE;
8489         }
8490 
8491         /*
8492          * For each devmap call, rsmmap_map is called. This maintains driver
8493          * private information for the mapping. Thus, if there are multiple
8494          * devmap calls there will be multiple rsmmap_map calls and for each
8495          * call, the mapping information will be stored.
8496          * In case of an error during the processing of the devmap call, error
8497          * will be returned. This error return causes the caller of rsm_devmap
8498          * to undo all the mappings by calling rsmmap_unmap for each one.
8499          * rsmmap_unmap will free up the private information for the requested
8500          * mapping.
8501          */
8502         if (seg->s_node != my_nodeid) {
8503                 rsm_mapinfo_t *p;
8504 
8505                 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8506                 if (p == NULL) {
8507                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8508                             "rsm_devmap: incorrect mapping info\n"));
8509                         return (RSMERR_MAP_FAILED);
8510                 }
8511                 err = devmap_devmem_setup(dhc, p->dip,
8512                     callbackops, p->dev_register,
8513                     dev_offset, cur_len, maxprot,
8514                     DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8515 
8516                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8517                     "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8518                     "off=%lx,len=%lx\n",
8519                     p->dip, p->dev_register, dev_offset, off, cur_len));
8520 
8521                 if (err != 0) {
8522                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8523                             "rsm_devmap: devmap_devmem_setup failed %d\n",
8524                             err));
8525                         return (RSMERR_MAP_FAILED);
8526                 }
8527                 /* cur_len is always an integral multiple pagesize */
8528                 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8529                 *maplen = cur_len;
8530                 return (err);
8531 
8532         } else {
8533                 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8534                     seg->s_cookie, off, len, maxprot,
8535                     DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8536                 if (err != 0) {
8537                         DBG_PRINTF((category, RSM_DEBUG,
8538                             "rsm_devmap: devmap_umem_setup failed %d\n",
8539                             err));
8540                         return (RSMERR_MAP_FAILED);
8541                 }
8542                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8543                     "rsm_devmap: loopback done\n"));
8544 
8545                 *maplen = ptob(btopr(len));
8546 
8547                 return (err);
8548         }
8549 }
8550 
8551 /*
8552  * We can use the devmap framework for mapping device memory to user space by
8553  * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8554  * processing calls this entry point and devmap_setup is called within this
8555  * function, which eventually calls rsm_devmap
8556  */
8557 static int
8558 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8559     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8560 {
8561         int                     error = 0;
8562         int                     old_state;
8563         minor_t                 rnum;
8564         rsmseg_t                *seg, *eseg;
8565         adapter_t               *adapter;
8566         rsm_import_share_t      *sharedp;
8567         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8568 
8569         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8570 
8571         /*
8572          * find segment
8573          */
8574         rnum = getminor(dev);
8575         seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8576 
8577         if (seg == NULL) {
8578                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8579                     "rsm_segmap done: invalid segment\n"));
8580                 return (EINVAL);
8581         }
8582 
8583         /*
8584          * the user is trying to map a resource that has not been
8585          * defined yet. The library uses this to map in the
8586          * barrier page.
8587          */
8588         if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8589                 rsmseglock_release(seg);
8590 
8591                 /*
8592                  * The mapping for the barrier page is identified
8593                  * by the special offset barrier_offset
8594                  */
8595 
8596                 if (off == (off_t)barrier_offset ||
8597                     len == (off_t)barrier_size) {
8598                         if (bar_cookie == NULL || bar_va == NULL) {
8599                                 DBG_PRINTF((category, RSM_DEBUG,
8600                                     "rsm_segmap: bar cookie/va is NULL\n"));
8601                                 return (EINVAL);
8602                         }
8603 
8604                         error = devmap_setup(dev, (offset_t)off, as, addrp,
8605                             (size_t)len, prot, maxprot, flags,  cred);
8606 
8607                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8608                             "rsm_segmap done: %d\n", error));
8609                         return (error);
8610                 } else {
8611                         DBG_PRINTF((category, RSM_DEBUG,
8612                             "rsm_segmap: bad offset/length\n"));
8613                         return (EINVAL);
8614                 }
8615         }
8616 
8617         /* Make sure you can only map imported segments */
8618         if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8619                 rsmseglock_release(seg);
8620                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8621                     "rsm_segmap done: not an import segment\n"));
8622                 return (EINVAL);
8623         }
8624         /* check means library is broken */
8625         ASSERT(seg->s_hdr.rsmrc_num == rnum);
8626 
8627         /* wait for the segment to become unquiesced */
8628         while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8629                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8630                         rsmseglock_release(seg);
8631                         DBG_PRINTF((category, RSM_DEBUG,
8632                             "rsm_segmap done: cv_wait INTR"));
8633                         return (ENODEV);
8634                 }
8635         }
8636 
8637         /* wait until segment leaves the mapping state */
8638         while (seg->s_state == RSM_STATE_MAPPING)
8639                 cv_wait(&seg->s_cv, &seg->s_lock);
8640 
8641         /*
8642          * we allow multiple maps of the same segment in the KA
8643          * and it works because we do an rsmpi map of the whole
8644          * segment during the first map and all the device mapping
8645          * information needed in rsm_devmap is in the mapinfo list.
8646          */
8647         if ((seg->s_state != RSM_STATE_CONNECT) &&
8648             (seg->s_state != RSM_STATE_ACTIVE)) {
8649                 rsmseglock_release(seg);
8650                 DBG_PRINTF((category, RSM_DEBUG,
8651                     "rsm_segmap done: segment not connected\n"));
8652                 return (ENODEV);
8653         }
8654 
8655         /*
8656          * Make sure we are not mapping a larger segment than what's
8657          * exported
8658          */
8659         if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8660                 rsmseglock_release(seg);
8661                 DBG_PRINTF((category, RSM_DEBUG,
8662                     "rsm_segmap done: off+len>seg size\n"));
8663                 return (ENXIO);
8664         }
8665 
8666         /*
8667          * Make sure we still have permission for the map operation.
8668          */
8669         maxprot = PROT_USER;
8670         if (seg->s_mode & RSM_PERM_READ) {
8671                 maxprot |= PROT_READ;
8672         }
8673 
8674         if (seg->s_mode & RSM_PERM_WRITE) {
8675                 maxprot |= PROT_WRITE;
8676         }
8677 
8678         if ((prot & maxprot) != prot) {
8679                 /* No permission */
8680                 rsmseglock_release(seg);
8681                 DBG_PRINTF((category, RSM_DEBUG,
8682                     "rsm_segmap done: no permission\n"));
8683                 return (EACCES);
8684         }
8685 
8686         old_state = seg->s_state;
8687 
8688         ASSERT(seg->s_share != NULL);
8689 
8690         rsmsharelock_acquire(seg);
8691 
8692         sharedp = seg->s_share;
8693 
8694         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8695             "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8696 
8697         if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8698             (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8699                 rsmsharelock_release(seg);
8700                 rsmseglock_release(seg);
8701                 DBG_PRINTF((category, RSM_DEBUG,
8702                     "rsm_segmap done:RSMSI_STATE %d invalid\n",
8703                     sharedp->rsmsi_state));
8704                 return (ENODEV);
8705         }
8706 
8707         /*
8708          * Do the map - since we want importers to share mappings
8709          * we do the rsmpi map for the whole segment
8710          */
8711         if (seg->s_node != my_nodeid) {
8712                 uint_t dev_register;
8713                 off_t dev_offset;
8714                 dev_info_t *dip;
8715                 size_t tmp_len;
8716                 size_t total_length_mapped = 0;
8717                 size_t length_to_map = seg->s_len;
8718                 off_t tmp_off = 0;
8719                 rsm_mapinfo_t *p;
8720 
8721                 /*
8722                  * length_to_map = seg->s_len is always an integral
8723                  * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8724                  * list is a multiple of PAGESIZE - RSMPI map ensures this
8725                  */
8726 
8727                 adapter = seg->s_adapter;
8728                 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8729                     sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8730 
8731                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8732                         error = 0;
8733                         /* map the whole segment */
8734                         while (total_length_mapped < seg->s_len) {
8735                                 tmp_len = 0;
8736 
8737                                 error = adapter->rsmpi_ops->rsm_map(
8738                                     seg->s_handle.in, tmp_off,
8739                                     length_to_map, &tmp_len,
8740                                     &dip, &dev_register, &dev_offset,
8741                                     NULL, NULL);
8742 
8743                                 if (error != 0)
8744                                         break;
8745 
8746                                 /*
8747                                  * Store the mapping info obtained from rsm_map
8748                                  */
8749                                 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8750                                 p->dev_register = dev_register;
8751                                 p->dev_offset = dev_offset;
8752                                 p->dip = dip;
8753                                 p->individual_len = tmp_len;
8754                                 p->start_offset = tmp_off;
8755                                 p->next = sharedp->rsmsi_mapinfo;
8756                                 sharedp->rsmsi_mapinfo = p;
8757 
8758                                 total_length_mapped += tmp_len;
8759                                 length_to_map -= tmp_len;
8760                                 tmp_off += tmp_len;
8761                         }
8762                         seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8763 
8764                         if (error != RSM_SUCCESS) {
8765                                 /* Check if this is the the first rsm_map */
8766                                 if (sharedp->rsmsi_mapinfo != NULL) {
8767                                         /*
8768                                          * A single rsm_unmap undoes
8769                                          * multiple rsm_maps.
8770                                          */
8771                                         (void) seg->s_adapter->rsmpi_ops->
8772                                             rsm_unmap(sharedp->rsmsi_handle);
8773                                         rsm_free_mapinfo(sharedp->
8774                                             rsmsi_mapinfo);
8775                                 }
8776                                 sharedp->rsmsi_mapinfo = NULL;
8777                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8778                                 rsmsharelock_release(seg);
8779                                 rsmseglock_release(seg);
8780                                 DBG_PRINTF((category, RSM_DEBUG,
8781                                     "rsm_segmap done: rsmpi map err %d\n",
8782                                     error));
8783                                 ASSERT(error != RSMERR_BAD_LENGTH &&
8784                                     error != RSMERR_BAD_MEM_ALIGNMENT &&
8785                                     error != RSMERR_BAD_SEG_HNDL);
8786                                 if (error == RSMERR_UNSUPPORTED_OPERATION)
8787                                         return (ENOTSUP);
8788                                 else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8789                                         return (EAGAIN);
8790                                 else if (error == RSMERR_CONN_ABORTED)
8791                                         return (ENODEV);
8792                                 else
8793                                         return (error);
8794                         } else {
8795                                 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8796                         }
8797                 } else {
8798                         seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8799                 }
8800 
8801                 sharedp->rsmsi_mapcnt++;
8802 
8803                 rsmsharelock_release(seg);
8804 
8805                 /* move to an intermediate mapping state */
8806                 seg->s_state = RSM_STATE_MAPPING;
8807                 rsmseglock_release(seg);
8808 
8809                 error = devmap_setup(dev, (offset_t)off, as, addrp,
8810                     len, prot, maxprot, flags, cred);
8811 
8812                 rsmseglock_acquire(seg);
8813                 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8814 
8815                 if (error == DDI_SUCCESS) {
8816                         seg->s_state = RSM_STATE_ACTIVE;
8817                 } else {
8818                         rsmsharelock_acquire(seg);
8819 
8820                         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8821 
8822                         sharedp->rsmsi_mapcnt--;
8823                         if (sharedp->rsmsi_mapcnt == 0) {
8824                                 /* unmap the shared RSMPI mapping */
8825                                 ASSERT(sharedp->rsmsi_handle != NULL);
8826                                 (void) adapter->rsmpi_ops->
8827                                     rsm_unmap(sharedp->rsmsi_handle);
8828                                 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8829                                 sharedp->rsmsi_mapinfo = NULL;
8830                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8831                         }
8832 
8833                         rsmsharelock_release(seg);
8834                         seg->s_state = old_state;
8835                         DBG_PRINTF((category, RSM_ERR,
8836                             "rsm: devmap_setup failed %d\n", error));
8837                 }
8838                 cv_broadcast(&seg->s_cv);
8839                 rsmseglock_release(seg);
8840                 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8841                     error));
8842                 return (error);
8843         } else {
8844                 /*
8845                  * For loopback, the export segment mapping cookie (s_cookie)
8846                  * is also used as the s_cookie value for its import segments
8847                  * during mapping.
8848                  * Note that reference counting for s_cookie of the export
8849                  * segment is not required due to the following:
8850                  * We never have a case of the export segment being destroyed,
8851                  * leaving the import segments with a stale value for the
8852                  * s_cookie field, since a force disconnect is done prior to a
8853                  * destroy of an export segment. The force disconnect causes
8854                  * the s_cookie value to be reset to NULL. Also for the
8855                  * rsm_rebind operation, we change the s_cookie value of the
8856                  * export segment as well as of all its local (loopback)
8857                  * importers.
8858                  */
8859                 DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8860 
8861                 rsmsharelock_release(seg);
8862                 /*
8863                  * In order to maintain the lock ordering between the export
8864                  * and import segment locks, we need to acquire the export
8865                  * segment lock first and only then acquire the import
8866                  * segment lock.
8867                  * The above is necessary to avoid any deadlock scenarios
8868                  * with rsm_rebind which also acquires both the export
8869                  * and import segment locks in the above mentioned order.
8870                  * Based on code inspection, there seem to be no other
8871                  * situations in which both the export and import segment
8872                  * locks are acquired either in the same or opposite order
8873                  * as mentioned above.
8874                  * Thus in order to conform to the above lock order, we
8875                  * need to change the state of the import segment to
8876                  * RSM_STATE_MAPPING, release the lock. Once this is done we
8877                  * can now safely acquire the export segment lock first
8878                  * followed by the import segment lock which is as per
8879                  * the lock order mentioned above.
8880                  */
8881                 /* move to an intermediate mapping state */
8882                 seg->s_state = RSM_STATE_MAPPING;
8883                 rsmseglock_release(seg);
8884 
8885                 eseg = rsmexport_lookup(seg->s_key);
8886 
8887                 if (eseg == NULL) {
8888                         rsmseglock_acquire(seg);
8889                         /*
8890                          * Revert to old_state and signal any waiters
8891                          * The shared state is not changed
8892                          */
8893 
8894                         seg->s_state = old_state;
8895                         cv_broadcast(&seg->s_cv);
8896                         rsmseglock_release(seg);
8897                         DBG_PRINTF((category, RSM_DEBUG,
8898                             "rsm_segmap done: key %d not found\n", seg->s_key));
8899                         return (ENODEV);
8900                 }
8901 
8902                 rsmsharelock_acquire(seg);
8903                 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8904                     sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8905 
8906                 sharedp->rsmsi_mapcnt++;
8907                 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8908                 rsmsharelock_release(seg);
8909 
8910                 ASSERT(eseg->s_cookie != NULL);
8911 
8912                 /*
8913                  * It is not required or necessary to acquire the import
8914                  * segment lock here to change the value of s_cookie since
8915                  * no one will touch the import segment as long as it is
8916                  * in the RSM_STATE_MAPPING state.
8917                  */
8918                 seg->s_cookie = eseg->s_cookie;
8919 
8920                 rsmseglock_release(eseg);
8921 
8922                 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8923                     prot, maxprot, flags, cred);
8924 
8925                 rsmseglock_acquire(seg);
8926                 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8927                 if (error == 0) {
8928                         seg->s_state = RSM_STATE_ACTIVE;
8929                 } else {
8930                         rsmsharelock_acquire(seg);
8931 
8932                         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8933 
8934                         sharedp->rsmsi_mapcnt--;
8935                         if (sharedp->rsmsi_mapcnt == 0) {
8936                                 sharedp->rsmsi_mapinfo = NULL;
8937                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8938                         }
8939                         rsmsharelock_release(seg);
8940                         seg->s_state = old_state;
8941                         seg->s_cookie = NULL;
8942                 }
8943                 cv_broadcast(&seg->s_cv);
8944                 rsmseglock_release(seg);
8945                 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8946                     "rsm_segmap done: %d\n", error));
8947                 return (error);
8948         }
8949 }
8950 
8951 int
8952 rsmka_null_seg_create(
8953     rsm_controller_handle_t argcp,
8954     rsm_memseg_export_handle_t *handle,
8955     size_t size,
8956     uint_t flags,
8957     rsm_memory_local_t *memory,
8958     rsm_resource_callback_t callback,
8959     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8960 {
8961         return (RSM_SUCCESS);
8962 }
8963 
8964 
8965 int
8966 rsmka_null_seg_destroy(
8967     rsm_memseg_export_handle_t argmemseg        /*ARGSUSED*/)
8968 {
8969         return (RSM_SUCCESS);
8970 }
8971 
8972 
8973 int
8974 rsmka_null_bind(
8975     rsm_memseg_export_handle_t argmemseg,
8976     off_t offset,
8977     rsm_memory_local_t *argmemory,
8978     rsm_resource_callback_t callback,
8979     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8980 {
8981         return (RSM_SUCCESS);
8982 }
8983 
8984 
8985 int
8986 rsmka_null_unbind(
8987     rsm_memseg_export_handle_t argmemseg,
8988     off_t offset,
8989     size_t length       /*ARGSUSED*/)
8990 {
8991         return (DDI_SUCCESS);
8992 }
8993 
8994 int
8995 rsmka_null_rebind(
8996     rsm_memseg_export_handle_t argmemseg,
8997     off_t offset,
8998     rsm_memory_local_t *memory,
8999     rsm_resource_callback_t callback,
9000     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9001 {
9002         return (RSM_SUCCESS);
9003 }
9004 
9005 int
9006 rsmka_null_publish(
9007     rsm_memseg_export_handle_t argmemseg,
9008     rsm_access_entry_t access_list[],
9009     uint_t access_list_length,
9010     rsm_memseg_id_t segment_id,
9011     rsm_resource_callback_t callback,
9012     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9013 {
9014         return (RSM_SUCCESS);
9015 }
9016 
9017 
9018 int
9019 rsmka_null_republish(
9020     rsm_memseg_export_handle_t memseg,
9021     rsm_access_entry_t access_list[],
9022     uint_t access_list_length,
9023     rsm_resource_callback_t callback,
9024     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9025 {
9026         return (RSM_SUCCESS);
9027 }
9028 
9029 int
9030 rsmka_null_unpublish(
9031     rsm_memseg_export_handle_t argmemseg        /*ARGSUSED*/)
9032 {
9033         return (RSM_SUCCESS);
9034 }
9035 
9036 
9037 void
9038 rsmka_init_loopback()
9039 {
9040         rsm_ops_t       *ops = &null_rsmpi_ops;
9041         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9042 
9043         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9044             "rsmka_init_loopback enter\n"));
9045 
9046         /* initialize null ops vector */
9047         ops->rsm_seg_create = rsmka_null_seg_create;
9048         ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9049         ops->rsm_bind = rsmka_null_bind;
9050         ops->rsm_unbind = rsmka_null_unbind;
9051         ops->rsm_rebind = rsmka_null_rebind;
9052         ops->rsm_publish = rsmka_null_publish;
9053         ops->rsm_unpublish = rsmka_null_unpublish;
9054         ops->rsm_republish = rsmka_null_republish;
9055 
9056         /* initialize attributes for loopback adapter */
9057         loopback_attr.attr_name = loopback_str;
9058         loopback_attr.attr_page_size = 0x8; /* 8K */
9059 
9060         /* initialize loopback adapter */
9061         loopback_adapter.rsm_attr = loopback_attr;
9062         loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9063         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9064             "rsmka_init_loopback done\n"));
9065 }
9066 
9067 /* ************** DR functions ********************************** */
9068 static void
9069 rsm_quiesce_exp_seg(rsmresource_t *resp)
9070 {
9071         int             recheck_state;
9072         rsmseg_t        *segp = (rsmseg_t *)resp;
9073         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9074         DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9075 
9076         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9077             "%s enter: key=%u\n", function, segp->s_key));
9078 
9079         rsmseglock_acquire(segp);
9080         do {
9081                 recheck_state = 0;
9082                 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9083                     (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9084                     (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9085                     (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9086                         rsmseglock_release(segp);
9087                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9088                             "%s done:state =%d\n", function,
9089                             segp->s_state));
9090                         return;
9091                 }
9092 
9093                 if (segp->s_state == RSM_STATE_NEW) {
9094                         segp->s_state = RSM_STATE_NEW_QUIESCED;
9095                         rsmseglock_release(segp);
9096                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9097                             "%s done:state =%d\n", function,
9098                             segp->s_state));
9099                         return;
9100                 }
9101 
9102                 if (segp->s_state == RSM_STATE_BIND) {
9103                         /* unbind */
9104                         (void) rsm_unbind_pages(segp);
9105                         segp->s_state = RSM_STATE_BIND_QUIESCED;
9106                         rsmseglock_release(segp);
9107                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9108                             "%s done:state =%d\n", function,
9109                             segp->s_state));
9110                         return;
9111                 }
9112 
9113                 if (segp->s_state == RSM_STATE_EXPORT) {
9114                         /*
9115                          * wait for putv/getv to complete if the segp is
9116                          * a local memory handle
9117                          */
9118                         while ((segp->s_state == RSM_STATE_EXPORT) &&
9119                             (segp->s_rdmacnt != 0)) {
9120                                 cv_wait(&segp->s_cv, &segp->s_lock);
9121                         }
9122 
9123                         if (segp->s_state != RSM_STATE_EXPORT) {
9124                                 /*
9125                                  * state changed need to see what it
9126                                  * should be changed to.
9127                                  */
9128                                 recheck_state = 1;
9129                                 continue;
9130                         }
9131 
9132                         segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9133                         rsmseglock_release(segp);
9134                         /*
9135                          * send SUSPEND messages - currently it will be
9136                          * done at the end
9137                          */
9138                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9139                             "%s done:state =%d\n", function,
9140                             segp->s_state));
9141                         return;
9142                 }
9143         } while (recheck_state);
9144 
9145         rsmseglock_release(segp);
9146 }
9147 
9148 static void
9149 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9150 {
9151         int                     ret;
9152         rsmseg_t                *segp = (rsmseg_t *)resp;
9153         rsmapi_access_entry_t   *acl;
9154         rsm_access_entry_t      *rsmpi_acl;
9155         int                     acl_len;
9156         int                     create_flags = 0;
9157         struct buf              *xbuf;
9158         rsm_memory_local_t      mem;
9159         adapter_t               *adapter;
9160         dev_t                   sdev = 0;
9161         rsm_resource_callback_t callback_flag;
9162         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9163         DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9164 
9165         rsmseglock_acquire(segp);
9166 
9167         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9168             "%s enter: key=%u, state=%d\n", function, segp->s_key,
9169             segp->s_state));
9170 
9171         if ((segp->s_state == RSM_STATE_NEW) ||
9172             (segp->s_state == RSM_STATE_BIND) ||
9173             (segp->s_state == RSM_STATE_EXPORT)) {
9174                 rsmseglock_release(segp);
9175                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9176                     function, segp->s_state));
9177                 return;
9178         }
9179 
9180         if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9181                 segp->s_state = RSM_STATE_NEW;
9182                 cv_broadcast(&segp->s_cv);
9183                 rsmseglock_release(segp);
9184                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9185                     function, segp->s_state));
9186                 return;
9187         }
9188 
9189         if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9190                 /* bind the segment */
9191                 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9192                     segp->s_len, segp->s_proc);
9193                 if (ret == RSM_SUCCESS) { /* bind successful */
9194                         segp->s_state = RSM_STATE_BIND;
9195                 } else { /* bind failed - resource unavailable */
9196                         segp->s_state = RSM_STATE_NEW;
9197                 }
9198                 cv_broadcast(&segp->s_cv);
9199                 rsmseglock_release(segp);
9200                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9201                     "%s done: bind_qscd bind = %d\n", function, ret));
9202                 return;
9203         }
9204 
9205         while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9206                 /* wait for the segment to move to EXPORT_QUIESCED state */
9207                 cv_wait(&segp->s_cv, &segp->s_lock);
9208         }
9209 
9210         if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9211                 /* bind the segment */
9212                 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9213                     segp->s_len, segp->s_proc);
9214 
9215                 if (ret != RSM_SUCCESS) {
9216                         /* bind failed - resource unavailable */
9217                         acl_len = segp->s_acl_len;
9218                         acl = segp->s_acl;
9219                         rsmpi_acl = segp->s_acl_in;
9220                         segp->s_acl_len = 0;
9221                         segp->s_acl = NULL;
9222                         segp->s_acl_in = NULL;
9223                         rsmseglock_release(segp);
9224 
9225                         rsmexport_rm(segp);
9226                         rsmacl_free(acl, acl_len);
9227                         rsmpiacl_free(rsmpi_acl, acl_len);
9228 
9229                         rsmseglock_acquire(segp);
9230                         segp->s_state = RSM_STATE_NEW;
9231                         cv_broadcast(&segp->s_cv);
9232                         rsmseglock_release(segp);
9233                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9234                             "%s done: exp_qscd bind failed = %d\n",
9235                             function, ret));
9236                         return;
9237                 }
9238                 /*
9239                  * publish the segment
9240                  * if  successful
9241                  *   segp->s_state = RSM_STATE_EXPORT;
9242                  * else failed
9243                  *   segp->s_state = RSM_STATE_BIND;
9244                  */
9245 
9246                 /* check whether it is a local_memory_handle */
9247                 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9248                         if ((segp->s_acl[0].ae_node == my_nodeid) &&
9249                             (segp->s_acl[0].ae_permission == 0)) {
9250                                 segp->s_state = RSM_STATE_EXPORT;
9251                                 cv_broadcast(&segp->s_cv);
9252                                 rsmseglock_release(segp);
9253                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9254                                     "%s done:exp_qscd\n", function));
9255                                 return;
9256                         }
9257                 }
9258                 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9259                     sdev, 0, NULL, DDI_UMEM_SLEEP);
9260                 ASSERT(xbuf != NULL);
9261 
9262                 mem.ms_type = RSM_MEM_BUF;
9263                 mem.ms_bp = xbuf;
9264 
9265                 adapter = segp->s_adapter;
9266 
9267                 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9268                         create_flags = RSM_ALLOW_UNBIND_REBIND;
9269                 }
9270 
9271                 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9272                         callback_flag  = RSM_RESOURCE_DONTWAIT;
9273                 } else {
9274                         callback_flag  = RSM_RESOURCE_SLEEP;
9275                 }
9276 
9277                 ret = adapter->rsmpi_ops->rsm_seg_create(
9278                     adapter->rsmpi_handle, &segp->s_handle.out,
9279                     segp->s_len, create_flags, &mem,
9280                     callback_flag, NULL);
9281 
9282                 if (ret != RSM_SUCCESS) {
9283                         acl_len = segp->s_acl_len;
9284                         acl = segp->s_acl;
9285                         rsmpi_acl = segp->s_acl_in;
9286                         segp->s_acl_len = 0;
9287                         segp->s_acl = NULL;
9288                         segp->s_acl_in = NULL;
9289                         rsmseglock_release(segp);
9290 
9291                         rsmexport_rm(segp);
9292                         rsmacl_free(acl, acl_len);
9293                         rsmpiacl_free(rsmpi_acl, acl_len);
9294 
9295                         rsmseglock_acquire(segp);
9296                         segp->s_state = RSM_STATE_BIND;
9297                         cv_broadcast(&segp->s_cv);
9298                         rsmseglock_release(segp);
9299                         DBG_PRINTF((category, RSM_ERR,
9300                             "%s done: exp_qscd create failed = %d\n",
9301                             function, ret));
9302                         return;
9303                 }
9304 
9305                 ret = adapter->rsmpi_ops->rsm_publish(
9306                     segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9307                     segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9308 
9309                 if (ret != RSM_SUCCESS) {
9310                         acl_len = segp->s_acl_len;
9311                         acl = segp->s_acl;
9312                         rsmpi_acl = segp->s_acl_in;
9313                         segp->s_acl_len = 0;
9314                         segp->s_acl = NULL;
9315                         segp->s_acl_in = NULL;
9316                         adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9317                         rsmseglock_release(segp);
9318 
9319                         rsmexport_rm(segp);
9320                         rsmacl_free(acl, acl_len);
9321                         rsmpiacl_free(rsmpi_acl, acl_len);
9322 
9323                         rsmseglock_acquire(segp);
9324                         segp->s_state = RSM_STATE_BIND;
9325                         cv_broadcast(&segp->s_cv);
9326                         rsmseglock_release(segp);
9327                         DBG_PRINTF((category, RSM_ERR,
9328                             "%s done: exp_qscd publish failed = %d\n",
9329                             function, ret));
9330                         return;
9331                 }
9332 
9333                 segp->s_state = RSM_STATE_EXPORT;
9334                 cv_broadcast(&segp->s_cv);
9335                 rsmseglock_release(segp);
9336                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9337                     function));
9338                 return;
9339         }
9340 
9341         rsmseglock_release(segp);
9342 
9343         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9344 }
9345 
9346 static void
9347 rsm_quiesce_imp_seg(rsmresource_t *resp)
9348 {
9349         rsmseg_t        *segp = (rsmseg_t *)resp;
9350         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9351         DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9352 
9353         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9354             "%s enter: key=%u\n", function, segp->s_key));
9355 
9356         rsmseglock_acquire(segp);
9357         segp->s_flags |= RSM_DR_INPROGRESS;
9358 
9359         while (segp->s_rdmacnt != 0) {
9360                 /* wait for the RDMA to complete */
9361                 cv_wait(&segp->s_cv, &segp->s_lock);
9362         }
9363 
9364         rsmseglock_release(segp);
9365 
9366         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9367 
9368 }
9369 
9370 static void
9371 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9372 {
9373         rsmseg_t        *segp = (rsmseg_t *)resp;
9374         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9375         DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9376 
9377         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9378             "%s enter: key=%u\n", function, segp->s_key));
9379 
9380         rsmseglock_acquire(segp);
9381 
9382         segp->s_flags &= ~RSM_DR_INPROGRESS;
9383         /* wake up any waiting putv/getv ops */
9384         cv_broadcast(&segp->s_cv);
9385 
9386         rsmseglock_release(segp);
9387 
9388         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9389 
9390 
9391 }
9392 
9393 static void
9394 rsm_process_exp_seg(rsmresource_t *resp, int event)
9395 {
9396         if (event == RSM_DR_QUIESCE)
9397                 rsm_quiesce_exp_seg(resp);
9398         else /* UNQUIESCE */
9399                 rsm_unquiesce_exp_seg(resp);
9400 }
9401 
9402 static void
9403 rsm_process_imp_seg(rsmresource_t *resp, int event)
9404 {
9405         if (event == RSM_DR_QUIESCE)
9406                 rsm_quiesce_imp_seg(resp);
9407         else /* UNQUIESCE */
9408                 rsm_unquiesce_imp_seg(resp);
9409 }
9410 
9411 static void
9412 rsm_dr_process_local_segments(int event)
9413 {
9414 
9415         int i, j;
9416         rsmresource_blk_t       *blk;
9417         rsmresource_t           *p;
9418         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9419 
9420         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9421             "rsm_dr_process_local_segments enter\n"));
9422 
9423         /* iterate through the resource structure */
9424 
9425         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9426 
9427         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9428                 blk = rsm_resource.rsmrc_root[i];
9429                 if (blk != NULL) {
9430                         for (j = 0; j < RSMRC_BLKSZ; j++) {
9431                                 p = blk->rsmrcblk_blks[j];
9432                                 if ((p != NULL) && (p != RSMRC_RESERVED)) {
9433                                         /* valid resource */
9434                                         if (p->rsmrc_type ==
9435                                             RSM_RESOURCE_EXPORT_SEGMENT)
9436                                                 rsm_process_exp_seg(p, event);
9437                                         else if (p->rsmrc_type ==
9438                                             RSM_RESOURCE_IMPORT_SEGMENT)
9439                                                 rsm_process_imp_seg(p, event);
9440                                 }
9441                         }
9442                 }
9443         }
9444 
9445         rw_exit(&rsm_resource.rsmrc_lock);
9446 
9447         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9448             "rsm_dr_process_local_segments done\n"));
9449 }
9450 
9451 /* *************** DR callback functions ************ */
9452 static void
9453 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9454 {
9455         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9456         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9457             "rsm_dr_callback_post_add is a no-op\n"));
9458         /* Noop */
9459 }
9460 
9461 static int
9462 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9463 {
9464         int     recheck_state = 0;
9465         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9466 
9467         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9468             "rsm_dr_callback_pre_del enter\n"));
9469 
9470         mutex_enter(&rsm_drv_data.drv_lock);
9471 
9472         do {
9473                 recheck_state = 0;
9474                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9475                     "rsm_dr_callback_pre_del:state=%d\n",
9476                     rsm_drv_data.drv_state));
9477 
9478                 switch (rsm_drv_data.drv_state) {
9479                 case RSM_DRV_NEW:
9480                         /*
9481                          * The state should usually never be RSM_DRV_NEW
9482                          * since in this state the callbacks have not yet
9483                          * been registered. So, ASSERT.
9484                          */
9485                         ASSERT(0);
9486                         return (0);
9487                 case RSM_DRV_REG_PROCESSING:
9488                         /*
9489                          * The driver is in the process of registering
9490                          * with the DR framework. So, wait till the
9491                          * registration process is complete.
9492                          */
9493                         recheck_state = 1;
9494                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9495                         break;
9496                 case RSM_DRV_UNREG_PROCESSING:
9497                         /*
9498                          * If the state is RSM_DRV_UNREG_PROCESSING, the
9499                          * module is in the process of detaching and
9500                          * unregistering the callbacks from the DR
9501                          * framework. So, simply return.
9502                          */
9503                         mutex_exit(&rsm_drv_data.drv_lock);
9504                         DBG_PRINTF((category, RSM_DEBUG,
9505                             "rsm_dr_callback_pre_del:"
9506                             "pre-del on NEW/UNREG\n"));
9507                         return (0);
9508                 case RSM_DRV_OK:
9509                         rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9510                         break;
9511                 case RSM_DRV_PREDEL_STARTED:
9512                         /* FALLTHRU */
9513                 case RSM_DRV_PREDEL_COMPLETED:
9514                         /* FALLTHRU */
9515                 case RSM_DRV_POSTDEL_IN_PROGRESS:
9516                         recheck_state = 1;
9517                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9518                         break;
9519                 case RSM_DRV_DR_IN_PROGRESS:
9520                         rsm_drv_data.drv_memdel_cnt++;
9521                         mutex_exit(&rsm_drv_data.drv_lock);
9522                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9523                             "rsm_dr_callback_pre_del done\n"));
9524                         return (0);
9525                         /* break; */
9526                 default:
9527                         ASSERT(0);
9528                         break;
9529                 }
9530 
9531         } while (recheck_state);
9532 
9533         rsm_drv_data.drv_memdel_cnt++;
9534 
9535         mutex_exit(&rsm_drv_data.drv_lock);
9536 
9537         /* Do all the quiescing stuff here */
9538         DBG_PRINTF((category, RSM_DEBUG,
9539             "rsm_dr_callback_pre_del: quiesce things now\n"));
9540 
9541         rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9542 
9543         /*
9544          * now that all local segments have been quiesced lets inform
9545          * the importers
9546          */
9547         rsm_send_suspend();
9548 
9549         /*
9550          * In response to the suspend message the remote node(s) will process
9551          * the segments and send a suspend_complete message. Till all
9552          * the nodes send the suspend_complete message we wait in the
9553          * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9554          * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9555          */
9556         mutex_enter(&rsm_drv_data.drv_lock);
9557 
9558         while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9559                 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9560         }
9561 
9562         ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9563 
9564         rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9565         cv_broadcast(&rsm_drv_data.drv_cv);
9566 
9567         mutex_exit(&rsm_drv_data.drv_lock);
9568 
9569         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9570             "rsm_dr_callback_pre_del done\n"));
9571 
9572         return (0);
9573 }
9574 
9575 static void
9576 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9577 {
9578         int     recheck_state = 0;
9579         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9580 
9581         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9582             "rsm_dr_callback_post_del enter\n"));
9583 
9584         mutex_enter(&rsm_drv_data.drv_lock);
9585 
9586         do {
9587                 recheck_state = 0;
9588                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9589                     "rsm_dr_callback_post_del:state=%d\n",
9590                     rsm_drv_data.drv_state));
9591 
9592                 switch (rsm_drv_data.drv_state) {
9593                 case RSM_DRV_NEW:
9594                         /*
9595                          * The driver state cannot not be RSM_DRV_NEW
9596                          * since in this state the callbacks have not
9597                          * yet been registered.
9598                          */
9599                         ASSERT(0);
9600                         return;
9601                 case RSM_DRV_REG_PROCESSING:
9602                         /*
9603                          * The driver is in the process of registering with
9604                          * the DR framework. Wait till the registration is
9605                          * complete.
9606                          */
9607                         recheck_state = 1;
9608                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9609                         break;
9610                 case RSM_DRV_UNREG_PROCESSING:
9611                         /*
9612                          * RSM_DRV_UNREG_PROCESSING state means the module
9613                          * is detaching and unregistering the callbacks
9614                          * from the DR framework. So simply return.
9615                          */
9616                         /* FALLTHRU */
9617                 case RSM_DRV_OK:
9618                         /*
9619                          * RSM_DRV_OK means we missed the pre-del
9620                          * corresponding to this post-del coz we had not
9621                          * registered yet, so simply return.
9622                          */
9623                         mutex_exit(&rsm_drv_data.drv_lock);
9624                         DBG_PRINTF((category, RSM_DEBUG,
9625                             "rsm_dr_callback_post_del:"
9626                             "post-del on OK/UNREG\n"));
9627                         return;
9628                         /* break; */
9629                 case RSM_DRV_PREDEL_STARTED:
9630                         /* FALLTHRU */
9631                 case RSM_DRV_PREDEL_COMPLETED:
9632                         /* FALLTHRU */
9633                 case RSM_DRV_POSTDEL_IN_PROGRESS:
9634                         recheck_state = 1;
9635                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9636                         break;
9637                 case RSM_DRV_DR_IN_PROGRESS:
9638                         rsm_drv_data.drv_memdel_cnt--;
9639                         if (rsm_drv_data.drv_memdel_cnt > 0) {
9640                                 mutex_exit(&rsm_drv_data.drv_lock);
9641                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9642                                     "rsm_dr_callback_post_del done:\n"));
9643                                 return;
9644                         }
9645                         rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9646                         break;
9647                 default:
9648                         ASSERT(0);
9649                         return;
9650                         /* break; */
9651                 }
9652         } while (recheck_state);
9653 
9654         mutex_exit(&rsm_drv_data.drv_lock);
9655 
9656         /* Do all the unquiescing stuff here */
9657         DBG_PRINTF((category, RSM_DEBUG,
9658             "rsm_dr_callback_post_del: unquiesce things now\n"));
9659 
9660         rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9661 
9662         /*
9663          * now that all local segments have been unquiesced lets inform
9664          * the importers
9665          */
9666         rsm_send_resume();
9667 
9668         mutex_enter(&rsm_drv_data.drv_lock);
9669 
9670         rsm_drv_data.drv_state = RSM_DRV_OK;
9671 
9672         cv_broadcast(&rsm_drv_data.drv_cv);
9673 
9674         mutex_exit(&rsm_drv_data.drv_lock);
9675 
9676         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9677             "rsm_dr_callback_post_del done\n"));
9678 
9679         return;
9680 
9681 }